Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce memory consumption from attribute accumulation #290

Merged
merged 8 commits into from
Nov 5, 2024
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.67.0

* Reduce memory consumption of duplicate attribute names in `serial_feature`
* The maxzoom guess calculation now takes into account the number of duplicate feature locations

# 2.66.0

* Only bin by ID, not by geometry, if --bin-by-id-list is specified
Expand Down
14 changes: 7 additions & 7 deletions attribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribut
}

template <class T>
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::string> &full_keys, std::vector<T> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<T> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
for (size_t i = 0; i < full_keys.size(); i++) {
if (key == full_keys[i]) {
if (key == *full_keys[i]) {
switch (op) {
case op_sum:
full_values[i] = (full_values[i].to_double() + val.to_double());
Expand Down Expand Up @@ -193,14 +193,14 @@ static void preserve_attribute1(attribute_op const &op, std::string const &key,
exit(EXIT_IMPOSSIBLE);
}

full_keys.push_back(key);
full_keys.push_back(key_pool.pool(key));
full_values.push_back(v);
}

void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::string> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state, key_pool);
}

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state, key_pool);
}
6 changes: 4 additions & 2 deletions attribute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <vector>
#include <unordered_map>
#include <map>
#include <memory>
#include "mvt.hpp"
#include "milo/dtoa_milo.h"

Expand All @@ -24,12 +25,13 @@ struct accum_state {
};

struct serial_val;
struct key_pool;

void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, std::string name, std::string type);
void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, const char *arg, char **argv);

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::string> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool);

extern std::map<std::string, attribute_op> numeric_operations;

Expand Down
49 changes: 26 additions & 23 deletions clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,14 +1163,14 @@ static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &
};

// accumulate :sum:, :min:, :max:, and :count: versions of the specified attribute
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::string> &full_keys, // keys of feature being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state // accumulation state for preserve_attribute()
) {
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::shared_ptr<std::string>> &full_keys, // keys of feature being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state, // accumulation state for preserve_attribute()
key_pool &key_pool) {
// If this is a numeric attribute, but there is also a prefix:sum (etc.) for the
// same attribute, we want to use that one instead of this one.

Expand Down Expand Up @@ -1213,7 +1213,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
if (out_attr == numeric_out_field.end()) {
// not present at all, so copy our value to the prefixed output
numeric_out_field.emplace(prefixed, full_keys.size());
full_keys.push_back(prefixed);
full_keys.push_back(key_pool.pool(prefixed));

if (op.second == op_count) {
if (starting_from_accumulation) {
Expand All @@ -1229,7 +1229,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
} else {
// exists unprefixed, so copy it, and then accumulate on our value
numeric_out_field.emplace(prefixed, full_keys.size());
full_keys.push_back(prefixed);
full_keys.push_back(key_pool.pool(prefixed));

if (op.second == op_count) {
mvt_value v;
Expand All @@ -1243,7 +1243,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
full_values.push_back(v);
} else {
full_values.push_back(full_values[out_attr->second]);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state, key_pool);
}
}
} else {
Expand All @@ -1256,7 +1256,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
full_values[prefixed_attr->second] = mvt_value(mvt_value_to_long_long(full_values[prefixed_attr->second]) + 1);
}
} else {
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state, key_pool);
}
}
}
Expand Down Expand Up @@ -1289,7 +1289,8 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix,
std::unordered_map<std::string, attribute_op> const &attribute_accum,
std::string const &accumulate_numeric) {
std::string const &accumulate_numeric,
key_pool &key_pool) {
// Add geometry to output feature

mvt_feature outfeature;
Expand All @@ -1315,7 +1316,7 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
// multiplier cluster accumulated onto them

std::unordered_map<std::string, accum_state> attribute_accum_state;
std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<mvt_value> full_values;
std::map<std::string, size_t> numeric_out_field;

Expand All @@ -1324,12 +1325,12 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
// this attribute has an accumulator, so convert it
full_keys.push_back(features[0].layer->keys[features[0].tags[i]]);
full_keys.push_back(key_pool.pool(features[0].layer->keys[features[0].tags[i]]));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else if (accumulate_numeric.size() > 0 && features[0].layer->values[features[0].tags[i + 1]].is_numeric()) {
// convert numeric for accumulation
numeric_out_field.emplace(key, full_keys.size());
full_keys.push_back(key);
full_keys.push_back(key_pool.pool(key));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else {
// otherwise just tag it directly onto the output feature
Expand Down Expand Up @@ -1357,13 +1358,13 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state, key_pool);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
if (val.is_numeric()) {
preserve_numeric(key, val, full_keys, full_values,
accumulate_numeric,
keys, numeric_out_field, attribute_accum_state);
keys, numeric_out_field, attribute_accum_state, key_pool);
}
}
}
Expand All @@ -1373,8 +1374,8 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
// and tag them onto the output feature

for (size_t i = 0; i < full_keys.size(); i++) {
if (should_keep(full_keys[i], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, full_keys[i], full_values[i]);
if (should_keep(*full_keys[i], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, *full_keys[i], full_values[i]);
}
}

Expand Down Expand Up @@ -1522,6 +1523,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
std::set<std::string> exclude,
std::vector<std::string> exclude_prefix) {
std::vector<index_event> events;
key_pool key_pool;

// Index bins
for (size_t i = 0; i < bins.size(); i++) {
Expand Down Expand Up @@ -1678,7 +1680,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
if (outfeatures[i].size() > 1) {
feature_out(outfeatures[i], outlayer,
keep, exclude, exclude_prefix, attribute_accum,
accumulate_numeric);
accumulate_numeric, key_pool);
mvt_feature &nfeature = outlayer.features.back();
mvt_value val;
val.type = mvt_uint;
Expand Down Expand Up @@ -1713,6 +1715,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
std::vector<mvt_layer> const &bins, std::string const &bin_by_id_list,
std::string const &accumulate_numeric) {
mvt_tile outtile;
key_pool key_pool;

for (auto const &tile : tiles) {
for (auto const &layer : tile.tile.layers) {
Expand Down Expand Up @@ -1837,7 +1840,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int

if (flush_multiplier_cluster) {
if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool);
pending_tile_features.clear();
}
}
Expand Down Expand Up @@ -1894,7 +1897,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
}

if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool);
pending_tile_features.clear();
}

Expand Down
5 changes: 3 additions & 2 deletions flatgeobuf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,9 @@ void readFeature(const FlatGeobuf::Feature *feature, long long feature_sequence_
sf.geometry = dv;
sf.t = drawvec_type;

std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> full_values;
key_pool key_pool;

// assume tabular schema with columns in header
size_t p_pos = 0;
Expand Down Expand Up @@ -243,7 +244,7 @@ void readFeature(const FlatGeobuf::Feature *feature, long long feature_sequence_
fprintf(stderr, "flatgeobuf has unsupported column type %u\n", (unsigned int)col_type);
exit(EXIT_IMPOSSIBLE);
}
full_keys.push_back(h_column_names[col_idx]);
full_keys.push_back(key_pool.pool(h_column_names[col_idx]));
full_values.push_back(sv);
}

Expand Down
9 changes: 5 additions & 4 deletions geobuf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,14 +270,14 @@ std::vector<drawvec_type> readGeometry(protozero::pbf_reader &pbf, size_t dim, d
return ret;
}

void readFeature(protozero::pbf_reader &pbf, size_t dim, double e, std::vector<std::string> &keys, struct serialization_state *sst, int layer, std::string layername) {
void readFeature(protozero::pbf_reader &pbf, size_t dim, double e, std::vector<std::string> &keys, struct serialization_state *sst, int layer, std::string layername, key_pool &key_pool) {
std::vector<drawvec_type> dv;
long long id = 0;
bool has_id = false;
std::vector<serial_val> values;
std::map<std::string, serial_val> other;

std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> full_values;

while (pbf.next()) {
Expand Down Expand Up @@ -338,7 +338,7 @@ void readFeature(protozero::pbf_reader &pbf, size_t dim, double e, std::vector<s
exit(EXIT_IMPOSSIBLE);
}

full_keys.push_back(keys[properties[i]]);
full_keys.push_back(key_pool.pool(keys[properties[i]]));
full_values.push_back(values[properties[i + 1]]);
}

Expand Down Expand Up @@ -434,10 +434,11 @@ struct queue_run_arg {

void *run_parse_feature(void *v) {
struct queue_run_arg *qra = (struct queue_run_arg *) v;
key_pool key_pool;

for (size_t i = qra->start; i < qra->end; i++) {
struct queued_feature &qf = feature_queue[i];
readFeature(qf.pbf, qf.dim, qf.e, *qf.keys, &(*qf.sst)[qra->segment], qf.layer, qf.layername);
readFeature(qf.pbf, qf.dim, qf.e, *qf.keys, &(*qf.sst)[qra->segment], qf.layer, qf.layername, key_pool);
}

return NULL;
Expand Down
5 changes: 3 additions & 2 deletions geocsv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
}

size_t seq = 0;
key_pool key_pool;
while ((s = csv_getline(f)).size() > 0) {
std::string err = check_utf8(s);
if (err != "") {
Expand Down Expand Up @@ -89,7 +90,7 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
drawvec dv;
dv.push_back(draw(VT_MOVETO, x, y));

std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> full_values;

for (size_t i = 0; i < line.size(); i++) {
Expand All @@ -107,7 +108,7 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
}
sv.s = line[i];

full_keys.push_back(header[i]);
full_keys.push_back(key_pool.pool(header[i]));
full_values.push_back(sv);
}
}
Expand Down
9 changes: 5 additions & 4 deletions geojson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,17 +182,18 @@ int serialize_geojson_feature(struct serialization_state *sst, json_object *geom
nprop = properties->value.object.length;
}

std::vector<std::string> keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> values;

keys.reserve(nprop);
full_keys.reserve(nprop);
values.reserve(nprop);
key_pool key_pool;

for (size_t i = 0; i < nprop; i++) {
if (properties->value.object.keys[i]->type == JSON_STRING) {
serial_val sv = stringify_value(properties->value.object.values[i], sst->fname, sst->line, feature);

keys.emplace_back(properties->value.object.keys[i]->value.string.string);
full_keys.emplace_back(key_pool.pool(properties->value.object.keys[i]->value.string.string));
values.push_back(std::move(sv));
}
}
Expand All @@ -211,7 +212,7 @@ int serialize_geojson_feature(struct serialization_state *sst, json_object *geom
sf.geometry = dv;
sf.feature_minzoom = 0; // Will be filled in during index merging
sf.seq = *(sst->layer_seq);
sf.full_keys = std::move(keys);
sf.full_keys = std::move(full_keys);
sf.full_values = std::move(values);

return serialize_feature(sst, sf, tippecanoe_layername);
Expand Down
Loading
Loading