Skip to content

Commit

Permalink
[feature](metacache) add system table catalog_meta_cache_statistics (a…
Browse files Browse the repository at this point in the history
…pache#40155)

Add new system table `catalog_meta_cache_statistics`

```
mysql> select * from information_schema.catalog_meta_cache_statistics;
+--------------+-----------------------------+----------------------+---------------------+
| CATALOG_NAME | CACHE_NAME                  | METRIC_NAME          | METRIC_VALUE        |
+--------------+-----------------------------+----------------------+---------------------+
| hms          | hive_file_cache             | eviction_count       | 0                   |
| hms          | hive_file_cache             | hit_ratio            | 0.6                 |
| hms          | hive_file_cache             | average_load_penalty | 7.60805755E7        |
| hms          | hive_file_cache             | estimated_size       | 2                   |
| hms          | hive_file_cache             | hit_count            | 3                   |
| hms          | hive_file_cache             | read_count           | 5                   |
| hms          | hive_partition_cache        | eviction_count       | 0                   |
| hms          | hive_partition_cache        | hit_ratio            | 1.0                 |
| hms          | hive_partition_cache        | average_load_penalty | 0.0                 |
| hms          | hive_partition_cache        | estimated_size       | 0                   |
| hms          | hive_partition_cache        | hit_count            | 0                   |
| hms          | hive_partition_cache        | read_count           | 0                   |
| hms          | hive_partition_values_cache | eviction_count       | 0                   |
| hms          | hive_partition_values_cache | hit_ratio            | 1.0                 |
| hms          | hive_partition_values_cache | average_load_penalty | 0.0                 |
| hms          | hive_partition_values_cache | estimated_size       | 0                   |
| hms          | hive_partition_values_cache | hit_count            | 0                   |
| hms          | hive_partition_values_cache | read_count           | 0                   |
| hms          | hudi_partition_cache        | eviction_count       | 0                   |
| hms          | hudi_partition_cache        | hit_ratio            | 1.0                 |
| hms          | hudi_partition_cache        | average_load_penalty | 0.0                 |
| hms          | hudi_partition_cache        | estimated_size       | 0                   |
| hms          | hudi_partition_cache        | hit_count            | 0                   |
| hms          | hudi_partition_cache        | read_count           | 0                   |
| iceberg      | iceberg_table_cache         | eviction_count       | 0                   |
| iceberg      | iceberg_table_cache         | hit_ratio            | 0.07407407407407407 |
| iceberg      | iceberg_table_cache         | average_load_penalty | 1.7263263424E8      |
| iceberg      | iceberg_table_cache         | estimated_size       | 25                  |
| iceberg      | iceberg_table_cache         | hit_count            | 2                   |
| iceberg      | iceberg_table_cache         | read_count           | 27                  |
| iceberg      | iceberg_snapshot_cache      | eviction_count       | 0                   |
| iceberg      | iceberg_snapshot_cache      | hit_ratio            | 1.0                 |
| iceberg      | iceberg_snapshot_cache      | average_load_penalty | 0.0                 |
| iceberg      | iceberg_snapshot_cache      | estimated_size       | 0                   |
| iceberg      | iceberg_snapshot_cache      | hit_count            | 0                   |
| iceberg      | iceberg_snapshot_cache      | read_count           | 0                   |
+--------------+-----------------------------+----------------------+---------------------+
```
  • Loading branch information
morningman authored Sep 3, 2024
1 parent 80c65f2 commit a4956d6
Show file tree
Hide file tree
Showing 26 changed files with 343 additions and 155 deletions.
3 changes: 3 additions & 0 deletions be/src/exec/schema_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#include "exec/schema_scanner/schema_active_queries_scanner.h"
#include "exec/schema_scanner/schema_backend_active_tasks.h"
#include "exec/schema_scanner/schema_catalog_meta_cache_stats_scanner.h"
#include "exec/schema_scanner/schema_charsets_scanner.h"
#include "exec/schema_scanner/schema_collations_scanner.h"
#include "exec/schema_scanner/schema_columns_scanner.h"
Expand Down Expand Up @@ -240,6 +241,8 @@ std::unique_ptr<SchemaScanner> SchemaScanner::create(TSchemaTableType::type type
return SchemaBackendWorkloadGroupResourceUsage::create_unique();
case TSchemaTableType::SCH_TABLE_PROPERTIES:
return SchemaTablePropertiesScanner::create_unique();
case TSchemaTableType::SCH_CATALOG_META_CACHE_STATISTICS:
return SchemaCatalogMetaCacheStatsScanner::create_unique();
default:
return SchemaDummyScanner::create_unique();
break;
Expand Down
135 changes: 135 additions & 0 deletions be/src/exec/schema_scanner/schema_catalog_meta_cache_stats_scanner.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "exec/schema_scanner/schema_catalog_meta_cache_stats_scanner.h"

#include <utility>

#include "runtime/client_cache.h"
#include "runtime/exec_env.h"
#include "runtime/query_context.h"
#include "runtime/runtime_state.h"
#include "util/thrift_rpc_helper.h"
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
#include "vec/data_types/data_type_factory.hpp"

namespace doris {
// Column layout of the `catalog_meta_cache_statistics` schema table.
// The same list drives three things in this file: the column names sent to the
// FE in the fetch request, the expected width of every row the FE returns, and
// the columns of the block that buffers the result.
// All four columns are strings, so METRIC_VALUE carries numbers as text.
// NOTE(review): the trailing `true` is presumably the "nullable" flag of
// SchemaScanner::ColumnDesc -- confirm against its declaration.
std::vector<SchemaScanner::ColumnDesc> SchemaCatalogMetaCacheStatsScanner::_s_tbls_columns = {
{"CATALOG_NAME", TYPE_STRING, sizeof(StringRef), true},
{"CACHE_NAME", TYPE_STRING, sizeof(StringRef), true},
{"METRIC_NAME", TYPE_STRING, sizeof(StringRef), true},
{"METRIC_VALUE", TYPE_STRING, sizeof(StringRef), true},
};

// Registers this scanner with the base SchemaScanner using the static column
// layout above and the SCH_CATALOG_META_CACHE_STATISTICS table type.
// No other state is set up here; per-query settings are captured in start().
SchemaCatalogMetaCacheStatsScanner::SchemaCatalogMetaCacheStatsScanner()
: SchemaScanner(_s_tbls_columns, TSchemaTableType::SCH_CATALOG_META_CACHE_STATISTICS) {}

// Nothing to release by hand: every member cleans up after itself.
SchemaCatalogMetaCacheStatsScanner::~SchemaCatalogMetaCacheStatsScanner() = default;

// Captures the per-query settings that the later FE fetch relies on.
// No RPC is issued here; the fetch happens on the first call to
// get_next_block_internal().
Status SchemaCatalogMetaCacheStatsScanner::start(RuntimeState* state) {
    // FE this query is connected to; it is the one asked for the statistics.
    _fe_addr = state->get_query_ctx()->current_connect_fe;
    // The query execution timeout scaled by 1000 is used as the RPC timeout
    // (presumably seconds -> milliseconds; confirm against ThriftRpcHelper::rpc).
    _rpc_timeout = state->execution_timeout() * 1000;
    // Upper bound on the number of rows handed out per output block.
    _block_rows_limit = state->batch_size();
    return Status::OK();
}

// Fetches the catalog meta cache statistics from the FE recorded in start()
// and buffers them in `_block`, one block row per returned TRow.
//
// Returns:
//   - the RPC error if the thrift call fails,
//   - the FE-reported status if the FE answers with a non-OK status,
//   - InternalError if ANY returned row does not have exactly one value per
//     entry of `_s_tbls_columns`,
//   - the conversion error if a cell cannot be inserted into the block,
//   - OK otherwise.
//
// `_block` is assigned only after every row has been validated and converted.
// On any failure it is left untouched (null on the first call), so a later call
// to get_next_block_internal() retries the fetch instead of seeing an empty,
// half-built block and reporting a clean end-of-stream.
Status SchemaCatalogMetaCacheStatsScanner::_get_meta_cache_from_fe() {
    const int column_count = static_cast<int>(_s_tbls_columns.size());

    // Ask the FE for exactly the columns this scanner knows about.
    TSchemaTableRequestParams schema_table_request_params;
    schema_table_request_params.__isset.columns_name = true;
    for (const auto& column : _s_tbls_columns) {
        schema_table_request_params.columns_name.emplace_back(column.name);
    }
    schema_table_request_params.__set_current_user_ident(*_param->common_param->current_user_ident);

    TFetchSchemaTableDataRequest request;
    request.__set_schema_table_name(TSchemaTableName::CATALOG_META_CACHE_STATS);
    request.__set_schema_table_params(schema_table_request_params);

    TFetchSchemaTableDataResult result;

    RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>(
            _fe_addr.hostname, _fe_addr.port,
            [&request, &result](FrontendServiceConnection& client) {
                client->fetchSchemaTableData(result, request);
            },
            _rpc_timeout));

    Status status(Status::create(result.status));
    if (!status.ok()) {
        LOG(WARNING) << "fetch catalog meta cache stats from FE(" << _fe_addr.hostname
                     << ") failed, errmsg=" << status;
        return status;
    }

    // Bind by reference: the result outlives this function body, so there is no
    // need to copy the whole batch (or each row) before reading it.
    const std::vector<TRow>& rows = result.data_batch;

    // Validate the width of EVERY row before touching column_value[j] below;
    // checking only the first row would let a short later row be read out of
    // bounds.
    for (const TRow& row : rows) {
        if (static_cast<int>(row.column_value.size()) != column_count) {
            return Status::InternalError<false>(
                    "catalog meta cache stats schema is not match for FE and BE");
        }
    }

    // Build into a local block and publish it only on success.
    std::unique_ptr<vectorized::Block> block = vectorized::Block::create_unique();
    for (const auto& column : _s_tbls_columns) {
        TypeDescriptor descriptor(column.type);
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(descriptor, true);
        block->insert(vectorized::ColumnWithTypeAndName(data_type->create_column(), data_type,
                                                        column.name));
    }

    block->reserve(_block_rows_limit);

    for (const TRow& row : rows) {
        for (int j = 0; j < column_count; ++j) {
            RETURN_IF_ERROR(insert_block_column(row.column_value[j], j, block.get(),
                                                _s_tbls_columns[j].type));
        }
    }

    _block = std::move(block);
    return Status::OK();
}

// Hands out the buffered statistics in slices of at most `_block_rows_limit`
// rows per call.
//
// The first call that reaches the fetch step pulls the full result from the FE;
// later calls only copy the next slice out of `_block`.
//
// @param block  destination block that the next slice of rows is appended to
// @param eos    set to true once the last buffered row has been handed out
// @return InternalError if the scanner is not initialized or a pointer is null,
//         any error from the FE fetch or the row copy, OK otherwise
Status SchemaCatalogMetaCacheStatsScanner::get_next_block_internal(vectorized::Block* block,
                                                                   bool* eos) {
    if (!_is_init) {
        return Status::InternalError("Used before initialized.");
    }

    if (block == nullptr || eos == nullptr) {
        return Status::InternalError("input pointer is nullptr.");
    }

    // Lazy fetch: nothing has been buffered yet.
    if (!_block) {
        RETURN_IF_ERROR(_get_meta_cache_from_fe());
        _total_rows = _block->rows();
    }

    const int remaining_rows = _total_rows - _row_idx;
    if (remaining_rows == 0) {
        // Everything was already returned (or the FE returned no rows at all).
        *eos = true;
        return Status::OK();
    }

    const int rows_to_copy = std::min(_block_rows_limit, remaining_rows);
    vectorized::MutableBlock target = vectorized::MutableBlock::build_mutable_block(block);
    RETURN_IF_ERROR(target.add_rows(_block.get(), _row_idx, rows_to_copy));
    _row_idx += rows_to_copy;

    *eos = (_row_idx == _total_rows);
    return Status::OK();
}

} // namespace doris
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <vector>

#include "common/status.h"
#include "exec/schema_scanner.h"

namespace doris {
class RuntimeState;
namespace vectorized {
class Block;
} // namespace vectorized

// Schema scanner backing the `catalog_meta_cache_statistics` system table.
// It fetches all rows from the FE in a single RPC on first use, buffers them in
// `_block`, and then serves them to the caller in batches.
class SchemaCatalogMetaCacheStatsScanner : public SchemaScanner {
// NOTE(review): presumably provides the static create_unique() factory that
// SchemaScanner::create() calls for this type -- confirm against the macro.
ENABLE_FACTORY_CREATOR(SchemaCatalogMetaCacheStatsScanner);

public:
SchemaCatalogMetaCacheStatsScanner();
~SchemaCatalogMetaCacheStatsScanner() override;

// Records batch size, RPC timeout and the FE address from the runtime state.
Status start(RuntimeState* state) override;
// Appends the next batch of buffered rows to `block` and sets `*eos` once the
// last row has been returned.
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;

// Column layout of the table (CATALOG_NAME, CACHE_NAME, METRIC_NAME, METRIC_VALUE).
static std::vector<SchemaScanner::ColumnDesc> _s_tbls_columns;

private:
// Performs the FE RPC and fills `_block` with the returned rows.
Status _get_meta_cache_from_fe();

// FE to query; set in start() from the query context.
TNetworkAddress _fe_addr;

// Max rows copied out per get_next_block_internal() call; start() replaces
// this default with the runtime state's batch size.
int _block_rows_limit = 4096;
// Index in `_block` of the next row to hand out.
int _row_idx = 0;
// Number of rows in `_block`; set right after the FE fetch.
int _total_rows = 0;
// Buffered FE result; null until the first fetch.
std::unique_ptr<vectorized::Block> _block = nullptr;
// Timeout passed to the FE RPC; start() replaces this default with the query
// execution timeout * 1000 (unit presumably milliseconds -- TODO confirm).
int _rpc_timeout = 3000;
};
}; // namespace doris
1 change: 0 additions & 1 deletion be/src/vec/exec/scan/vmeta_scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ class VMetaScanner : public VScanner {
TFetchSchemaTableDataRequest* request);
Status _build_queries_metadata_request(const TMetaScanRange& meta_scan_range,
TFetchSchemaTableDataRequest* request);

bool _meta_eos;
TupleId _tuple_id;
TUserIdentity _user_identity;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2042,7 +2042,7 @@ public class Config extends ConfigBase {
* Decrease this value if FE's memory is small
*/
@ConfField(mutable = false, masterOnly = false)
public static long max_external_file_cache_num = 100000;
public static long max_external_file_cache_num = 10000;

/**
* Max cache num of external table's schema
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ public enum SchemaTableType {
SCH_WORKLOAD_GROUP_RESOURCE_USAGE("WORKLOAD_GROUP_RESOURCE_USAGE",
"WORKLOAD_GROUP_RESOURCE_USAGE", TSchemaTableType.SCH_WORKLOAD_GROUP_RESOURCE_USAGE),
SCH_TABLE_PROPERTIES("TABLE_PROPERTIES", "TABLE_PROPERTIES",
TSchemaTableType.SCH_TABLE_PROPERTIES);
TSchemaTableType.SCH_TABLE_PROPERTIES),
SCH_CATALOG_META_CACHE_STATISTICS("CATALOG_META_CACHE_STATISTICS", "CATALOG_META_CACHE_STATISTICS",
TSchemaTableType.SCH_CATALOG_META_CACHE_STATISTICS);

private static final String dbName = "INFORMATION_SCHEMA";
private static SelectList fullSelectLists;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,14 @@ public class SchemaTable extends Table {
.column("REMOTE_SCAN_BYTES_PER_SECOND", ScalarType.createType(PrimitiveType.BIGINT))
.build())
)
.put("catalog_meta_cache_statistics",
new SchemaTable(SystemIdGenerator.getNextId(), "catalog_meta_cache_statistics", TableType.SCHEMA,
builder().column("CATALOG_NAME", ScalarType.createStringType())
.column("CACHE_NAME", ScalarType.createStringType())
.column("METRIC_NAME", ScalarType.createStringType())
.column("METRIC_VALUE", ScalarType.createStringType())
.build())
)
.build();

private boolean fetchAllFe = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import com.github.benmanes.caffeine.cache.CacheLoader;
import com.github.benmanes.caffeine.cache.RemovalListener;
import com.github.benmanes.caffeine.cache.stats.CacheStats;
import com.google.common.collect.Maps;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -296,4 +297,15 @@ public <T> MetaCache<T> buildMetaCache(String name,
maxSize, namesCacheLoader, metaObjCacheLoader, removalListener);
return metaCache;
}

/**
 * Flattens a cache statistics snapshot into a metric-name to metric-value map.
 *
 * <p>Every value is rendered as a string so that counters and ratios can share one column.
 *
 * @param cacheStats statistics snapshot of the cache
 * @param estimatedSize estimated number of entries currently held by the cache
 * @return a new mutable map with the keys {@code hit_ratio}, {@code hit_count},
 *     {@code read_count}, {@code eviction_count}, {@code average_load_penalty}
 *     and {@code estimated_size}
 */
public static Map<String, String> getCacheStats(CacheStats cacheStats, long estimatedSize) {
    long hits = cacheStats.hitCount();
    // A read is any lookup, whether it hit or missed.
    long reads = hits + cacheStats.missCount();

    Map<String, String> metrics = Maps.newHashMap();
    metrics.put("hit_ratio", Double.toString(cacheStats.hitRate()));
    metrics.put("hit_count", Long.toString(hits));
    metrics.put("read_count", Long.toString(reads));
    metrics.put("eviction_count", Long.toString(cacheStats.evictionCount()));
    metrics.put("average_load_penalty", Double.toString(cacheStats.averageLoadPenalty()));
    metrics.put("estimated_size", Long.toString(estimatedSize));
    return metrics;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.doris.common.util.LocationPath;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CacheException;
import org.apache.doris.datasource.ExternalMetaCacheMgr;
import org.apache.doris.datasource.hive.AcidInfo.DeleteDeltaInfo;
import org.apache.doris.datasource.property.PropertyConverter;
import org.apache.doris.fs.FileSystemCache;
Expand Down Expand Up @@ -141,7 +142,7 @@ private void init() {
OptionalLong.of(28800L),
OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60L),
Config.max_hive_partition_table_cache_num,
false,
true,
null);
partitionValuesCache = partitionValuesCacheFactory.buildCache(key -> loadPartitionValues(key), null,
refreshExecutor);
Expand All @@ -150,7 +151,7 @@ private void init() {
OptionalLong.of(28800L),
OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60L),
Config.max_hive_partition_cache_num,
false,
true,
null);
partitionCache = partitionCacheFactory.buildCache(new CacheLoader<PartitionCacheKey, HivePartition>() {
@Override
Expand Down Expand Up @@ -183,7 +184,7 @@ public void setNewFileCache() {
? fileMetaCacheTtlSecond : 28800L),
OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60L),
Config.max_external_file_cache_num,
false,
true,
null);

CacheLoader<FileCacheKey, FileCacheValue> loader = new CacheBulkLoader<FileCacheKey, FileCacheValue>() {
Expand Down Expand Up @@ -1135,4 +1136,19 @@ public HivePartitionValues copy() {
return copy;
}
}

/**
 * Collects statistics for the three caches held by this object.
 *
 * <p>Each cache is reported with its own statistics and its own estimated size.
 * Note that the file cache reference is read twice (once for stats, once for size).
 *
 * @return a new map of cache name to (metric name to metric value), with the entries
 *     {@code hive_partition_values_cache}, {@code hive_partition_cache}
 *     and {@code hive_file_cache}
 */
public Map<String, Map<String, String>> getStats() {
    Map<String, Map<String, String>> res = Maps.newHashMap();
    // The size must come from partitionValuesCache itself, not from partitionCache;
    // otherwise this entry reports another cache's entry count.
    res.put("hive_partition_values_cache", ExternalMetaCacheMgr.getCacheStats(partitionValuesCache.stats(),
            partitionValuesCache.estimatedSize()));
    res.put("hive_partition_cache",
            ExternalMetaCacheMgr.getCacheStats(partitionCache.stats(), partitionCache.estimatedSize()));
    res.put("hive_file_cache",
            ExternalMetaCacheMgr.getCacheStats(fileCacheRef.get().stats(), fileCacheRef.get().estimatedSize()));
    return res;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
import org.apache.doris.common.Config;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CacheException;
import org.apache.doris.datasource.ExternalMetaCacheMgr;
import org.apache.doris.datasource.TablePartitionValues;
import org.apache.doris.datasource.TablePartitionValues.TablePartitionKey;
import org.apache.doris.datasource.hive.HMSExternalCatalog;
import org.apache.doris.datasource.hive.HMSExternalTable;

import com.github.benmanes.caffeine.cache.LoadingCache;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
Expand All @@ -37,6 +39,7 @@

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
Expand All @@ -55,7 +58,7 @@ public HudiCachedPartitionProcessor(long catalogId, ExecutorService executor) {
OptionalLong.of(28800L),
OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60),
Config.max_external_table_cache_num,
false,
true,
null);
this.partitionCache = partitionCacheFactory.buildCache(key -> new TablePartitionValues(), null, executor);
}
Expand Down Expand Up @@ -167,4 +170,11 @@ public TablePartitionValues getPartitionValues(HMSExternalTable table, HoodieTab
throw new CacheException("Failed to get hudi partitions: " + Util.getRootCauseMessage(e), e);
}
}

/**
 * Collects statistics for the partition cache held by this processor.
 *
 * @return a new map with the single entry {@code hudi_partition_cache}, mapping
 *     metric name to metric value
 */
public Map<String, Map<String, String>> getCacheStats() {
    Map<String, String> partitionCacheMetrics =
            ExternalMetaCacheMgr.getCacheStats(partitionCache.stats(), partitionCache.estimatedSize());

    Map<String, Map<String, String>> statsByCache = Maps.newHashMap();
    statsByCache.put("hudi_partition_cache", partitionCacheMetrics);
    return statsByCache;
}
}
Loading

0 comments on commit a4956d6

Please sign in to comment.