Skip to content

Commit

Permalink
Update the weight function for Mark Cache and Min Max Index Cache (#8058
Browse files Browse the repository at this point in the history
) (#8066)

ref #8050
  • Loading branch information
ti-chi-bot committed Sep 7, 2023
1 parent 453088e commit 56a286f
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 20 deletions.
17 changes: 8 additions & 9 deletions dbms/src/Common/LRUCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,12 @@
#include <memory>
#include <mutex>
#include <unordered_map>


namespace DB
{
/// Weight function that ignores its arguments and reports a weight of 1 for every entry.
template <typename T>
template <typename K, typename T>
struct TrivialWeightFunction
{
size_t operator()(const T &) const { return 1; }
size_t operator()(const K &, const T &) const { return 1; }
};


Expand All @@ -39,10 +37,11 @@ struct TrivialWeightFunction
/// of that value.
/// Cache starts to evict entries when their total weight exceeds max_size.
/// Value weight should not change after insertion.
template <typename TKey,
typename TMapped,
typename HashFunction = std::hash<TKey>,
typename WeightFunction = TrivialWeightFunction<TMapped>>
template <
typename TKey,
typename TMapped,
typename HashFunction = std::hash<TKey>,
typename WeightFunction = TrivialWeightFunction<TKey, TMapped>>
class LRUCache
{
public:
Expand Down Expand Up @@ -326,7 +325,7 @@ class LRUCache
}

cell.value = mapped;
cell.size = cell.value ? weight_function(*cell.value) : 0;
cell.size = cell.value ? weight_function(key, *cell.value) : 0;
current_weight += cell.size;

removeOverflow();
Expand Down
9 changes: 8 additions & 1 deletion dbms/src/Common/PODArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,14 @@ class PODArrayBase : private boost::noncopyable

/// Allocates storage sized for `num_elements` elements.
void alloc_for_num_elements(size_t num_elements)
{
alloc(roundUpToPowerOfTwoOrZero(minimum_memory_for_elements(num_elements)));
// alloc_for_num_elements is only used when a PODArray is initialized from a size or from two iterators.
// If the caller only initializes the PODArray and never pushes back further elements,
// rounding up with roundUpToPowerOfTwoOrZero here just wastes memory.
// If the caller initializes the PODArray first and pushes back further elements later,
// push_back / emplace_back call reserveForNextSize to allocate the extra memory.
// Thus roundUpToPowerOfTwoOrZero is not needed here: omitting it cuts down extra memory usage
// and has no bad effect on performance.
alloc(minimum_memory_for_elements(num_elements));
}

template <typename... TAllocatorParams>
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Common/tests/gtest_lru_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ TEST(LRUCacheTest, get)

/// Weight function whose result is the stored value itself.
struct ValueWeight
{
    /// Value-only form: the weight equals the value.
    size_t operator()(const size_t & value) const
    {
        return value;
    }

    /// Key-aware form: the key does not contribute to the weight.
    size_t operator()(const int & /*key*/, const size_t & value) const
    {
        return value;
    }
};

TEST(LRUCacheTest, evictOnSize)
Expand Down
5 changes: 1 addition & 4 deletions dbms/src/IO/UncompressedCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,7 @@ struct UncompressedCacheCell

/// Weight of an uncompressed-cache entry, measured from the size of the cell's data buffer.
struct UncompressedSizeWeightFunction
{
    /// Value-only form: the size of the cell's data.
    size_t operator()(const UncompressedCacheCell & x) const { return x.data.size(); }

    /// Key-aware form: the size of the cell's data plus the size of the key object.
    size_t operator()(const UInt128 key, const UncompressedCacheCell & x) const
    {
        const size_t payload_size = x.data.size();
        return sizeof(key) + payload_size;
    }
};


Expand Down
8 changes: 8 additions & 0 deletions dbms/src/Interpreters/AsynchronousMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ void AsynchronousMetrics::update()
}
}

{
// Publish min-max index cache statistics when context.getMinMaxIndexCache() yields a cache:
// "MinMaxIndexCacheBytes" is the cache's weight() and "MinMaxIndexFiles" is its count().
if (auto min_max_cache = context.getMinMaxIndexCache())
{
set("MinMaxIndexCacheBytes", min_max_cache->weight());
set("MinMaxIndexFiles", min_max_cache->count());
}
}

{
if (auto uncompressed_cache = context.getUncompressedCache())
{
Expand Down
25 changes: 23 additions & 2 deletions dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ class MinMaxIndex

/// Estimated size of this index in bytes.
size_t byteSize() const
{
return sizeof(UInt8) * has_null_marks->size() + sizeof(UInt8) * has_value_marks->size() + minmaxes->byteSize();
// has_null_marks, has_value_marks and minmaxes all use PaddedPODArray,
// so we add 3 * sizeof(PaddedPODArray<UInt8>) to account for the structural
// memory cost of the PaddedPODArray object behind each of them.
return sizeof(UInt8) * has_null_marks->size() + sizeof(UInt8) * has_value_marks->size() + minmaxes->byteSize()
+ 3 * sizeof(PaddedPODArray<UInt8>);
}

void addPack(const IColumn & column, const ColumnVector<UInt8> * del_mark);
Expand Down Expand Up @@ -91,7 +95,24 @@ class MinMaxIndex

/// Estimates the weight, in bytes, of one min-max index cache entry.
struct MinMaxIndexWeightFunction
{
    /// Value-only form: just the index's own byte size.
    size_t operator()(const MinMaxIndex & index) const { return index.byteSize(); }

    /// Key-aware form: the index's byte size plus per-entry costs for the key
    /// and for the cache's bookkeeping structures.
    size_t operator()(const String & key, const MinMaxIndex & index) const
    {
        // Hard-coded per-entry overheads, in bytes.
        constexpr size_t cell_overhead = 32; // Cells struct memory cost
        constexpr size_t hash_table_overhead = 28; // hash table struct approximate memory cost
        constexpr size_t lru_list_overhead = sizeof(std::list<String>); // list struct memory cost

        // The key's characters and its String object are each counted twice.
        // NOTE(review): presumably once for the hash table and once for the LRU queue — confirm against LRUCache.
        const size_t key_overhead = 2 * (key.size() + sizeof(String));

        return index.byteSize() + cell_overhead + key_overhead + hash_table_overhead + lru_list_overhead;
    }
};


Expand Down
20 changes: 17 additions & 3 deletions dbms/src/Storages/MarkCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,24 @@ namespace DB
/// Estimate of number of bytes in cache for marks.
/// Besides the marks themselves, the estimate adds per-entry costs for the key and for the
/// cache's bookkeeping structures, using hard-coded sizes (32 and 28 bytes) for the cell and
/// hash table overhead.
struct MarksWeightFunction
{
size_t operator()(const MarksInCompressedFile & marks) const
size_t operator()(const String & key, const MarksInCompressedFile & marks) const
{
/// NOTE Could add extra 100 bytes for overhead of std::vector, cache structures and allocator.
return marks.size() * sizeof(MarkInCompressedFile);
// 1. the memory cost of value part
auto mark_memory_usage = marks.allocated_bytes(); // bytes allocated by the marks array
auto cells_memory_usage = 32; // Cells struct memory cost
auto pod_array_memory_usage = sizeof(decltype(marks)); // PODArray struct memory cost

// 2. the memory cost of key part
auto str_len = key.size(); // key_len
auto key_memory_usage = sizeof(String); // String struct memory cost

// 3. the memory cost of hash table
auto unordered_map_memory_usage = 28; // hash table struct approximate memory cost

// 4. the memory cost of LRUQueue
auto list_memory_usage = sizeof(std::list<String>); // list struct memory cost

// The key length and the String struct size are each counted twice (str_len * 2, key_memory_usage * 2).
// NOTE(review): presumably because the key is held by both the hash table and the LRU queue — confirm against LRUCache.
return mark_memory_usage + cells_memory_usage + pod_array_memory_usage + str_len * 2 + key_memory_usage * 2
+ unordered_map_memory_usage + list_memory_usage;
}
};

Expand Down
107 changes: 107 additions & 0 deletions metrics/grafana/tiflash_summary.json
Original file line number Diff line number Diff line change
Expand Up @@ -7098,6 +7098,113 @@
"align": false,
"alignLevel": null
}
},
{
"type": "graph",
"title": "Mark Cache and Minmax Index Cache Memory Usage",
"gridPos": {
"x": 12,
"y": 64,
"w": 12,
"h": 8
},
"id": 23763571993,
"targets": [
{
"expr": "tiflash_system_asynchronous_metric_MarkCacheBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}",
"legendFormat": "mark_cache_{{instance}}",
"interval": "",
"exemplar": true,
"refId": "A",
"queryType": "randomWalk",
"hide": false
},
{
"expr": "tiflash_system_asynchronous_metric_MinMaxIndexCacheBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}",
"legendFormat": "minmax_index_cache_{{instance}}",
"interval": "",
"exemplar": true,
"refId": "B",
"hide": true
},
{
"refId": "C",
"hide": false
}
],
"options": {
"alertThreshold": true
},
"datasource": "${DS_TEST-CLUSTER}",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"pluginVersion": "7.5.11",
"renderer": "flot",
"yaxes": [
{
"label": null,
"show": true,
"logBase": 1,
"min": null,
"max": null,
"format": "short"
},
{
"label": null,
"show": true,
"logBase": 1,
"min": null,
"max": null,
"format": "short"
}
],
"xaxis": {
"show": true,
"mode": "time",
"name": null,
"values": [],
"buckets": null
},
"yaxis": {
"align": false,
"alignLevel": null
},
"lines": true,
"fill": 1,
"linewidth": 1,
"dashLength": 10,
"spaceLength": 10,
"pointradius": 2,
"legend": {
"show": true,
"values": false,
"min": false,
"max": false,
"current": false,
"total": false,
"avg": false
},
"nullPointMode": "null",
"tooltip": {
"value_type": "individual",
"shared": true,
"sort": 0
},
"aliasColors": {},
"seriesOverrides": [],
"thresholds": [],
"timeRegions": [],
"fillGradient": 0,
"dashes": false,
"hiddenSeries": false,
"points": false,
"bars": false,
"stack": false,
"percentage": false,
"steppedLine": false,
"description": "The memory usage of mark cache and minmax index cache"
}
],
"repeat": null,
Expand Down

0 comments on commit 56a286f

Please sign in to comment.