Skip to content

Commit d0dd6ee

Browse files
authored
[Dynamic buffer calc][Mellanox] Fix bug: buffer over subscription in buffer pool size calculation (sonic-net#1706)
To calculate the buffer pool size, we need to accumulate all reserved memory and then subtract the sum from the total available memory. In the current implementation, one part hasn’t been added to the reserved memory, which causes the buffer pool size to be larger than it should be. To fix it, we need to add it to the reserved memory. - What I did Fix buffer over subscription in the buffer pool calculation. - How I verified it Run regression and manual test Signed-off-by: Stephen Sun <stephens@nvidia.com>
1 parent d8b8ca9 commit d0dd6ee

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

cfgmgr/buffer_pool_mellanox.lua

+14-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ local state_db = "6"
77

88
local lossypg_reserved = 19 * 1024
99
local lossypg_reserved_400g = 37 * 1024
10+
-- Number of 400G ports
11+
local port_count_400g = 0
12+
-- Number of lossy PG on 400G ports
1013
local lossypg_400g = 0
1114

1215
local result = {}
@@ -56,8 +59,11 @@ local function iterate_all_items(all_items)
5659
end
5760
profiles[index][2] = profiles[index][2] + size
5861
local speed = redis.call('HGET', 'PORT_TABLE:'..port, 'speed')
59-
if speed == '400000' and profile == '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]' then
60-
lossypg_400g = lossypg_400g + size
62+
if speed == '400000' then
63+
if profile == '[BUFFER_PROFILE_TABLE:ingress_lossy_profile]' then
64+
lossypg_400g = lossypg_400g + size
65+
end
66+
port_count_400g = port_count_400g + 1
6167
end
6268
end
6369
end
@@ -145,6 +151,10 @@ end
145151
local lossypg_extra_for_400g = (lossypg_reserved_400g - lossypg_reserved) * lossypg_400g
146152
accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_400g
147153

154+
-- Accumulate sizes for management PGs
155+
local accumulative_management_pg = (total_port - port_count_400g) * lossypg_reserved + port_count_400g * lossypg_reserved_400g
156+
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_management_pg
157+
148158
-- Accumulate sizes for egress mirror and management pool
149159
local accumulative_egress_mirror_overhead = total_port * egress_mirror_headroom
150160
accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_egress_mirror_overhead + mgmt_pool_size
@@ -228,8 +238,9 @@ table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer)
228238
for i = 1, #statistics do
229239
table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3])
230240
end
231-
table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g)
241+
table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g .. ":" .. port_count_400g)
232242
table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size)
243+
table.insert(result, "debug:accumulative_mgmt_pg:" .. accumulative_management_pg)
233244
table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead)
234245
table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled))
235246
table.insert(result, "debug:shp_size:" .. shp_size)

0 commit comments

Comments
 (0)