DAOS-16160 control: Update pool create --size % opt for MD-on-SSD p2
Required-githooks: true

Signed-off-by: Tom Nabarro <tom.nabarro@intel.com>
tanabarr committed Aug 19, 2024
1 parent 993793f commit 8748182
Showing 3 changed files with 131 additions and 23 deletions.
src/control/cmd/dmg/pretty/storage_nvme.go (31 changes: 18 additions & 13 deletions)
@@ -213,6 +213,22 @@ func printNvmeFormatResults(inCtrlrs storage.NvmeControllers, out io.Writer, opt
 	return nil
 }
 
+func rolesRankFromSmd(ctrlr *storage.NvmeController) (string, string) {
+	rolesStr := "NA"
+	roles := ctrlr.Roles()
+	if !roles.IsEmpty() {
+		rolesStr = roles.String()
+	}
+
+	rankStr := "None"
+	rank := ctrlr.Rank()
+	if rank != ranklist.NilRank {
+		rankStr = rank.String()
+	}
+
+	return rolesStr, rankStr
+}
+
 // PrintNvmeControllers displays controller details in a verbose table.
 func PrintNvmeControllers(controllers storage.NvmeControllers, out io.Writer, opts ...PrintConfigOption) error {
 	w := txtfmt.NewErrWriter(out)
@@ -245,18 +261,7 @@ func PrintNvmeControllers(controllers storage.NvmeControllers, out io.Writer, op
 		row[fwTitle] = ctrlr.FwRev
 		row[socketTitle] = fmt.Sprint(ctrlr.SocketID)
 		row[capacityTitle] = humanize.Bytes(ctrlr.Capacity())
-		roles := "NA"
-		rank := "None"
-		// Assumes that all SMD devices on a controller have the same roles and rank.
-		if len(ctrlr.SmdDevices) > 0 {
-			sd := ctrlr.SmdDevices[0]
-			roles = sd.Roles.String()
-			if sd.Rank != ranklist.NilRank {
-				rank = sd.Rank.String()
-			}
-		}
-		row[rolesTitle] = roles
-		row[rankTitle] = rank
+		row[rolesTitle], row[rankTitle] = rolesRankFromSmd(ctrlr)
 
 		table = append(table, row)
 	}
@@ -276,7 +281,7 @@ func PrintNvmeHealthMap(hsm control.HostStorageMap, out io.Writer, opts ...Print
 		lineBreak := strings.Repeat("-", len(hosts))
 		fmt.Fprintf(out, "%s\n%s\n%s\n", lineBreak, hosts, lineBreak)
 
-		if len(hss.HostStorage.NvmeDevices) == 0 {
+		if hss.HostStorage.NvmeDevices.Len() == 0 {
 			fmt.Fprintln(out, " No NVMe devices detected")
 			continue
 		}
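For illustration, a minimal test sketch of the fallback behaviour of the new rolesRankFromSmd helper when a controller carries no SMD devices. This test is not part of the commit; the storage import path is assumed from the DAOS repo layout, and the zero-value BdevRoles is assumed to report empty.

package pretty

import (
	"testing"

	"github.com/daos-stack/daos/src/control/server/storage"
)

// A controller without SMD devices should yield the placeholder strings
// rendered in the verbose controller table.
func TestRolesRankFromSmd_NoSmdDevices(t *testing.T) {
	ctrlr := &storage.NvmeController{} // no SMD devices attached

	roles, rank := rolesRankFromSmd(ctrlr)

	if roles != "NA" {
		t.Errorf("expected roles \"NA\", got %q", roles)
	}
	if rank != "None" {
		t.Errorf("expected rank \"None\", got %q", rank)
	}
}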
src/control/lib/control/pool.go (98 changes: 88 additions & 10 deletions)
@@ -1111,22 +1111,77 @@ func processNVMeSpaceStats(log debugLogger, filterRank filterRankFn, nvmeControl
 	return nil
 }
 
-// Return the maximal SCM and NVMe size of a pool which could be created with all the storage nodes.
-func getMaxPoolSize(ctx context.Context, rpcClient UnaryInvoker, ranks ranklist.RankList) (uint64, uint64, error) {
-	// Verify that the DAOS system is ready before attempting to query storage.
-	if _, err := SystemQuery(ctx, rpcClient, &SystemQueryReq{}); err != nil {
-		return 0, 0, err
+func isMdOnSsdEnabled(hsm HostStorageMap) bool {
+	for _, hss := range hsm {
+		hs := hss.HostStorage
+		if hs == nil {
+			continue
+		}
+		nvme := hs.NvmeDevices
+		if nvme.Len() > 0 && !nvme[0].Roles().IsEmpty() {
+			return true
+		}
 	}
 
-	resp, err := StorageScan(ctx, rpcClient, &StorageScanReq{Usage: true})
-	if err != nil {
-		return 0, 0, err
+	return false
 }
 
+// Derive mem_size and data_size components using percentage of available ramdisk and SSD capacity.
+// The rationale for deriving space usage from ramdisk usage is that this is most likely to be the
+// limiting factor as opposed to SSD usage.
+// - Calculate mem_size as percentage of ramdisk/tmpfs free capacity.
+// - Calculate meta_size as mem_size/mem-ratio.
+// - When calculating data_size in pool…
+//   - Subtract meta_size from free blobstore space if an SSD shares META+DATA roles
+//   - Subtract WAL size from free blobstore space if an SSD shares META+WAL roles
+//   - Take percentage of remainder
+func getMaxPoolSizeMdOnSsd(ctx context.Context, rpcClient UnaryInvoker, ranks ranklist.RankList, resp *StorageScanResp) (uint64, uint64, error) {
+	// Generate function to verify a rank is in the provided rank slice.
+	filterRank := newFilterRankFunc(ranks)
+	rankNVMeFreeSpace := make(rankFreeSpaceMap)
+	scmBytes := uint64(math.MaxUint64)
+	for _, key := range resp.HostStorage.Keys() {
+		hostStorage := resp.HostStorage[key].HostStorage
+
+		if hostStorage.ScmNamespaces.Usable() == 0 {
+			return 0, 0, errors.Errorf("Host without SCM storage: hostname=%s",
+				resp.HostStorage[key].HostSet.String())
+		}
+
+		sb, err := processSCMSpaceStats(rpcClient, filterRank, hostStorage.ScmNamespaces, rankNVMeFreeSpace)
+		if err != nil {
+			return 0, 0, err
+		}
+		rpcClient.Debugf("scm nss: %+v, usable: %s, space stats: %+v",
+			hostStorage.ScmNamespaces, hostStorage.ScmNamespaces.Usable(), sb)
+
+		if scmBytes > sb {
+			scmBytes = sb
+		}
+
+		if err := processNVMeSpaceStats(rpcClient, filterRank, hostStorage.NvmeDevices, rankNVMeFreeSpace); err != nil {
+			return 0, 0, err
+		}
+	}
+
-	if len(resp.HostStorage) == 0 {
-		return 0, 0, errors.New("Empty host storage response from StorageScan")
+	if scmBytes == math.MaxUint64 {
+		return 0, 0, errors.Errorf("No SCM storage space available with rank list %s", ranks)
 	}
 
+	nvmeBytes := uint64(math.MaxUint64)
+	for _, nvmeRankBytes := range rankNVMeFreeSpace {
+		if nvmeBytes > nvmeRankBytes {
+			nvmeBytes = nvmeRankBytes
+		}
+	}
+
+	rpcClient.Debugf("Maximal size of a pool: scmBytes=%s (%d B) nvmeBytes=%s (%d B)",
+		humanize.Bytes(scmBytes), scmBytes, humanize.Bytes(nvmeBytes), nvmeBytes)
+
+	return scmBytes, nvmeBytes, nil
+}
+
+func getMaxPoolSizePMem(ctx context.Context, rpcClient UnaryInvoker, ranks ranklist.RankList, resp *StorageScanResp) (uint64, uint64, error) {
 	// Generate function to verify a rank is in the provided rank slice.
 	filterRank := newFilterRankFunc(ranks)
 	rankNVMeFreeSpace := make(rankFreeSpaceMap)
@@ -1169,3 +1224,26 @@ func getMaxPoolSize(ctx context.Context, rpcClient UnaryInvoker, ranks ranklist.
 
 	return scmBytes, nvmeBytes, nil
 }
+
+// Return the maximal SCM and NVMe size of a pool which could be created with all the storage nodes.
+func getMaxPoolSize(ctx context.Context, rpcClient UnaryInvoker, ranks ranklist.RankList) (uint64, uint64, error) {
+	// Verify that the DAOS system is ready before attempting to query storage.
+	if _, err := SystemQuery(ctx, rpcClient, &SystemQueryReq{}); err != nil {
+		return 0, 0, err
+	}
+
+	resp, err := StorageScan(ctx, rpcClient, &StorageScanReq{Usage: true})
+	if err != nil {
+		return 0, 0, err
+	}
+
+	if len(resp.HostStorage) == 0 {
+		return 0, 0, errors.New("Empty host storage response from StorageScan")
+	}
+
+	if isMdOnSsdEnabled(resp.HostStorage) {
+		return getMaxPoolSizeMdOnSsd(ctx, rpcClient, ranks, resp)
+	}
+
+	return getMaxPoolSizePMem(ctx, rpcClient, ranks, resp)
+}
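The comment block above getMaxPoolSizeMdOnSsd describes the sizing derivation in prose. A rough sketch of that arithmetic follows, as a hypothetical standalone helper: the function name, parameters, and underflow guards are assumptions for illustration, not code from this commit.

package main

import "fmt"

// mdOnSsdSizes sketches the derivation described above: mem_size is a
// percentage of free ramdisk capacity, meta_size is mem_size scaled by the
// memory ratio, and data_size is a percentage of the blobstore space left
// after any shared-role reservations.
func mdOnSsdSizes(ramdiskFree, blobstoreFree, walSize uint64, fraction, memRatio float64,
	metaSharesData, metaSharesWal bool) (memSize, metaSize, dataSize uint64) {
	memSize = uint64(float64(ramdiskFree) * fraction) // % of tmpfs free capacity
	metaSize = uint64(float64(memSize) / memRatio)    // meta_size = mem_size/mem-ratio

	remainder := blobstoreFree
	if metaSharesData && remainder > metaSize {
		remainder -= metaSize // SSD shares META+DATA roles: reserve meta_size
	}
	if metaSharesWal && remainder > walSize {
		remainder -= walSize // SSD shares META+WAL roles: reserve the WAL
	}
	dataSize = uint64(float64(remainder) * fraction) // % of the remainder
	return
}

func main() {
	// 90% of a 16 GiB ramdisk and a 1 TiB blobstore, 4 GiB WAL, mem-ratio 0.25.
	mem, meta, data := mdOnSsdSizes(16<<30, 1<<40, 4<<30, 0.9, 0.25, true, true)
	fmt.Println(mem, meta, data)
}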
src/control/server/storage/bdev.go (25 changes: 25 additions & 0 deletions)
@@ -397,6 +397,26 @@ func (nc NvmeController) Free() (tb uint64) {
 	return
 }
 
+// Roles returns bdev_roles for NVMe controller being used in MD-on-SSD mode. Assume that all SMD
+// devices on a controller have the same roles.
+func (nc *NvmeController) Roles() *BdevRoles {
+	if len(nc.SmdDevices) > 0 {
+		return &nc.SmdDevices[0].Roles
+	}
+
+	return &BdevRoles{}
+}
+
+// Rank returns rank on which this NVMe controller is being used. Assume that all SMD devices on a
+// controller have the same rank.
+func (nc *NvmeController) Rank() ranklist.Rank {
+	if len(nc.SmdDevices) > 0 {
+		return nc.SmdDevices[0].Rank
+	}
+
+	return ranklist.NilRank
+}
+
 // NvmeControllers is a type alias for []*NvmeController.
 type NvmeControllers []*NvmeController
 
@@ -412,6 +432,11 @@ func (ncs NvmeControllers) String() string {
 	return strings.Join(ss, ", ")
 }
 
+// Len returns the length of the NvmeController reference slice.
+func (ncs NvmeControllers) Len() int {
+	return len(ncs)
+}
+
 // Capacity returns the cumulative total bytes of all controller capacities.
 func (ncs NvmeControllers) Capacity() (tb uint64) {
 	for _, c := range ncs {
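For illustration, the zero-value behaviour of the new NvmeController accessors and the NvmeControllers.Len helper; a minimal sketch, assuming the daos-stack import paths and not part of this commit.

package main

import (
	"fmt"

	"github.com/daos-stack/daos/src/control/lib/ranklist"
	"github.com/daos-stack/daos/src/control/server/storage"
)

func main() {
	// A controller with no SMD devices exercises the accessor defaults.
	ctrlr := &storage.NvmeController{}

	fmt.Println(ctrlr.Roles().IsEmpty())          // true: empty BdevRoles
	fmt.Println(ctrlr.Rank() == ranklist.NilRank) // true: no rank assigned

	// Len lets callers test emptiness without spelling out len() on the
	// underlying slice, as PrintNvmeHealthMap now does.
	ctrlrs := storage.NvmeControllers{ctrlr}
	fmt.Println(ctrlrs.Len()) // 1
}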
