Skip to content

Commit

Permalink
net/mlx5: SD, Implement devcom communication and primary election
Browse files Browse the repository at this point in the history
Use devcom to communicate between the different devices. Add a new
devcom component type for this.

Each device registers itself to the devcom component <SD, group ID>.
Once all devices of a component are registered, the component becomes
ready, and a primary device is elected.

In principle, any of the devices can act as the primary, since they are all
capable, and a random election would have worked. However, we aim to
achieve predictability and consistency; hence each group always chooses
the same device, the one with the lowest PCI bus number, as primary.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
  • Loading branch information
Tariq Toukan authored and Saeed Mahameed committed Dec 21, 2023
1 parent 63b9ce9 commit a45af9a
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 2 deletions.
1 change: 1 addition & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ enum mlx5_devcom_component {
MLX5_DEVCOM_ESW_OFFLOADS,
MLX5_DEVCOM_MPV,
MLX5_DEVCOM_HCA_PORTS,
MLX5_DEVCOM_SD_GROUP,
MLX5_DEVCOM_NUM_COMPONENTS,
};

Expand Down
122 changes: 120 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
/* Per-device SD state, looked up through mlx5_get_sd(). */
struct mlx5_sd {
/* Key of the <MLX5_DEVCOM_SD_GROUP, group_id> devcom component this device joins. */
u32 group_id;
/* Expected group size: registration waits until the devcom component
 * size equals this value, and peer indices are bounded by it.
 */
u8 host_buses;
/* Handle returned by mlx5_devcom_register_component() in sd_register(). */
struct mlx5_devcom_comp_dev *devcom;
/* Set to true on the elected primary and to false on every other member;
 * selects which arm of the union below is meaningful.
 */
bool primary;
union {
struct { /* primary: the other group members, in devcom peer-iteration order */
struct mlx5_core_dev *secondaries[MLX5_SD_MAX_GROUP_SZ - 1];
};
struct { /* secondary: back-pointer to the elected primary */
struct mlx5_core_dev *primary_dev;
};
};
};

static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
Expand All @@ -26,13 +36,29 @@ static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
return sd->host_buses;
}

/* Resolve the primary device for @dev.
 * A device without SD state, or one flagged as primary, is its own primary;
 * any other device returns the primary_dev recorded in its SD state.
 */
static struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev)
{
	struct mlx5_sd *sd_ctx = mlx5_get_sd(dev);

	if (sd_ctx && !sd_ctx->primary)
		return sd_ctx->primary_dev;

	return dev;
}

/* Look up a group member by index, as seen from the primary.
 *
 * @primary: device acting as primary of its group
 * @idx:     0 selects @primary itself; 1 and above select secondaries[idx - 1]
 *
 * Returns the selected device, or NULL when @idx is negative or is not below
 * the value reported by mlx5_sd_get_host_buses(@primary).
 */
struct mlx5_core_dev *
mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx)
{
	struct mlx5_sd *sd;

	if (idx == 0)
		return primary;

	/* Reject out-of-range indices before touching the secondaries array;
	 * a negative idx would otherwise index before its first element.
	 */
	if (idx < 0 || idx >= mlx5_sd_get_host_buses(primary))
		return NULL;

	sd = mlx5_get_sd(primary);
	return sd->secondaries[idx - 1];
}

int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix)
Expand Down Expand Up @@ -136,15 +162,93 @@ static void sd_cleanup(struct mlx5_core_dev *dev)
kfree(sd);
}

/* Register @dev in its SD devcom component and, once the group is complete,
 * elect the primary device.
 *
 * Returns 0 on success (including the case where the group is not yet
 * complete), -ENOMEM when the devcom registration yields no handle, or
 * -ENODEV when peer iteration cannot be started; in the -ENODEV case the
 * component is marked not ready and this device is unregistered again.
 */
static int sd_register(struct mlx5_core_dev *dev)
{
struct mlx5_devcom_comp_dev *devcom, *pos;
struct mlx5_core_dev *peer, *primary;
struct mlx5_sd *sd, *primary_sd;
int err, i;

sd = mlx5_get_sd(dev);
/* Join the component keyed by <MLX5_DEVCOM_SD_GROUP, group_id>; no event
 * handler is installed (NULL), and dev is passed as the per-device data.
 */
devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP,
sd->group_id, NULL, dev);
/* NOTE(review): only a NULL return is treated as failure here - confirm
 * that the register call cannot return an ERR_PTR-encoded error.
 */
if (!devcom)
return -ENOMEM;

sd->devcom = devcom;

/* Group not complete yet: stay registered and leave the election to
 * whichever registration makes the component size match host_buses.
 */
if (mlx5_devcom_comp_get_size(devcom) != sd->host_buses)
return 0;

mlx5_devcom_comp_lock(devcom);
mlx5_devcom_comp_set_ready(devcom, true);
mlx5_devcom_comp_unlock(devcom);

if (!mlx5_devcom_for_each_peer_begin(devcom)) {
err = -ENODEV;
goto err_devcom_unreg;
}

/* Election: start from this device and pick the lowest PCI bus number
 * among it and its peers; the strict '<' keeps the earlier candidate on
 * a tie.
 */
primary = dev;
mlx5_devcom_for_each_peer_entry(devcom, peer, pos)
if (peer->pdev->bus->number < primary->pdev->bus->number)
primary = peer;

primary_sd = mlx5_get_sd(primary);
primary_sd->primary = true;
i = 0;
/* Walk the primary's peers (i.e. the secondaries): record each one in the
 * primary's array and point it back at the primary.
 */
mlx5_devcom_for_each_peer_entry(primary_sd->devcom, peer, pos) {
struct mlx5_sd *peer_sd = mlx5_get_sd(peer);

primary_sd->secondaries[i++] = peer;
peer_sd->primary = false;
peer_sd->primary_dev = primary;
}

mlx5_devcom_for_each_peer_end(devcom);
return 0;

err_devcom_unreg:
/* Roll back: clear the ready flag under the component lock, then drop
 * this device's registration.
 */
mlx5_devcom_comp_lock(sd->devcom);
mlx5_devcom_comp_set_ready(sd->devcom, false);
mlx5_devcom_comp_unlock(sd->devcom);
mlx5_devcom_unregister_component(sd->devcom);
return err;
}

/* Leave the SD devcom component: clear its ready flag while holding the
 * component lock, then drop this device's registration.
 */
static void sd_unregister(struct mlx5_core_dev *dev)
{
	struct mlx5_sd *sd_ctx = mlx5_get_sd(dev);
	struct mlx5_devcom_comp_dev *comp_dev = sd_ctx->devcom;

	mlx5_devcom_comp_lock(comp_dev);
	mlx5_devcom_comp_set_ready(comp_dev, false);
	mlx5_devcom_comp_unlock(comp_dev);

	mlx5_devcom_unregister_component(comp_dev);
}

/* Set up SD state for @dev and register it with its group.
 *
 * Returns 0 on success, or when sd_init() succeeds but leaves the device
 * without SD state; otherwise the error from sd_init() or sd_register().
 * A failed registration tears the SD state down again via sd_cleanup().
 */
int mlx5_sd_init(struct mlx5_core_dev *dev)
{
	struct mlx5_sd *sd = mlx5_get_sd(dev);
	int ret;

	ret = sd_init(dev);
	if (ret)
		return ret;

	/* Re-read after sd_init(); no SD state means nothing to register. */
	sd = mlx5_get_sd(dev);
	if (!sd)
		return 0;

	ret = sd_register(dev);
	if (ret)
		sd_cleanup(dev);

	return ret;
}

void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
Expand All @@ -154,12 +258,26 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
if (!sd)
return;

sd_unregister(dev);
sd_cleanup(dev);
}

/* Pick the auxiliary device to use for @dev.
 *
 * @dev:  device being queried
 * @adev: the device's own auxiliary device
 * @idx:  index into the primary's priv.adev[] array, used only when @dev is
 *        a secondary
 *
 * Returns @adev when @dev has no SD state or is itself the primary, NULL
 * while the SD devcom component is not ready, and otherwise the primary's
 * auxiliary device at @idx.
 */
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
					  struct auxiliary_device *adev,
					  int idx)
{
	struct mlx5_sd *sd = mlx5_get_sd(dev);
	struct mlx5_core_dev *primary;

	if (!sd)
		return adev;

	/* Primary/secondary roles are only assigned once the group is ready. */
	if (!mlx5_devcom_comp_is_ready(sd->devcom))
		return NULL;

	primary = mlx5_sd_get_primary(dev);
	if (dev == primary)
		return adev;

	return &primary->priv.adev[idx]->adev;
}

0 comments on commit a45af9a

Please sign in to comment.