Skip to content

Commit

Permalink
drm/amdgpu: update ras capability's query based on mem ecc configuration
Browse files Browse the repository at this point in the history
The RAS support capability needs to be updated based on which kinds of
memory ECC are enabled. Also remove the redundant memory ECC check
in the gmc module for vega20 and arcturus.

v2: check HBM ECC enablement and set ras mask accordingly.
v3: avoid invoking the atomfirmware interface twice for the same query.

Suggested-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Guchun Chen authored and alexdeucher committed Mar 13, 2020
1 parent 6397ec5 commit 88474cc
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 30 deletions.
24 changes: 18 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -1765,18 +1765,30 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
*hw_supported = 0;
*supported = 0;

if (amdgpu_sriov_vf(adev) ||
if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
(adev->asic_type != CHIP_VEGA20 &&
adev->asic_type != CHIP_ARCTURUS))
return;

if (adev->is_atom_fw &&
(amdgpu_atomfirmware_mem_ecc_supported(adev) ||
amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
DRM_INFO("HBM ECC is active.\n");
*hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
} else
DRM_INFO("HBM ECC is not presented.\n");

if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
DRM_INFO("SRAM ECC is active.\n");
*hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
} else
DRM_INFO("SRAM ECC is not presented.\n");

/* hw_supported needs to be aligned with RAS block mask. */
*hw_supported &= AMDGPU_RAS_BLOCK_MASK;

*supported = amdgpu_ras_enable == 0 ?
0 : *hw_supported & amdgpu_ras_mask;
0 : *hw_supported & amdgpu_ras_mask;
}

int amdgpu_ras_init(struct amdgpu_device *adev)
Expand Down
38 changes: 14 additions & 24 deletions drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -922,30 +922,20 @@ static int gmc_v9_0_late_init(void *handle)
if (r)
return r;
/* Check if ecc is available */
if (!amdgpu_sriov_vf(adev)) {
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA20:
case CHIP_ARCTURUS:
r = amdgpu_atomfirmware_mem_ecc_supported(adev);
if (!r) {
DRM_INFO("ECC is not present.\n");
if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
} else {
DRM_INFO("ECC is active.\n");
}

r = amdgpu_atomfirmware_sram_ecc_supported(adev);
if (!r) {
DRM_INFO("SRAM ECC is not present.\n");
} else {
DRM_INFO("SRAM ECC is active.\n");
}
break;
default:
break;
}
if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
r = amdgpu_atomfirmware_mem_ecc_supported(adev);
if (!r) {
DRM_INFO("ECC is not present.\n");
if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
} else
DRM_INFO("ECC is active.\n");

r = amdgpu_atomfirmware_sram_ecc_supported(adev);
if (!r)
DRM_INFO("SRAM ECC is not present.\n");
else
DRM_INFO("SRAM ECC is active.\n");
}

if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
Expand Down

0 comments on commit 88474cc

Please sign in to comment.