diff --git a/pkg/scheduler/plugins/deviceshare/device_allocator_test.go b/pkg/scheduler/plugins/deviceshare/device_allocator_test.go
index c5924af5f..8efd31078 100644
--- a/pkg/scheduler/plugins/deviceshare/device_allocator_test.go
+++ b/pkg/scheduler/plugins/deviceshare/device_allocator_test.go
@@ -973,6 +973,7 @@ func TestAutopilotAllocator(t *testing.T) {
 			}
 			sortDeviceAllocations(allocations)
 			sortDeviceAllocations(tt.want)
+			fillGPUTotalMem(allocations, nodeDevice)
 			assert.Equal(t, tt.want, allocations)
 		})
 	}
@@ -1931,6 +1932,7 @@ func TestAutopilotAllocatorWithExclusivePolicyAndRequiredScope(t *testing.T) {
 			}
 			sortDeviceAllocations(allocations)
 			sortDeviceAllocations(tt.want)
+			fillGPUTotalMem(allocations, nodeDevice)
 			assert.Equal(t, tt.want, allocations)
 		})
 	}
@@ -2117,6 +2119,8 @@ func Test_allocateGPUWithLeastAllocatedScorer(t *testing.T) {
 		scorer: allocationScorer,
 	}
 	allocateResult, status := allocator.Allocate(nil, nil, nil, nil)
+	err := fillGPUTotalMem(allocateResult, nd)
+	assert.NoError(t, err)
 	assert.True(t, status.IsSuccess())
 	expectAllocations := []*apiext.DeviceAllocation{
 		{
@@ -2226,6 +2230,7 @@ func Test_nodeDevice_allocateGPUWithMostAllocatedScorer(t *testing.T) {
 			},
 		},
 	}
+	fillGPUTotalMem(allocateResult, nd)
 	assert.True(t, equality.Semantic.DeepEqual(expectAllocations, allocateResult[schedulingv1alpha1.GPU]))
 }
diff --git a/pkg/scheduler/plugins/deviceshare/devicehandler_gpu.go b/pkg/scheduler/plugins/deviceshare/devicehandler_gpu.go
index f322da3e0..41cecf658 100644
--- a/pkg/scheduler/plugins/deviceshare/devicehandler_gpu.go
+++ b/pkg/scheduler/plugins/deviceshare/devicehandler_gpu.go
@@ -44,55 +44,58 @@ func (h *GPUHandler) CalcDesiredRequestsAndCount(node *corev1.Node, pod *corev1.
 	}
 	podRequests = podRequests.DeepCopy()
-	if err := fillGPUTotalMem(totalDevice, podRequests); err != nil {
-		return nil, 0, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
-	}
 	requests := podRequests
 	desiredCount := int64(1)
 
 	gpuShare, ok := podRequests[apiext.ResourceGPUShared]
-	gpuCore, gpuMem, gpuMemoryRatio := podRequests[apiext.ResourceGPUCore], podRequests[apiext.ResourceGPUMemory], podRequests[apiext.ResourceGPUMemoryRatio]
+	gpuCore, coreExists := podRequests[apiext.ResourceGPUCore]
+	gpuMemoryRatio, memoryRatioExists := podRequests[apiext.ResourceGPUMemoryRatio]
 	// gpu share mode
 	if ok && gpuShare.Value() > 0 {
 		desiredCount = gpuShare.Value()
 	} else {
-		if gpuMemoryRatio.Value() > 100 && gpuMemoryRatio.Value()%100 == 0 {
+		if memoryRatioExists && gpuMemoryRatio.Value() > 100 && gpuMemoryRatio.Value()%100 == 0 {
 			desiredCount = gpuMemoryRatio.Value() / 100
 		}
 	}
 
 	if desiredCount > 1 {
-		requests = corev1.ResourceList{
-			apiext.ResourceGPUCore:        *resource.NewQuantity(gpuCore.Value()/desiredCount, resource.DecimalSI),
-			apiext.ResourceGPUMemory:      *resource.NewQuantity(gpuMem.Value()/desiredCount, resource.BinarySI),
-			apiext.ResourceGPUMemoryRatio: *resource.NewQuantity(gpuMemoryRatio.Value()/desiredCount, resource.DecimalSI),
+		requests = corev1.ResourceList{}
+		if coreExists {
+			requests[apiext.ResourceGPUCore] = *resource.NewQuantity(gpuCore.Value()/desiredCount, resource.DecimalSI)
+		}
+		if memoryRatioExists {
+			requests[apiext.ResourceGPUMemoryRatio] = *resource.NewQuantity(gpuMemoryRatio.Value()/desiredCount, resource.DecimalSI)
+		} else if gpuMem, memExists := podRequests[apiext.ResourceGPUMemory]; memExists {
+			requests[apiext.ResourceGPUMemory] = *resource.NewQuantity(gpuMem.Value()/desiredCount, resource.BinarySI)
+		}
 		}
 	}
 	return requests, int(desiredCount), nil
 }
 
-func fillGPUTotalMem(nodeDeviceTotal deviceResources, podRequest corev1.ResourceList) error {
-	// nodeDeviceTotal uses the minor of GPU as key. However, under certain circumstances,
-	// minor 0 might not exist. We need to iterate the cache once to find the active minor.
-	var total corev1.ResourceList
-	for _, resources := range nodeDeviceTotal {
-		if len(resources) > 0 && !quotav1.IsZero(resources) {
-			total = resources
-			break
-		}
+func fillGPUTotalMem(allocations apiext.DeviceAllocations, nodeDeviceInfo *nodeDevice) error {
+	gpuAllocations, ok := allocations[schedulingv1alpha1.GPU]
+	if !ok {
+		return nil
 	}
-	if total == nil {
-		return fmt.Errorf("no healthy GPU Devices")
+	gpuTotalDevices, ok := nodeDeviceInfo.deviceTotal[schedulingv1alpha1.GPU]
+	if !ok {
+		return nil
 	}
-	// a node can only contain one type of GPU, so each of them has the same total memory.
-	if gpuMem, ok := podRequest[apiext.ResourceGPUMemory]; ok {
-		podRequest[apiext.ResourceGPUMemoryRatio] = memoryBytesToRatio(gpuMem, total[apiext.ResourceGPUMemory])
-	} else {
-		gpuMemRatio := podRequest[apiext.ResourceGPUMemoryRatio]
-		podRequest[apiext.ResourceGPUMemory] = memoryRatioToBytes(gpuMemRatio, total[apiext.ResourceGPUMemory])
+	for i, allocation := range gpuAllocations {
+		gpuDevice, ok := gpuTotalDevices[int(allocation.Minor)]
+		if !ok || gpuDevice == nil || quotav1.IsZero(gpuDevice) {
+			return fmt.Errorf("no healthy gpu device with minor %d of allocation", allocation.Minor)
+		}
+		if gpuMem, ok := allocation.Resources[apiext.ResourceGPUMemory]; ok {
+			gpuAllocations[i].Resources[apiext.ResourceGPUMemoryRatio] = memoryBytesToRatio(gpuMem, gpuDevice[apiext.ResourceGPUMemory])
+		} else {
+			gpuMemRatio := allocation.Resources[apiext.ResourceGPUMemoryRatio]
+			gpuAllocations[i].Resources[apiext.ResourceGPUMemory] = memoryRatioToBytes(gpuMemRatio, gpuDevice[apiext.ResourceGPUMemory])
+		}
 	}
 	return nil
 }
diff --git a/pkg/scheduler/plugins/deviceshare/devicehandler_gpu_test.go b/pkg/scheduler/plugins/deviceshare/devicehandler_gpu_test.go
index 4816a0187..025dc75c2 100644
--- a/pkg/scheduler/plugins/deviceshare/devicehandler_gpu_test.go
+++ b/pkg/scheduler/plugins/deviceshare/devicehandler_gpu_test.go
@@ -24,77 +24,132 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 
 	apiext "github.com/koordinator-sh/koordinator/apis/extension"
+	schedulingv1alpha1 "github.com/koordinator-sh/koordinator/apis/scheduling/v1alpha1"
 )
 
 func Test_fillGPUTotalMem(t *testing.T) {
 	tests := []struct {
-		name           string
-		gpuTotal       deviceResources
-		podRequest     corev1.ResourceList
-		wantPodRequest corev1.ResourceList
-		wantErr        bool
+		name            string
+		allocations     apiext.DeviceAllocations
+		nodeDeviceInfo  *nodeDevice
+		wantAllocations apiext.DeviceAllocations
+		wantErr         bool
 	}{
 		{
 			name: "ratio to mem",
-			gpuTotal: deviceResources{
-				0: corev1.ResourceList{
-					apiext.ResourceGPUCore:        resource.MustParse("100"),
-					apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
-					apiext.ResourceGPUMemory:      resource.MustParse("32Gi"),
+			allocations: map[schedulingv1alpha1.DeviceType][]*apiext.DeviceAllocation{
+				schedulingv1alpha1.GPU: {
+					{
+						Minor: 0,
+						Resources: corev1.ResourceList{
+							apiext.ResourceGPUCore:        resource.MustParse("50"),
+							apiext.ResourceGPUMemoryRatio: resource.MustParse("50"),
+						},
+					},
 				},
 			},
-			podRequest: corev1.ResourceList{
-				apiext.ResourceGPUCore:        resource.MustParse("50"),
-				apiext.ResourceGPUMemoryRatio: resource.MustParse("50"),
+			nodeDeviceInfo: &nodeDevice{
+				deviceTotal: map[schedulingv1alpha1.DeviceType]deviceResources{
+					schedulingv1alpha1.GPU: {
+						0: corev1.ResourceList{
+							apiext.ResourceGPUCore:        resource.MustParse("100"),
+							apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
+							apiext.ResourceGPUMemory:      resource.MustParse("32Gi"),
+						},
+					},
+				},
 			},
-			wantPodRequest: corev1.ResourceList{
-				apiext.ResourceGPUCore:        resource.MustParse("50"),
-				apiext.ResourceGPUMemoryRatio: resource.MustParse("50"),
-				apiext.ResourceGPUMemory:      resource.MustParse("16Gi"),
+			wantAllocations: map[schedulingv1alpha1.DeviceType][]*apiext.DeviceAllocation{
+				schedulingv1alpha1.GPU: {
+					{
+						Minor: 0,
+						Resources: corev1.ResourceList{
+							apiext.ResourceGPUCore:        resource.MustParse("50"),
+							apiext.ResourceGPUMemoryRatio: resource.MustParse("50"),
+							apiext.ResourceGPUMemory:      resource.MustParse("16Gi"),
+						},
+					},
+				},
 			},
 		},
 		{
 			name: "mem to ratio",
-			gpuTotal: deviceResources{
-				0: corev1.ResourceList{
-					apiext.ResourceGPUCore:        resource.MustParse("100"),
-					apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
-					apiext.ResourceGPUMemory:      resource.MustParse("32Gi"),
+			allocations: map[schedulingv1alpha1.DeviceType][]*apiext.DeviceAllocation{
+				schedulingv1alpha1.GPU: {
+					{
+						Minor: 0,
+						Resources: corev1.ResourceList{
+							apiext.ResourceGPUCore:   resource.MustParse("50"),
+							apiext.ResourceGPUMemory: resource.MustParse("16Gi"),
+						},
+					},
 				},
 			},
-			podRequest: corev1.ResourceList{
-				apiext.ResourceGPUCore:   resource.MustParse("50"),
-				apiext.ResourceGPUMemory: resource.MustParse("16Gi"),
+			nodeDeviceInfo: &nodeDevice{
+				deviceTotal: map[schedulingv1alpha1.DeviceType]deviceResources{
+					schedulingv1alpha1.GPU: {
+						0: corev1.ResourceList{
+							apiext.ResourceGPUCore:        resource.MustParse("100"),
+							apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
+							apiext.ResourceGPUMemory:      resource.MustParse("32Gi"),
+						},
+					},
+				},
 			},
-			wantPodRequest: corev1.ResourceList{
-				apiext.ResourceGPUCore:        resource.MustParse("50"),
-				apiext.ResourceGPUMemoryRatio: *resource.NewQuantity(50, resource.DecimalSI),
-				apiext.ResourceGPUMemory:      resource.MustParse("16Gi"),
+			wantAllocations: map[schedulingv1alpha1.DeviceType][]*apiext.DeviceAllocation{
+				schedulingv1alpha1.GPU: {
+					{
+						Minor: 0,
+						Resources: corev1.ResourceList{
+							apiext.ResourceGPUCore:        resource.MustParse("50"),
+							apiext.ResourceGPUMemoryRatio: *resource.NewQuantity(50, resource.DecimalSI),
+							apiext.ResourceGPUMemory:      resource.MustParse("16Gi"),
+						},
+					},
+				},
 			},
 		},
 		{
 			name: "missing total",
-			gpuTotal: deviceResources{
-				0: corev1.ResourceList{},
+			allocations: map[schedulingv1alpha1.DeviceType][]*apiext.DeviceAllocation{
+				schedulingv1alpha1.GPU: {
+					{
+						Minor: 0,
+						Resources: corev1.ResourceList{
+							apiext.ResourceGPUCore:        resource.MustParse("50"),
+							apiext.ResourceGPUMemoryRatio: resource.MustParse("50"),
+						},
+					},
+				},
 			},
-			podRequest: corev1.ResourceList{
-				apiext.ResourceGPUCore:        resource.MustParse("50"),
-				apiext.ResourceGPUMemoryRatio: resource.MustParse("50"),
+			nodeDeviceInfo: &nodeDevice{
+				deviceTotal: map[schedulingv1alpha1.DeviceType]deviceResources{
+					schedulingv1alpha1.GPU: {
+						0: corev1.ResourceList{},
+					},
+				},
 			},
-			wantPodRequest: corev1.ResourceList{
-				apiext.ResourceGPUCore:        resource.MustParse("50"),
-				apiext.ResourceGPUMemoryRatio: resource.MustParse("50"),
+			wantAllocations: map[schedulingv1alpha1.DeviceType][]*apiext.DeviceAllocation{
+				schedulingv1alpha1.GPU: {
+					{
+						Minor: 0,
+						Resources: corev1.ResourceList{
+							apiext.ResourceGPUCore:        resource.MustParse("50"),
+							apiext.ResourceGPUMemoryRatio: resource.MustParse("50"),
+						},
+					},
+				},
 			},
 			wantErr: true,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			err := fillGPUTotalMem(tt.gpuTotal, tt.podRequest)
+			err := fillGPUTotalMem(tt.allocations, tt.nodeDeviceInfo)
 			if tt.wantErr != (err != nil) {
 				t.Errorf("wantErr %v but got %v", tt.wantErr, err != nil)
 			}
-			assert.Equal(t, tt.wantPodRequest, tt.podRequest)
+			assert.Equal(t, tt.wantAllocations, tt.allocations)
 		})
 	}
 }
diff --git a/pkg/scheduler/plugins/deviceshare/plugin.go b/pkg/scheduler/plugins/deviceshare/plugin.go
index 15d775e8f..7c82fa1e7 100644
--- a/pkg/scheduler/plugins/deviceshare/plugin.go
+++ b/pkg/scheduler/plugins/deviceshare/plugin.go
@@ -449,6 +449,10 @@ func (p *Plugin) Reserve(ctx context.Context, cycleState *framework.CycleState,
 			return status
 		}
 	}
+	err = fillGPUTotalMem(result, nodeDeviceInfo)
+	if err != nil {
+		return framework.AsStatus(err)
+	}
 	nodeDeviceInfo.updateCacheUsed(result, pod, true)
 	state.allocationResult = result
 	return nil
diff --git a/pkg/scheduler/plugins/deviceshare/reservation_test.go b/pkg/scheduler/plugins/deviceshare/reservation_test.go
index 0082ed665..dc2c637ca 100644
--- a/pkg/scheduler/plugins/deviceshare/reservation_test.go
+++ b/pkg/scheduler/plugins/deviceshare/reservation_test.go
@@ -687,6 +687,8 @@ func Test_tryAllocateFromReservation(t *testing.T) {
 				basicPreemptible,
 				tt.requiredFromReservation,
 			)
+			err := fillGPUTotalMem(result, nodeDeviceInfo)
+			assert.NoError(t, err)
 			assert.Equal(t, tt.wantStatus, status)
 			assert.Equal(t, tt.wantResult, result)
 		})
diff --git a/pkg/scheduler/plugins/deviceshare/topology_hint_test.go b/pkg/scheduler/plugins/deviceshare/topology_hint_test.go
index ff5f3d619..d85848dc7 100644
--- a/pkg/scheduler/plugins/deviceshare/topology_hint_test.go
+++ b/pkg/scheduler/plugins/deviceshare/topology_hint_test.go
@@ -72,11 +72,6 @@ func TestPlugin_GetPodTopologyHints(t *testing.T) {
 					{NUMANodeAffinity: newBitMask(1), Preferred: true},
 					{NUMANodeAffinity: newBitMask(0, 1), Preferred: false},
 				},
-				string(apiext.ResourceGPUMemory): {
-					{NUMANodeAffinity: newBitMask(0), Preferred: true},
-					{NUMANodeAffinity: newBitMask(1), Preferred: true},
-					{NUMANodeAffinity: newBitMask(0, 1), Preferred: false},
-				},
 				string(apiext.ResourceGPUMemoryRatio): {
 					{NUMANodeAffinity: newBitMask(0), Preferred: true},
 					{NUMANodeAffinity: newBitMask(1), Preferred: true},
 					{NUMANodeAffinity: newBitMask(0, 1), Preferred: false},
@@ -110,10 +105,6 @@ func TestPlugin_GetPodTopologyHints(t *testing.T) {
 					{NUMANodeAffinity: newBitMask(1), Preferred: true},
 					{NUMANodeAffinity: newBitMask(0, 1), Preferred: false},
 				},
-				string(apiext.ResourceGPUMemory): {
-					{NUMANodeAffinity: newBitMask(1), Preferred: true},
-					{NUMANodeAffinity: newBitMask(0, 1), Preferred: false},
-				},
 				string(apiext.ResourceGPUMemoryRatio): {
 					{NUMANodeAffinity: newBitMask(1), Preferred: true},
 					{NUMANodeAffinity: newBitMask(0, 1), Preferred: false},
@@ -205,11 +196,6 @@ func TestPlugin_GetPodTopologyHints(t *testing.T) {
 					{NUMANodeAffinity: newBitMask(1), Preferred: true},
 					{NUMANodeAffinity: newBitMask(0, 1), Preferred: false},
 				},
-				string(apiext.ResourceGPUMemory): {
-					{NUMANodeAffinity: newBitMask(0), Preferred: true},
-					{NUMANodeAffinity: newBitMask(1), Preferred: true},
-					{NUMANodeAffinity: newBitMask(0, 1), Preferred: false},
-				},
 				string(apiext.ResourceGPUMemoryRatio): {
 					{NUMANodeAffinity: newBitMask(0), Preferred: true},
 					{NUMANodeAffinity: newBitMask(1), Preferred: true},
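Note (outside the patch): the value that fillGPUTotalMem now fills in per allocated GPU minor is just the ratio <-> bytes conversion against that device's total memory. The sketch below illustrates the arithmetic only; ratioToBytes/bytesToRatio and their bodies are assumptions standing in for the repo's memoryRatioToBytes/memoryBytesToRatio helpers, which this diff calls but does not show.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// ratioToBytes converts a GPU memory ratio (percent of one device) into bytes,
// given that device's total memory. Hypothetical stand-in for memoryRatioToBytes.
func ratioToBytes(ratio, totalMemory resource.Quantity) resource.Quantity {
	return *resource.NewQuantity(ratio.Value()*totalMemory.Value()/100, resource.BinarySI)
}

// bytesToRatio converts an absolute GPU memory quantity into a percentage of
// that device's total memory. Hypothetical stand-in for memoryBytesToRatio.
func bytesToRatio(memory, totalMemory resource.Quantity) resource.Quantity {
	return *resource.NewQuantity(memory.Value()*100/totalMemory.Value(), resource.DecimalSI)
}

func main() {
	// Total memory of the GPU minor the allocation landed on, matching the
	// "ratio to mem" / "mem to ratio" test cases above.
	total := resource.MustParse("32Gi")

	// An allocation carrying only gpu-memory-ratio: 50 gets gpu-memory: 16Gi filled in.
	mem := ratioToBytes(resource.MustParse("50"), total)
	fmt.Println(mem.String()) // 16Gi

	// An allocation carrying only gpu-memory: 16Gi gets gpu-memory-ratio: 50 filled in.
	ratio := bytesToRatio(resource.MustParse("16Gi"), total)
	fmt.Println(ratio.String()) // 50
}
```

Because the lookup is now keyed by the allocation's actual minor at Reserve time, the old assumptions that minor 0 exists and that every GPU on a node has the same total memory are no longer needed, and gpu-memory is derived after allocation rather than during request calculation, which is consistent with the topology-hint test dropping its expected hints for koordinator.sh/gpu-memory.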