Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: sync master to develop #1426

Merged
merged 11 commits into from
Jun 18, 2024
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
# Changelog

## v1.8.0
BUGFIXES
* [#1404](https://github.com/bnb-chain/greenfield-storage-provider/pull/1404) fix: use exponential backoff for task retry policy
* [#1406](https://github.com/bnb-chain/greenfield-storage-provider/pull/1406) fix: change replicate failure error code to 404 when object is not found
* [#1407](https://github.com/bnb-chain/greenfield-storage-provider/pull/1407) fix: refine error code for bucket migration
* [#1409](https://github.com/bnb-chain/greenfield-storage-provider/pull/1409) fix: avoid users to select network in universal endpoint page
* [#1412](https://github.com/bnb-chain/greenfield-storage-provider/pull/1412) fix: bs bucket migrate event bug
* [#1414](https://github.com/bnb-chain/greenfield-storage-provider/pull/1414) fix: metric check sp
* [#1415](https://github.com/bnb-chain/greenfield-storage-provider/pull/1415) fix: check sp health retry
* [#1422](https://github.com/bnb-chain/greenfield-storage-provider/pull/1422) fix: gvg staking storage size

FEATURES
* [#1405](https://github.com/bnb-chain/greenfield-storage-provider/pull/1405) feat: bs add bucket status field
* [#1408](https://github.com/bnb-chain/greenfield-storage-provider/pull/1408) feat: add detailed logs for special customized logs
* [#1416](https://github.com/bnb-chain/greenfield-storage-provider/pull/1416) perf: logic

DOCS
* [#1402](https://github.com/bnb-chain/greenfield-storage-provider/pull/1402) docs: move SP cmd/module/api docs docs.bnbchain.org to sp repo
* [#1417](https://github.com/bnb-chain/greenfield-storage-provider/pull/1417) fix:readme

## v1.7.0
BUGFIXES
* [#1394](https://github.com/bnb-chain/greenfield-storage-provider/pull/1394) fix: pick new gvg when retry failed replicate piece task
Expand Down
63 changes: 41 additions & 22 deletions base/gfspvgmgr/virtual_group_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"math/rand"
"net/http"
"sort"
"strconv"
"strings"
"sync"
"time"
Expand All @@ -19,6 +20,7 @@ import (
"github.com/bnb-chain/greenfield-storage-provider/core/consensus"
"github.com/bnb-chain/greenfield-storage-provider/core/vgmgr"
"github.com/bnb-chain/greenfield-storage-provider/pkg/log"
"github.com/bnb-chain/greenfield-storage-provider/pkg/metrics"
"github.com/bnb-chain/greenfield-storage-provider/util"
sptypes "github.com/bnb-chain/greenfield/x/sp/types"
virtualgrouptypes "github.com/bnb-chain/greenfield/x/virtualgroup/types"
Expand All @@ -30,12 +32,14 @@ const (
VirtualGroupManagerSpace = "VirtualGroupManager"
RefreshMetaInterval = 5 * time.Second
MaxStorageUsageRatio = 0.95
DefaultInitialGVGStakingStorageSize = uint64(1) * 1024 * 1024 * 1024 * 1024 // 1TB per GVG, chain side DefaultMaxStoreSizePerFamily is 64 TB
additionalGVGStakingStorageSize = uint64(1) * 1024 * 1024 * 1024 * 512 // 0.5TB
DefaultInitialGVGStakingStorageSize = uint64(1) * 1024 * 1024 * 1024 * 256 // 256GB per GVG, chain side DefaultMaxStoreSizePerFamily is 64 TB
additionalGVGStakingStorageSize = uint64(1) * 1024 * 1024 * 1024 * 512 // 0.5TB

defaultSPCheckTimeout = 3 * time.Second
defaultSPHealthCheckerInterval = 10 * time.Second
httpStatusPath = "/status"
defaultSPCheckTimeout = 1 * time.Minute
defaultSPHealthCheckerInterval = 10 * time.Second
defaultSPHealthCheckerRetryInterval = 1 * time.Second
defaultSPHealthCheckerMaxRetries = 5
httpStatusPath = "/status"

emptyGVGSafeDeletePeriod = int64(60) * 60 * 24
)
Expand Down Expand Up @@ -525,6 +529,10 @@ func (vgm *virtualGroupManager) FreezeSPAndGVGs(spID uint32, gvgs []*virtualgrou
vgm.freezeSPPool.FreezeSPAndGVGs(spID, gvgs)
}

// ReleaseAllSP releases every SP from the freeze pool by delegating to the
// underlying freezeSPPool, so previously frozen SPs and their GVGs become
// selectable again.
func (vgm *virtualGroupManager) ReleaseAllSP() {
	vgm.freezeSPPool.ReleaseAllSP()
}

// releaseSPAndGVGLoop runs periodically to release SP from the freeze pool
func (vgm *virtualGroupManager) releaseSPAndGVGLoop() {
ticker := time.NewTicker(ReleaseSPJobInterval)
Expand Down Expand Up @@ -773,7 +781,7 @@ func (checker *HealthChecker) checkAllSPHealth() {

func (checker *HealthChecker) checkSPHealth(sp *sptypes.StorageProvider) bool {
if !sp.IsInService() {
log.CtxInfow(context.Background(), "the sp is not in service,sp is treated as unhealthy", "sp", sp)
log.CtxInfow(context.Background(), "the sp is not in service, sp is treated as unhealthy", "sp", sp)
return false
}

Expand All @@ -785,30 +793,41 @@ func (checker *HealthChecker) checkSPHealth(sp *sptypes.StorageProvider) bool {
Transport: &http.Transport{
TLSClientConfig: &tls.Config{MinVersion: tls.VersionTLS12},
},
Timeout: defaultSPCheckTimeout * time.Second,
Timeout: defaultSPCheckTimeout,
}

// Create an HTTP request to test the validity of the endpoint
urlToCheck := fmt.Sprintf("%s%s", endpoint, httpStatusPath)
req, err := http.NewRequestWithContext(ctxTimeout, http.MethodGet, urlToCheck, nil)
if err != nil {
return false
}
for attempt := 0; attempt < defaultSPHealthCheckerMaxRetries; attempt++ {
start := time.Now()
req, err := http.NewRequestWithContext(ctxTimeout, http.MethodGet, urlToCheck, nil)
if err != nil {
log.CtxErrorw(context.Background(), "failed to create request", "sp", sp, "error", err)
return false
}

resp, err := client.Do(req)
if err != nil {
log.CtxErrorw(context.Background(), "failed to connect to sp", "sp", sp, "error", err)
return false
}
defer resp.Body.Close()
resp, err := client.Do(req)
duration := time.Since(start)
metrics.SPHealthCheckerTime.WithLabelValues(strconv.Itoa(int(sp.Id))).Observe(duration.Seconds())
if err != nil {
log.CtxErrorw(context.Background(), "failed to connect to sp", "sp", sp, "error", err, "duration", duration)
time.Sleep(defaultSPHealthCheckerRetryInterval)
continue
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
log.CtxErrorw(context.Background(), "failed to check sp healthy", "sp", sp, "http_status_code", resp.StatusCode, "resp_body", resp.Body)
return false
if resp.StatusCode == http.StatusOK {
log.CtxInfow(context.Background(), "succeed to check the sp healthy", "sp", sp, "duration", duration)
return true
} else {
metrics.SPHealthCheckerFailureCounter.WithLabelValues(strconv.Itoa(int(sp.Id))).Inc()
log.CtxErrorw(context.Background(), "failed to check sp healthy", "sp", sp, "http_status_code", resp.StatusCode, "duration", duration)
time.Sleep(defaultSPHealthCheckerRetryInterval)
}
}

log.CtxInfow(context.Background(), "succeed to check the sp healthy", "sp", sp)
return true
log.CtxErrorw(context.Background(), "failed to check sp healthy after retries", "sp", sp)
return false
}

func (checker *HealthChecker) Start() {
Expand Down
3 changes: 3 additions & 0 deletions core/vgmgr/virtual_group_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ type VirtualGroupManager interface {
// For those SPs which are in the pool will be skipped when creating a GVG, GVGs in the pool will not be chosen to seal Object
// until released
FreezeSPAndGVGs(spID uint32, gvgs []*virtualgrouptypes.GlobalVirtualGroup)
	// ReleaseAllSP releases all SPs and their related GVGs, in case there is not enough balance to create a new GVG.
	// The existing GVGs should be used even if they failed to serve previously.
ReleaseAllSP()
}

// NewVirtualGroupManager is the virtual group manager init api.
Expand Down
12 changes: 12 additions & 0 deletions core/vgmgr/virtual_group_manager_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ require (
github.com/aliyun/credentials-go v1.3.0
github.com/avast/retry-go/v4 v4.3.1
github.com/aws/aws-sdk-go v1.44.159
github.com/bnb-chain/greenfield v1.7.1-0.20240521062200-cc41c389096c
github.com/bnb-chain/greenfield v1.8.0
github.com/bnb-chain/greenfield-common/go v0.0.0-20240228080631-2683b0ee669a
github.com/bytedance/gopkg v0.0.0-20221122125632-68358b8ecec6
github.com/cometbft/cometbft v0.38.6
Expand Down Expand Up @@ -306,7 +306,7 @@ replace (
github.com/cometbft/cometbft => github.com/bnb-chain/greenfield-cometbft v1.2.1-0.20240408033601-a6b682aa870e
github.com/cometbft/cometbft-db => github.com/bnb-chain/greenfield-cometbft-db v0.8.1-alpha.1
github.com/confio/ics23/go => github.com/cosmos/cosmos-sdk/ics23/go v0.8.0
github.com/cosmos/cosmos-sdk => github.com/bnb-chain/greenfield-cosmos-sdk v1.6.1-0.20240419024340-b5c75cfd8110
github.com/cosmos/cosmos-sdk => github.com/bnb-chain/greenfield-cosmos-sdk v1.8.0
github.com/cosmos/iavl => github.com/bnb-chain/greenfield-iavl v0.20.1
github.com/forbole/juno/v4 => github.com/bnb-chain/juno/v4 v4.0.0-20240604033531-028f2cc8f76d
github.com/gogo/protobuf => github.com/regen-network/protobuf v1.3.3-alpha.regen.1
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -176,16 +176,16 @@ github.com/bgentry/speakeasy v0.1.1-0.20220910012023-760eaf8b6816/go.mod h1:+zsy
github.com/bits-and-blooms/bitset v1.10.0 h1:ePXTeiPEazB5+opbv5fr8umg2R/1NlzgDsyepwsSr88=
github.com/bits-and-blooms/bitset v1.10.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bmizerany/pat v0.0.0-20170815010413-6226ea591a40/go.mod h1:8rLXio+WjiTceGBHIoTvn60HIbs7Hm7bcHjyrSqYB9c=
github.com/bnb-chain/greenfield v1.7.1-0.20240521062200-cc41c389096c h1:azKKvzAC/yLthneXB1sH6iPwVVj7sRK8X58PuTU/TsM=
github.com/bnb-chain/greenfield v1.7.1-0.20240521062200-cc41c389096c/go.mod h1:RkoY1ISUFUMNbw2iR7iX8M+ToqmB8AlcRFKpwCJEX3Q=
github.com/bnb-chain/greenfield v1.8.0 h1:5E8GQF9bS+ltG2PVXOiDXr6zBNL8aG/QhWYp7C2EgDM=
github.com/bnb-chain/greenfield v1.8.0/go.mod h1:R4itO5Q7d5wj0L9sAXpbrVZMUrdDyRtSjccW8XOEFvI=
github.com/bnb-chain/greenfield-cometbft v1.2.1-0.20240408033601-a6b682aa870e h1:4ttDy8yBhBUW0gdFyBK0wHMJS5ZtlhBdoYx/O6T6Eqg=
github.com/bnb-chain/greenfield-cometbft v1.2.1-0.20240408033601-a6b682aa870e/go.mod h1:q9/nqW19iXvxyma5XgcZfxL/OkWI9s5e7yX9ecePz8A=
github.com/bnb-chain/greenfield-cometbft-db v0.8.1-alpha.1 h1:XcWulGacHVRiSCx90Q8Y//ajOrLNBQWR/KDB89dy3cU=
github.com/bnb-chain/greenfield-cometbft-db v0.8.1-alpha.1/go.mod h1:ey1CiK4bYo1RBNJLRiVbYr5CMdSxci9S/AZRINLtppI=
github.com/bnb-chain/greenfield-common/go v0.0.0-20240228080631-2683b0ee669a h1:VjUknQkIcqkjYCt1hmfpinM7kToOBuUU+KykrrqFsEM=
github.com/bnb-chain/greenfield-common/go v0.0.0-20240228080631-2683b0ee669a/go.mod h1:K9jK80fbahciC+FAvrch8Qsbw9ZkvVgjfKsqrzPTAVA=
github.com/bnb-chain/greenfield-cosmos-sdk v1.6.1-0.20240419024340-b5c75cfd8110 h1:max1dH2HkKrNZpL2Jv6xwl+XWHsjJC6Ay+caN17u3CI=
github.com/bnb-chain/greenfield-cosmos-sdk v1.6.1-0.20240419024340-b5c75cfd8110/go.mod h1:siglWrVkM1+6tj8ZPwzMIITWQh7D8gsKJUk0Suz+ul0=
github.com/bnb-chain/greenfield-cosmos-sdk v1.8.0 h1:XaHBYnlAJNIEVTr9dXp3jzw12gCoIEL5jHiAMp+PX0s=
github.com/bnb-chain/greenfield-cosmos-sdk v1.8.0/go.mod h1:2bwmwdXYBISnQoMwgAcZTVGt21lMsHZSeeeMByTvDlQ=
github.com/bnb-chain/greenfield-cosmos-sdk/api v0.0.0-20231206043955-0855e0965bc8 h1:mUMOeNo3K0SZvAhiOHNKW4mmkrhOphBF8tDUyK6e1tY=
github.com/bnb-chain/greenfield-cosmos-sdk/api v0.0.0-20231206043955-0855e0965bc8/go.mod h1:vhsZxXE9tYJeYB5JR4hPhd6Pc/uPf7j1T8IJ7p9FdeM=
github.com/bnb-chain/greenfield-cosmos-sdk/math v0.0.0-20231206043955-0855e0965bc8 h1:1Ud7itq03c4Q9h0kBpw1FYlWKN3kco8cgj59vdd50UQ=
Expand Down
2 changes: 2 additions & 0 deletions modular/manager/manage_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ func (m *ManageModular) pickGVGAndReplicate(ctx context.Context, vgfID uint32, t
gvgMeta, err := m.pickGlobalVirtualGroup(ctx, vgfID, task.GetStorageParams())
log.CtxInfow(ctx, "pick global virtual group", "time_cost", time.Since(startPickGVGTime).Seconds(), "gvg_meta", gvgMeta, "error", err)
if err != nil {
// If there is no way to create a new GVG, release all SPs from the freeze pool; serving with a
// previously frozen GVG is better than not serving requests at all.
m.virtualGroupManager.ReleaseAllSP()
return err
}
replicateTask := &gfsptask.GfSpReplicatePieceTask{}
Expand Down
16 changes: 16 additions & 0 deletions pkg/metrics/metric_items.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ var MetricsItems = []prometheus.Collector{
ManagerCounter,
ManagerTime,
GCBlockNumberGauge,
SPHealthCheckerTime,
SPHealthCheckerFailureCounter,

// workflow metrics category
PerfApprovalTime,
Expand Down Expand Up @@ -247,6 +249,20 @@ var (
Name: "gc_block_number",
Help: "Track the next gc block number.",
}, []string{"gc_block_number"})
SPHealthCheckerTime = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "sp_health_checker_request_time",
Help: "Request duration in seconds.",
},
[]string{"sp_id"},
)
SPHealthCheckerFailureCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sp_health_checker_request_counter",
Help: "Request failure count.",
},
[]string{"sp_id"},
)
)

// workflow metrics items
Expand Down
2 changes: 1 addition & 1 deletion store/sqldb/object_integrity.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ func (s *SpDBImpl) DeleteReplicatePieceChecksum(objectID uint64, segmentIdx uint
metrics.SPDBTime.WithLabelValues(SPDBSuccessDelReplicatePieceChecksum).Observe(
time.Since(startTime).Seconds())
}()
err = s.db.Where("object_id = ? and segment_idx = ? and redundancy_index = ? ", objectID, segmentIdx, redundancyIdx).Delete(PieceHashTable{}).Error
err = s.db.Where("object_id = ? and segment_index = ? and redundancy_index = ? ", objectID, segmentIdx, redundancyIdx).Delete(PieceHashTable{}).Error
return err
}

Expand Down
6 changes: 3 additions & 3 deletions store/sqldb/object_integrity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ func TestSpDBImpl_DeleteReplicatePieceChecksumSuccess(t *testing.T) {
)
s, mock := setupDB(t)
mock.ExpectBegin()
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_idx = ? and redundancy_index = ?").
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_index = ? and redundancy_index = ?").
WithArgs(objectID, segmentIdx, redundancyIdx).WillReturnResult(sqlmock.NewResult(1, 1))
mock.ExpectCommit()
err := s.DeleteReplicatePieceChecksum(objectID, segmentIdx, redundancyIdx)
Expand Down Expand Up @@ -546,7 +546,7 @@ func TestSpDBImpl_DeleteAllReplicatePieceChecksumSuccess(t *testing.T) {
)
s, mock := setupDB(t)
mock.ExpectBegin()
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_idx = ? and redundancy_index = ?").
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_index = ? and redundancy_index = ?").
WithArgs(objectID, segmentIdx, redundancyIdx).WillReturnResult(sqlmock.NewResult(1, 1))
mock.ExpectCommit()
err := s.DeleteAllReplicatePieceChecksum(objectID, redundancyIdx, pieceCount)
Expand All @@ -562,7 +562,7 @@ func TestSpDBImpl_DeleteAllReplicatePieceChecksumFailure(t *testing.T) {
)
s, mock := setupDB(t)
mock.ExpectBegin()
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_idx = ? and redundancy_index = ?").
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_index = ? and redundancy_index = ?").
WithArgs(objectID, segmentIdx, redundancyIdx).WillReturnError(mockDBInternalError)
mock.ExpectRollback()
mock.ExpectCommit()
Expand Down
Loading