Skip to content
This repository has been archived by the owner on Apr 18, 2024. It is now read-only.

Commit

Permalink
fix: DefaultSaturnCarRequestTimeout
Browse files Browse the repository at this point in the history
19s is not enough for fetching CAR stream of unknown length,
every bigger request was failing.

If we need to pick some ceiling, 30m sound like a good starting point
(this is when CAR stream got timeouted on the old ipfs.io).
  • Loading branch information
lidel committed Mar 29, 2023
1 parent b4cc708 commit 5b37545
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 9 deletions.
14 changes: 11 additions & 3 deletions caboose.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,19 @@ type Config struct {
MaxNCoolOff int
}

const DefaultLoggingInterval = 5 * time.Second
const DefaultSaturnLoggerRequestTimeout = 1 * time.Minute

const DefaultSaturnOrchestratorRequestTimeout = 30 * time.Second

const DefaultSaturnBlockRequestTimeout = 19 * time.Second
const DefaultSaturnCarRequestTimeout = 30 * time.Minute

const DefaultMaxRetries = 3
const DefaultPoolFailureDownvoteDebounce = 1 * time.Minute
const DefaultPoolMembershipDebounce = 3 * DefaultPoolRefreshInterval
const DefaultPoolLowWatermark = 5
const DefaultSaturnRequestTimeout = 19 * time.Second

const maxBlockSize = 4194305 // 4 Mib + 1 byte
const DefaultOrchestratorEndpoint = "https://orchestrator.strn.pl/nodes/nearby?count=1000"
const DefaultPoolRefreshInterval = 5 * time.Minute
Expand Down Expand Up @@ -122,7 +130,7 @@ type ErrCoolDown struct {
}

func (e *ErrCoolDown) Error() string {
return fmt.Sprintf("multiple saturn retrieval failures seen for CID %s/Path %s, please retry after %s", e.Cid, e.Path, humanRetry(e.retryAfter))
return fmt.Sprintf("multiple saturn retrieval failures seen for CID %q / Path %q, please retry after %s", e.Cid, e.Path, humanRetry(e.retryAfter))
}

func (e *ErrCoolDown) RetryAfter() time.Duration {
Expand Down Expand Up @@ -188,7 +196,7 @@ func NewCaboose(config *Config) (*Caboose, error) {

if c.config.SaturnClient == nil {
c.config.SaturnClient = &http.Client{
Timeout: DefaultSaturnRequestTimeout,
Timeout: DefaultSaturnCarRequestTimeout,
}
}
if c.config.OrchestratorEndpoint == nil {
Expand Down
6 changes: 3 additions & 3 deletions cmd/caboose/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ func main1() int {

cb, err := caboose.NewCaboose(&caboose.Config{
OrchestratorClient: &http.Client{
Timeout: 30 * time.Second,
Timeout: caboose.DefaultSaturnOrchestratorRequestTimeout,
},

LoggingEndpoint: *le,
LoggingClient: http.DefaultClient,
LoggingInterval: 5 * time.Second,
LoggingInterval: caboose.DefaultLoggingInterval,

DoValidation: true,
PoolRefresh: 5 * time.Minute,
PoolRefresh: caboose.DefaultPoolRefreshInterval,
SaturnClient: &saturnClient,
})
if err != nil {
Expand Down
20 changes: 17 additions & 3 deletions fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string,
isCacheHit := false
networkError := ""

isBlockRequest := false
if mime == "application/vnd.ipld.raw" {
isBlockRequest = true
}

defer func() {
var ttfbMs int64
durationSecs := time.Since(start).Seconds()
Expand All @@ -92,15 +97,15 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string,
ttfbMs = fb.Sub(start).Milliseconds()
fetchTTFBPerBlockPerPeerSuccessMetric.Observe(float64(ttfbMs))
// track individual block metrics separately
if mime == "application/vnd.ipld.raw" {
if isBlockRequest {
fetchDurationPerBlockPerPeerSuccessMetric.Observe(float64(response_success_end.Sub(start).Milliseconds()))
} else {
fetchDurationPerCarPerPeerSuccessMetric.Observe(float64(response_success_end.Sub(start).Milliseconds()))
}
fetchSpeedPerBlockPerPeerMetric.Observe(float64(received) / float64(durationMs))
} else {
fetchTTFBPerBlockPerPeerFailureMetric.Observe(float64(ttfbMs))
if mime == "application/vnd.ipld.raw" {
if isBlockRequest {
fetchDurationPerBlockPerPeerFailureMetric.Observe(float64(time.Since(start).Milliseconds()))
} else {
fetchDurationPerCarPerPeerFailureMetric.Observe(float64(time.Since(start).Milliseconds()))
Expand Down Expand Up @@ -145,7 +150,16 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string,
}
}()

reqCtx, cancel := context.WithTimeout(ctx, DefaultSaturnRequestTimeout)
// TODO: Ideally, we would have additional "PerRequestInactivityTimeout"
// which is the amount of time without any NEW data from the server, but
// that can be added later. We need both because a slow trickle of data
// could take a large amount of time.
requestTimeout := DefaultSaturnCarRequestTimeout
if isBlockRequest {
requestTimeout = DefaultSaturnBlockRequestTimeout
}

reqCtx, cancel := context.WithTimeout(ctx, requestTimeout)
defer cancel()
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, reqUrl, nil)
if err != nil {
Expand Down

0 comments on commit 5b37545

Please sign in to comment.