Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

debuginfo: Improve debuginfo quality checking using metadata #1157

Merged
merged 17 commits into from
Jun 30, 2022
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion deploy/lib/parca/parca.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ function(params) {
else ['--cors-allowed-origins=' + prc.config.corsAllowedOrigins]) +
(if prc.config.storageRetentionTime == '' then []
else ['--storage-tsdb-retention-time=' + prc.config.storageRetentionTime]) +
['--debug-infod-upstream-servers=' + std.join(',', prc.config.debugInfodUpstreamServers)] +
(if std.length(prc.config.debugInfodUpstreamServers) <= 0 then []
else ['--debug-infod-upstream-servers=' + std.join(',', prc.config.debugInfodUpstreamServers)]) +
(if prc.config.debugInfodHTTPRequestTimeout == '' then []
else ['--debug-infod-http-request-timeout=' + prc.config.debugInfodHTTPRequestTimeout]),
ports: [
Expand Down
2 changes: 1 addition & 1 deletion gen/proto/go/parca/debuginfo/v1alpha1/debuginfo.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
},
"hash": {
"type": "string",
"title": "hash is the hash of the debug information file"
"title": "hash is the hash of the source file that debug information extracted from"
}
},
"title": "UploadInfo contains the build_id and other metadata for the debug data"
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ require (
github.com/grpc-ecosystem/grpc-gateway/v2 v2.10.3
github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2
github.com/improbable-eng/grpc-web v0.15.0
github.com/nanmu42/limitio v1.0.0
github.com/oklog/run v1.1.0
github.com/polarsignals/frostdb v0.0.0-20220627125235-571b4dc57775
github.com/prometheus/client_golang v1.12.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/grpc-proxy v0.0.0-20181017164139-0f1106ef9c76/go.mod h1:x5OoJHDHqxHS801UIuhqGl6QdSAEJvtausosHSdazIo=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/nanmu42/limitio v1.0.0 h1:dpopBYPwUyLOPv+vsGja0iax+dG0SP9paTEmz+Sy7KU=
github.com/nanmu42/limitio v1.0.0/go.mod h1:8H40zQ7pqxzbwZ9jxsK2hDoE06TH5ziybtApt1io8So=
github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg=
github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU=
github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k=
Expand Down
19 changes: 10 additions & 9 deletions pkg/debuginfo/debuginfod.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ func (c *DebugInfodClientObjectStorageCache) GetDebugInfo(ctx context.Context, b
defer w.Close()
defer debugInfo.Close()

// TODO(kakkoyun): Use store.upload() to upload the debug info to object storage.
if err := c.bucket.Upload(ctx, objectPath(buildID), r); err != nil {
level.Error(logger).Log("msg", "failed to upload downloaded debuginfod file", "err", err)
}
Expand All @@ -142,15 +143,15 @@ func (c *HTTPDebugInfodClient) GetDebugInfo(ctx context.Context, buildID string)
logger := log.With(c.logger, "buildid", buildID)

// e.g:
//"https://debuginfod.elfutils.org/"
//"https://debuginfod.systemtap.org/"
//"https://debuginfod.opensuse.org/"
//"https://debuginfod.s.voidlinux.org/"
//"https://debuginfod.debian.net/"
//"https://debuginfod.fedoraproject.org/"
//"https://debuginfod.altlinux.org/"
//"https://debuginfod.archlinux.org/"
//"https://debuginfod.centos.org/"
// "https://debuginfod.elfutils.org/"
// "https://debuginfod.systemtap.org/"
// "https://debuginfod.opensuse.org/"
// "https://debuginfod.s.voidlinux.org/"
// "https://debuginfod.debian.net/"
// "https://debuginfod.fedoraproject.org/"
// "https://debuginfod.altlinux.org/"
// "https://debuginfod.archlinux.org/"
// "https://debuginfod.centos.org/"
for _, u := range c.UpstreamServers {
serverURL := *u
rc, err := func(serverURL url.URL) (io.ReadCloser, error) {
Expand Down
168 changes: 96 additions & 72 deletions pkg/debuginfo/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"path"
"time"

Expand All @@ -27,36 +28,37 @@ import (
)

var (
ErrMetadataShouldExist = errors.New("debug info metadata should exist")
ErrMetadataExpectedStateUploading = errors.New("debug info metadata state should be uploading")
ErrMetadataUnknownState = errors.New("debug info metadata state is unknown")
ErrMetadataShouldExist = errors.New("debug info metadata should exist")
ErrMetadataUnexpectedState = errors.New("debug info metadata state is unexpected")
// There's no debug info metadata. This could mean that an older version
// uploaded the debug info files, but there's no record of the metadata, yet.
ErrMetadataNotFound = errors.New("debug info metadata not found")
)

type metadataState int64

const (
metadataStateUnknown metadataState = iota
// There's no debug info metadata. This could mean that an older version
// uploaded the debug info files, but there's no record of the metadata, yet.
metadataStateEmpty
kakkoyun marked this conversation as resolved.
Show resolved Hide resolved
// The debug info file is being uploaded.
metadataStateUploading
// The debug info file is fully uploaded.
metadataStateUploaded
// The debug info file is corrupted.
metadataStateCorrupted
)

var mdStateStr = map[metadataState]string{
metadataStateUnknown: "METADATA_STATE_UNKNOWN",
metadataStateEmpty: "METADATA_STATE_EMPTY",
metadataStateUploading: "METADATA_STATE_UPLOADING",
metadataStateUploaded: "METADATA_STATE_UPLOADED",
metadataStateCorrupted: "METADATA_STATE_CORRUPTED",
}

var strMdState = map[string]metadataState{
"METADATA_STATE_UNKNOWN": metadataStateUnknown,
"METADATA_STATE_EMPTY": metadataStateEmpty,
"METADATA_STATE_UPLOADING": metadataStateUploading,
"METADATA_STATE_UPLOADED": metadataStateUploaded,
"METADATA_STATE_CORRUPTED": metadataStateCorrupted,
}

func (m metadataState) String() string {
Expand Down Expand Up @@ -96,98 +98,120 @@ func newMetadataManager(logger log.Logger, bucket objstore.Bucket) *metadataMana

type metadata struct {
State metadataState `json:"state"`
StartedUploadAt int64 `json:"started_upload_at"`
FinishedUploadAt int64 `json:"finished_upload_at"`
BuildID string `json:"build_id"`
kakkoyun marked this conversation as resolved.
Show resolved Hide resolved
Hash string `json:"hash"`
kakkoyun marked this conversation as resolved.
Show resolved Hide resolved
UploadStartedAt int64 `json:"upload_started_at"`
kakkoyun marked this conversation as resolved.
Show resolved Hide resolved
UploadFinishedAt int64 `json:"upload_finished_at"`
}

func (m *metadataManager) update(ctx context.Context, buildID string, state metadataState) error {
kakkoyun marked this conversation as resolved.
Show resolved Hide resolved
level.Debug(m.logger).Log("msg", "attempting state update to", "state", state)

switch state {
case metadataStateUploading:
_, err := m.bucket.Get(ctx, metadataObjectPath(buildID))
// The metadata file should not exist yet. Not erroring here because there's
// room for a race condition.
if err == nil {
level.Info(m.logger).Log("msg", "there should not be a metadata file")
return nil
}
func (m *metadataManager) markAsCorrupted(ctx context.Context, buildID string) error {
if err := m.write(ctx, buildID, &metadata{
State: metadataStateCorrupted,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this override the other metadata, such as the upload_started_at timestamp, as we call it after metadataManager.uploading?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. When we have the corrupted state, we don't care about any of the metadata or the debug file itself. We just get rid of it very next time we had the chance.

Which type of data do you think would be useful in this state?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fair, it's definitely not a big deal. I was thinking of being able to inspect the files for patterns and correlations, such as trying to see if more corrupted files happen after a certain time (imagine, a new release, or if the server is overloaded, etc).

But I guess that as you said, the window for inspecting such files is small as they'll most likely be overwritten later on.

We can always use some Prometheus counters to understand the failures and find correlations!

}); err != nil {
return fmt.Errorf("failed to write metadata: %w", err)
}
level.Debug(m.logger).Log("msg", "marked as corrupted", "buildid", buildID)
return nil
}

if !m.bucket.IsObjNotFoundErr(err) {
level.Error(m.logger).Log("msg", "unexpected error", "err", err)
return err
}
func (m *metadataManager) markAsUploading(ctx context.Context, buildID string) error {
_, err := m.bucket.Get(ctx, metadataObjectPath(buildID))
// The metadata file should not exist yet. Not erroring here because there's
// room for a race condition.
if err == nil {
level.Info(m.logger).Log("msg", "there should not be a metadata file")
return nil
}

// Let's write the metadata.
metadataBytes, _ := json.MarshalIndent(&metadata{
State: metadataStateUploading,
StartedUploadAt: time.Now().Unix(),
}, "", "\t")
r := bytes.NewReader(metadataBytes)
if err := m.bucket.Upload(ctx, metadataObjectPath(buildID), r); err != nil {
level.Error(m.logger).Log("msg", "failed to create metadata file", "err", err)
return err
}
if !m.bucket.IsObjNotFoundErr(err) {
level.Error(m.logger).Log("msg", "unexpected error", "err", err)
return err
}

case metadataStateUploaded:
r, err := m.bucket.Get(ctx, metadataObjectPath(buildID))
if err != nil {
level.Error(m.logger).Log("msg", "expected metadata file", "err", err)
return ErrMetadataShouldExist
}
buf := new(bytes.Buffer)
_, err = buf.ReadFrom(r)
if err != nil {
level.Error(m.logger).Log("msg", "ReadFrom failed", "err", err)
return err
}
if err := m.write(ctx, buildID, &metadata{
State: metadataStateUploading,
BuildID: buildID,
UploadStartedAt: time.Now().Unix(),
}); err != nil {
return fmt.Errorf("failed to write metadata: %w", err)
}

metaData := &metadata{}
level.Debug(m.logger).Log("msg", "marked as uploading", "buildid", buildID)
return nil
}

if err := json.Unmarshal(buf.Bytes(), metaData); err != nil {
level.Error(m.logger).Log("msg", "parsing JSON metadata failed", "err", err)
return err
}
func (m *metadataManager) markAsUploaded(ctx context.Context, buildID, hash string) error {
r, err := m.bucket.Get(ctx, metadataObjectPath(buildID))
if err != nil {
level.Error(m.logger).Log("msg", "expected metadata file", "err", err)
return ErrMetadataShouldExist
}
buf := new(bytes.Buffer)
_, err = buf.ReadFrom(r)
if err != nil {
return err
}

// There's a small window where a race could happen.
if metaData.State == metadataStateUploaded {
return nil
}
metaData := &metadata{}
if err := json.Unmarshal(buf.Bytes(), metaData); err != nil {
return err
}

if metaData.State != metadataStateUploading {
return ErrMetadataExpectedStateUploading
}
// There's a small window where a race could happen.
if metaData.State == metadataStateUploaded {
return nil
}

metaData.State = metadataStateUploaded
metaData.FinishedUploadAt = time.Now().Unix()
if metaData.BuildID != buildID {
return errors.New("build ids do not match")
}

metadataBytes, _ := json.MarshalIndent(&metaData, "", "\t")
newData := bytes.NewReader(metadataBytes)
metaData.State = metadataStateUploaded
metaData.BuildID = buildID
metaData.Hash = hash
metaData.UploadFinishedAt = time.Now().Unix()

if err := m.bucket.Upload(ctx, metadataObjectPath(buildID), newData); err != nil {
return err
}
metadataBytes, _ := json.MarshalIndent(&metaData, "", "\t")
newData := bytes.NewReader(metadataBytes)

if err := m.bucket.Upload(ctx, metadataObjectPath(buildID), newData); err != nil {
return err
}

level.Debug(m.logger).Log("msg", "marked as uploaded", "buildid", buildID)
return nil
}

func (m *metadataManager) fetch(ctx context.Context, buildID string) (metadataState, error) {
func (m *metadataManager) fetch(ctx context.Context, buildID string) (*metadata, error) {
r, err := m.bucket.Get(ctx, metadataObjectPath(buildID))
if err != nil {
return metadataStateEmpty, nil
if m.bucket.IsObjNotFoundErr(err) {
return nil, ErrMetadataNotFound
}
return nil, err
}

buf := new(bytes.Buffer)
_, err = buf.ReadFrom(r)
if err != nil {
return metadataStateUnknown, err
return nil, err
}

metaData := &metadata{}
if err := json.Unmarshal(buf.Bytes(), metaData); err != nil {
return metadataStateUnknown, err
return nil, err
}
return metaData.State, nil
return metaData, nil
}

// write serializes md as indented JSON and uploads it to the bucket at the
// metadata object path for buildID, overwriting any existing metadata object.
// It returns an error if marshaling or the upload fails.
func (m *metadataManager) write(ctx context.Context, buildID string, md *metadata) error {
	// Propagate marshal errors instead of silently discarding them: a custom
	// MarshalJSON on metadataState can fail, and uploading nil bytes would
	// corrupt the stored metadata.
	metadataBytes, err := json.MarshalIndent(md, "", "\t")
	if err != nil {
		return fmt.Errorf("marshal metadata for build id %q: %w", buildID, err)
	}
	r := bytes.NewReader(metadataBytes)
	if err := m.bucket.Upload(ctx, metadataObjectPath(buildID), r); err != nil {
		level.Error(m.logger).Log("msg", "failed to create metadata file", "err", err)
		return err
	}
	return nil
}

func metadataObjectPath(buildID string) string {
Expand Down
43 changes: 21 additions & 22 deletions pkg/debuginfo/metadata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,16 @@ func TestMetadata(t *testing.T) {
require.NoError(t, err)

// Test that the initial state should be empty.
state, err := store.metadataManager.fetch(context.Background(), "fake-build-id")
require.NoError(t, err)
require.Equal(t, metadataStateEmpty, state)
_, err = store.metadataManager.fetch(context.Background(), "fake-build-id")
require.ErrorIs(t, err, ErrMetadataNotFound)

// Updating the state should be written to blob storage.
err = store.metadataManager.update(context.Background(), "fake-build-id", metadataStateUploading)
err = store.metadataManager.markAsUploading(context.Background(), "fake-build-id")
require.NoError(t, err)

state, err = store.metadataManager.fetch(context.Background(), "fake-build-id")
md, err := store.metadataManager.fetch(context.Background(), "fake-build-id")
require.NoError(t, err)
require.Equal(t, metadataStateUploading, state)
require.Equal(t, metadataStateUploading, md.State)
}

func TestMetadata_MarshalJSON(t *testing.T) {
Expand All @@ -86,20 +85,20 @@ func TestMetadata_MarshalJSON(t *testing.T) {
wantErr bool
}{
{
m: metadata{State: metadataStateUnknown},
want: `{"state":"METADATA_STATE_UNKNOWN","started_upload_at":0,"finished_upload_at":0}`,
m: metadata{State: metadataStateUnknown, BuildID: "build_id", Hash: "hash"},
want: `{"state":"METADATA_STATE_UNKNOWN","build_id":"build_id","hash":"hash","upload_started_at":0,"upload_finished_at":0}`,
},
{
m: metadata{State: metadataStateEmpty},
want: `{"state":"METADATA_STATE_EMPTY","started_upload_at":0,"finished_upload_at":0}`,
m: metadata{State: metadataStateUploading, BuildID: "build_id", Hash: "hash"},
want: `{"state":"METADATA_STATE_UPLOADING","build_id":"build_id","hash":"hash","upload_started_at":0,"upload_finished_at":0}`,
},
{
m: metadata{State: metadataStateUploading},
want: `{"state":"METADATA_STATE_UPLOADING","started_upload_at":0,"finished_upload_at":0}`,
m: metadata{State: metadataStateUploaded, BuildID: "build_id", Hash: "hash"},
want: `{"state":"METADATA_STATE_UPLOADED","build_id":"build_id","hash":"hash","upload_started_at":0,"upload_finished_at":0}`,
},
{
m: metadata{State: metadataStateUploaded},
want: `{"state":"METADATA_STATE_UPLOADED","started_upload_at":0,"finished_upload_at":0}`,
m: metadata{State: metadataStateCorrupted, BuildID: "build_id", Hash: "hash"},
want: `{"state":"METADATA_STATE_CORRUPTED","build_id":"build_id","hash":"hash","upload_started_at":0,"upload_finished_at":0}`,
},
}
for _, tt := range tests {
Expand All @@ -125,20 +124,20 @@ func TestMetadata_UnmarshalJSON(t *testing.T) {
wantErr bool
}{
{
b: []byte(`{"state":"METADATA_STATE_UNKNOWN","started_upload_at":0,"finished_upload_at":0}`),
want: metadata{State: metadataStateUnknown},
b: []byte(`{"state":"METADATA_STATE_UNKNOWN","build_id":"build_id","hash":"hash","upload_started_at":0,"upload_finished_at":0}`),
want: metadata{State: metadataStateUnknown, BuildID: "build_id", Hash: "hash"},
},
{
b: []byte(`{"state":"METADATA_STATE_EMPTY","started_upload_at":0,"finished_upload_at":0}`),
want: metadata{State: metadataStateEmpty},
b: []byte(`{"state":"METADATA_STATE_UPLOADING","build_id":"build_id","hash":"hash","upload_started_at":0,"upload_finished_at":0}`),
want: metadata{State: metadataStateUploading, BuildID: "build_id", Hash: "hash"},
},
{
b: []byte(`{"state":"METADATA_STATE_UPLOADING","started_upload_at":0,"finished_upload_at":0}`),
want: metadata{State: metadataStateUploading},
b: []byte(`{"state":"METADATA_STATE_UPLOADED","build_id":"build_id","hash":"hash","upload_started_at":0,"upload_finished_at":0}`),
want: metadata{State: metadataStateUploaded, BuildID: "build_id", Hash: "hash"},
},
{
b: []byte(`{"state":"METADATA_STATE_UPLOADED","started_upload_at":0,"finished_upload_at":0}`),
want: metadata{State: metadataStateUploaded},
b: []byte(`{"state":"METADATA_STATE_CORRUPTED","build_id":"build_id","hash":"hash","upload_started_at":0,"upload_finished_at":0}`),
want: metadata{State: metadataStateCorrupted, BuildID: "build_id", Hash: "hash"},
},
}
for _, tt := range tests {
Expand Down
Loading