forked from stripe-archive/timberlake
-
Notifications
You must be signed in to change notification settings - Fork 0
/
persistedjobclient.go
119 lines (104 loc) · 2.76 KB
/
persistedjobclient.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"strings"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)
// PersistedJobClient fetches retired jobs from persistent storage (e.g. S3) that
// stores jobs indefinitely (or for a very long time).
type PersistedJobClient interface {
	// FetchJob returns the stored job with the given id.
	FetchJob(id string) (*job, error)
	// FetchFlowJobIds returns the ids of all jobs recorded under the given flow.
	FetchFlowJobIds(flowID string) ([]string, error)
}
/**
 * We expect the jobs to be stored in the bucket with the following structure:
 *
 * <s3bucket>/
 *   <jobsPrefix>/
 *     <jobid>.json
 *     <otherjobid>.json
 *   <flowPrefix>/
 *     <flowid>/
 *       <jobid>.json
 *     <otherflowid>/
 *       <otherjobid>.json
 */

// s3JobClient is the S3-backed implementation of PersistedJobClient.
type s3JobClient struct {
	bucketName string // S3 bucket that holds the retired-job objects
	jobsPrefix string // key prefix under which individual job JSON documents live
	flowPrefix string // key prefix under which per-flow folders of job JSON documents live
	s3Client   *s3.S3 // AWS SDK client used for all bucket operations
}
// NewS3JobClient creates a PersistedJobClient backed by the given S3 bucket
// in the given AWS region, reading job documents under jobsPrefix and
// per-flow job listings under flowPrefix.
func NewS3JobClient(awsRegion string, bucketName string, jobsPrefix string, flowPrefix string) PersistedJobClient {
	// session.Must panics on session-construction failure, which only
	// happens for invalid static configuration.
	sess := session.Must(session.NewSession(&aws.Config{
		Region: aws.String(awsRegion),
	}))
	return &s3JobClient{
		bucketName: bucketName,
		jobsPrefix: jobsPrefix,
		flowPrefix: flowPrefix,
		s3Client:   s3.New(sess),
	}
}
// parseJobIDFromKey extracts the job id from an S3 object key such as
// "<folder>/job_123.json", returning "job_123".
//
// The id is the final path segment with a trailing ".json" extension
// removed. Unlike a raw slice of key[:len(key)-len(".json")], TrimSuffix
// cannot panic on keys shorter than the extension (e.g. "a/b") and leaves
// keys without the ".json" suffix intact rather than truncating them.
func parseJobIDFromKey(key string) string {
	base := key[strings.LastIndex(key, "/")+1:]
	return strings.TrimSuffix(base, ".json")
}
// FetchFlowJobIds lists the ids of every job stored under the given flow.
//
// Objects are expected at "<flowPrefix>/<flowID>/<jobid>.json". The listing
// prefix is terminated with "/" so a flow id that is a prefix of another
// (e.g. "abc" vs "abcd") does not pick up the other flow's objects, and the
// paginated ListObjectsPages call is used so flows with more than 1000 jobs
// (the ListObjects per-page cap) are not silently truncated.
func (client *s3JobClient) FetchFlowJobIds(flowID string) ([]string, error) {
	prefix := fmt.Sprintf("%s/%s/", client.flowPrefix, flowID)
	input := &s3.ListObjectsInput{
		Bucket: aws.String(client.bucketName),
		Prefix: aws.String(prefix),
	}
	// fetch all pages of matching objects from S3, collecting just the ids
	var jobIDs []string
	err := client.s3Client.ListObjectsPages(input, func(page *s3.ListObjectsOutput, lastPage bool) bool {
		for _, obj := range page.Contents {
			jobIDs = append(jobIDs, parseJobIDFromKey(*obj.Key))
		}
		return true // keep paging
	})
	if err != nil {
		log.Printf("Failed to fetch from S3: `%s`\n", err.Error())
		return nil, err
	}
	return jobIDs, nil
}
// FetchJob retrieves a single retired job from S3 by id and converts it into
// the job representation used by the rest of the application.
//
// The object is expected at "<jobsPrefix>/<id>.json" and to contain an
// S3JobDetail JSON document.
func (client *s3JobClient) FetchJob(id string) (*job, error) {
	key := fmt.Sprintf("%s/%s.json", client.jobsPrefix, id)

	// fetch the object from S3
	resp, err := client.s3Client.GetObject(&s3.GetObjectInput{
		Bucket: aws.String(client.bucketName),
		Key:    aws.String(key),
	})
	if err != nil {
		log.Printf("Failed to fetch from S3: `%s`\n", err.Error())
		return nil, err
	}
	defer resp.Body.Close()

	// read the response body and decode the JSON document
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	detail := &S3JobDetail{}
	if err := json.Unmarshal(body, detail); err != nil {
		return nil, err
	}

	// translate into the shape the job history server produces
	return s3responseToJob(detail), nil
}