Skip to content

Commit

Permalink
Fix index scripts (#2203)
Browse files Browse the repository at this point in the history
* Fix index cleanup script for mixed case keys

Some index keys, especially workload identifiers, have uppercase
characters. MySQL is case insensitive, but Redis is not. The index
storage backends account for this by converting lookup keys to lowercase
for Redis queries, but the cleanup script did not account for this and
was leaving some entries present in MySQL undeleted in Redis because
they appeared to be different strings. This change ensures that these
keys will be deleted from Redis when they are fully migrated to MySQL,
even if the case is different.

Signed-off-by: Colleen Murphy <colleenmurphy@google.com>

* Remove duplicate function in tests

Remove the copy-pasted make_entries function which is already provided
by the index-test-utils.sh source script.

Signed-off-by: Colleen Murphy <colleenmurphy@google.com>

* Fix docker-compose usage in tests

Signed-off-by: Colleen Murphy <colleenmurphy@google.com>

* Add copy-index script

Add a new script alongside the backfill script to copy indices directly
from Redis to MySQL. This is created as a separate script from the
backfill script because they are necessary for different purposes:
backfill is appropriate for when there is no data in any search index
backend and the only source of data is the Rekor API, where copy is
appropriate when there is index data in the Redis database to pull
directly from. This enables the script to copy data that the backfill
script would not be able to recreate because the data needed to generate
the indexes is not always persisted to the Rekor API.

Signed-off-by: Colleen Murphy <colleenmurphy@google.com>

---------

Signed-off-by: Colleen Murphy <colleenmurphy@google.com>
  • Loading branch information
cmurphy authored Aug 8, 2024
1 parent 2ab3eb3 commit 12c346d
Show file tree
Hide file tree
Showing 6 changed files with 416 additions and 62 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ jobs:
run: ./tests/cleanup-index-test.sh
env:
INDEX_BACKEND: redis
- name: Copy index test
run: ./tests/copy-index-test.sh
- name: Upload logs if they exist
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
if: failure()
Expand All @@ -163,7 +165,7 @@ jobs:
run: sudo add-apt-repository ppa:savoury1/minisign && sudo apt-get update && sudo apt-get install minisign
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Docker Build
run: docker-compose build
run: docker compose build
- name: Extract version of Go to use
run: echo "GOVERSION=$(cat Dockerfile|grep golang | awk ' { print $2 } ' | cut -d '@' -f 1 | cut -d ':' -f 2 | uniq)" >> $GITHUB_ENV
- uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
Expand All @@ -186,7 +188,7 @@ jobs:
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Docker Build
run: docker-compose build
run: docker compose build
- name: Extract version of Go to use
run: echo "GOVERSION=$(cat Dockerfile|grep golang | awk ' { print $2 } ' | cut -d '@' -f 1 | cut -d ':' -f 2 | uniq)" >> $GITHUB_ENV
- uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
Expand Down
4 changes: 4 additions & 0 deletions cmd/cleanup-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"log"
"os"
"os/signal"
"strings"
"syscall"

_ "github.com/go-sql-driver/mysql"
Expand Down Expand Up @@ -171,6 +172,9 @@ func removeFromRedis(ctx context.Context, redisClient *redis.Client, keys []stri
if *dryRun {
return nil
}
for i, k := range keys {
keys[i] = strings.ToLower(k)
}
result, err := redisClient.Del(ctx, keys...).Result()
if err != nil {
return err
Expand Down
224 changes: 224 additions & 0 deletions cmd/copy-index/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
// Copyright 2024 The Sigstore Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
copy-index is a script to copy indexes from one provider to another.
Currently, only copying from Redis to MySQL is supported. This is useful
when the data already exists in one backend and needs to be migrated to a
new provider.
To run:
go run cmd/copy-index/main.go --redis-hostname <redis-hostname> --redis-port <redis-port> \
--mysql-dsn <mysql-dsn> [--dry-run]
*/

package main

import (
"context"
"crypto/tls"
"flag"
"fmt"
"log"
"os"
"os/signal"
"strconv"
"syscall"

_ "github.com/go-sql-driver/mysql"
"github.com/jmoiron/sqlx"
"github.com/redis/go-redis/v9"
"sigs.k8s.io/release-utils/version"
)

// SQL statements executed against the MySQL search index.
const (
// mysqlWriteStmt inserts one (key, uuid) pair. The :key and :uuid
// placeholders are named parameters bound by idempotentAddToIndex.
// NOTE(review): INSERT IGNORE together with the UNIQUE(EntryKey, EntryUUID)
// constraint below is presumably what makes re-running the copy safe.
mysqlWriteStmt = "INSERT IGNORE INTO EntryIndex (EntryKey, EntryUUID) VALUES (:key, :uuid)"
// mysqlCreateTableStmt creates the EntryIndex table if it does not exist
// yet. It is executed once by getMySQLClient right after connecting.
mysqlCreateTableStmt = `CREATE TABLE IF NOT EXISTS EntryIndex (
PK BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
EntryKey varchar(512) NOT NULL,
EntryUUID char(80) NOT NULL,
PRIMARY KEY(PK),
UNIQUE(EntryKey, EntryUUID)
)`
)

// redisClient wraps a go-redis client together with the SCAN cursor that
// getIndexKeys advances from one batch to the next.
type redisClient struct {
// client is the underlying go-redis connection handle.
client *redis.Client
// cursor is the value sent with the next SCAN command. getIndexKeys
// overwrites it with the cursor returned by Redis and reports the scan as
// complete once it is 0 again.
cursor int
}

// mysqlClient wraps the sqlx database handle used to write index rows.
type mysqlClient struct {
// client is the sqlx handle opened by getMySQLClient.
client *sqlx.DB
}

// Command-line flags. main requires redis-hostname, redis-port and mysql-dsn
// to be non-empty; the remaining flags are optional.
var (
redisHostname = flag.String("redis-hostname", "", "Hostname for Redis application")
redisPort = flag.String("redis-port", "", "Port to Redis application")
redisPassword = flag.String("redis-password", "", "Password for Redis authentication")
redisEnableTLS = flag.Bool("redis-enable-tls", false, "Enable TLS for Redis client")
redisInsecureSkipVerify = flag.Bool("redis-insecure-skip-verify", false, "Whether to skip TLS verification for Redis client or not")
mysqlDSN = flag.String("mysql-dsn", "", "MySQL Data Source Name")
// batchSize is passed as the COUNT argument of each SCAN call in getIndexKeys.
batchSize = flag.Int("batch-size", 10000, "Number of Redis entries to scan per batch (use for testing)")
versionFlag = flag.Bool("version", false, "Print the current version of Copy Index")
// dryRun makes idempotentAddToIndex return without executing the INSERT.
dryRun = flag.Bool("dry-run", false, "Dry run - don't actually insert into MySQL")
)

// main parses the command-line flags, validates the required ones, connects
// to MySQL and Redis, and runs the copy. Any failure terminates the process
// through log.Fatal / log.Fatalf.
func main() {
	flag.Parse()

	versionInfo := version.GetVersionInfo()
	if *versionFlag {
		fmt.Println(versionInfo.String())
		os.Exit(0)
	}

	// Required flags, checked in order; the first empty one aborts the run.
	required := []struct {
		value   string
		message string
	}{
		{*redisHostname, "Redis address must be set"},
		{*redisPort, "Redis port must be set"},
		{*mysqlDSN, "MySQL DSN must be set"},
	}
	for _, r := range required {
		if r.value == "" {
			log.Fatal(r.message)
		}
	}

	log.Printf("running copy index Version: %s GitCommit: %s BuildDate: %s", versionInfo.GitVersion, versionInfo.GitCommit, versionInfo.BuildDate)

	// MySQL is connected first, then Redis.
	db, err := getMySQLClient()
	if err != nil {
		log.Fatalf("creating mysql client: %v", err)
	}
	rdb, err := getRedisClient()
	if err != nil {
		log.Fatalf("creating redis client: %v", err)
	}

	if err := doCopy(db, rdb); err != nil {
		log.Fatalf("populating index: %v", err)
	}
}

// getMySQLClient creates a MySQL client.
//
// It opens a handle for the DSN given by --mysql-dsn, verifies connectivity
// with Ping, and makes sure the EntryIndex table exists. If either of the
// latter two steps fails, the handle is closed before the error is returned
// so that no connection pool is leaked.
//
// NOTE(review): the CREATE TABLE IF NOT EXISTS statement runs even when
// --dry-run is set; only the row inserts are skipped in dry-run mode.
func getMySQLClient() (*mysqlClient, error) {
	dbClient, err := sqlx.Open("mysql", *mysqlDSN)
	if err != nil {
		return nil, err
	}
	if err = dbClient.Ping(); err != nil {
		// The original error is the interesting one; ignore a Close failure.
		_ = dbClient.Close()
		return nil, err
	}
	if _, err = dbClient.Exec(mysqlCreateTableStmt); err != nil {
		_ = dbClient.Close()
		return nil, err
	}
	return &mysqlClient{client: dbClient}, nil
}

// getRedisClient builds a Redis client for the host and port given on the
// command line, using database 0. A TLS configuration is attached only when
// --redis-enable-tls is set. The returned error is always nil.
func getRedisClient() (*redisClient, error) {
	var tlsConfig *tls.Config
	if *redisEnableTLS {
		// #nosec G402
		tlsConfig = &tls.Config{
			InsecureSkipVerify: *redisInsecureSkipVerify, //nolint: gosec
		}
	}

	client := redis.NewClient(&redis.Options{
		Network:   "tcp",
		Addr:      *redisHostname + ":" + *redisPort,
		Password:  *redisPassword,
		DB:        0, // default DB
		TLSConfig: tlsConfig,
	})
	return &redisClient{client: client}, nil
}

// doCopy pulls search index entries from the Redis database and copies them
// into the MySQL database.
//
// Keys are fetched from Redis one SCAN batch at a time. For each key, every
// UUID in its list is written to MySQL through idempotentAddToIndex. The loop
// ends when getIndexKeys reports that the scan is complete.
//
// The context is cancelled on SIGINT or SIGTERM, which makes the in-flight
// Redis or MySQL call fail and the function return that error. The first
// error from any stage aborts the copy and is returned wrapped with the name
// of the stage that failed.
func doCopy(mysqlClient *mysqlClient, redisClient *redisClient) error {
	// Keep the stop function and call it on return: discarding it leaves the
	// signal handler registered and leaks the context's resources.
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer stop()

	var (
		keys []string
		done bool
		err  error
	)
	for !done {
		keys, done, err = redisClient.getIndexKeys(ctx)
		if err != nil {
			return fmt.Errorf("scanning redis keys: %w", err)
		}
		for _, k := range keys {
			uuids, err := redisClient.getUUIDsForKey(ctx, k)
			if err != nil {
				return fmt.Errorf("reading uuids for key %q: %w", k, err)
			}
			for _, v := range uuids {
				if err := mysqlClient.idempotentAddToIndex(ctx, k, v); err != nil {
					return fmt.Errorf("writing key %q to mysql: %w", k, err)
				}
			}
		}
	}
	fmt.Println("Copy complete")
	return nil
}

// getIndexKeys fetches the next batch of list-typed keys from Redis, which
// excludes keys that are not index entries (such as checkpoint strings).
// Each call issues one SCAN starting at the stored cursor, with COUNT set to
// --batch-size, and stores the cursor Redis returns for the following call.
// The boolean result is true once the returned cursor is 0, meaning no
// further call is needed.
func (c *redisClient) getIndexKeys(ctx context.Context) ([]string, bool, error) {
	// go-redis Scan method does not support TYPE, so send the raw command.
	raw, err := c.client.Do(ctx, "SCAN", c.cursor, "TYPE", "list", "COUNT", *batchSize).Result()
	if err != nil {
		return nil, false, err
	}

	// The reply must be a two-element array: [cursor, keys].
	reply, isList := raw.([]any)
	if !isList {
		return nil, false, fmt.Errorf("unexpected result from Redis SCAN command: %v", raw)
	}
	if len(reply) != 2 {
		return nil, false, fmt.Errorf("unexpected result from Redis SCAN command: %v", reply)
	}

	nextCursor, isString := reply[0].(string)
	if !isString {
		return nil, false, fmt.Errorf("could not parse returned cursor from Redis SCAN command: %v", reply[0])
	}
	if c.cursor, err = strconv.Atoi(nextCursor); err != nil {
		return nil, false, fmt.Errorf("could not parse returned cursor from Redis SCAN command: %v", nextCursor)
	}

	rawKeys, isList := reply[1].([]any)
	if !isList {
		return nil, false, fmt.Errorf("could not parse returned keys from Redis SCAN command: %v", reply[1])
	}
	batch := make([]string, 0, len(rawKeys))
	for _, rawKey := range rawKeys {
		name, isString := rawKey.(string)
		if !isString {
			return nil, false, fmt.Errorf("could not parse returned keys from Redis SCAN command: %v", rawKey)
		}
		batch = append(batch, name)
	}

	fmt.Printf("Processing %d keys - cursor %d\n", len(rawKeys), c.cursor)
	finished := c.cursor == 0
	return batch, finished, nil
}

// getUUIDsForKey reads the whole Redis list stored at key (LRANGE from index
// 0 to -1) and returns its elements, which are the UUIDs indexed under key.
func (c *redisClient) getUUIDsForKey(ctx context.Context, key string) ([]string, error) {
	const first, last = 0, -1
	wholeList := c.client.LRange(ctx, key, first, last)
	return wholeList.Result()
}

// idempotentAddToIndex writes one key/UUID pair to the MySQL search index
// table using mysqlWriteStmt. When --dry-run is set, nothing is executed and
// nil is returned.
func (c *mysqlClient) idempotentAddToIndex(ctx context.Context, key, value string) error {
	if !*dryRun {
		row := map[string]any{"key": key, "uuid": value}
		if _, err := c.client.NamedExecContext(ctx, mysqlWriteStmt, row); err != nil {
			return err
		}
	}
	return nil
}
56 changes: 0 additions & 56 deletions tests/backfill-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,44 +33,6 @@ source $(dirname "$0")/index-test-utils.sh

trap cleanup EXIT

#######################################
# Create signed test artifacts, upload them to Rekor, and record the UUID of
# every resulting log entry.
# Ten artifacts (blob0..blob9) are each signed with their own minisign key
# (mini0..mini9). Artifacts 7..9 are then signed a second time with keys
# 2..4, so those keys and artifacts each map to two UUIDs (space-separated).
# Globals:
#   testdir            (read)    directory where keys and blobs are written
#   REKOR_ADDRESS      (read)    Rekor server URL
#   expected_keys      (written) assoc. array: public key path -> UUID(s)
#   expected_artifacts (written) assoc. array: artifact path   -> UUID(s)
# Side effects:
#   Runs with `set -e` and leaves the shell in `set +e` on return. Any
#   failing minisign or rekor-cli command aborts the script.
#######################################
make_entries() {
  set -e
  local i key_index pub key blob rekor_out uuid
  # make 10 unique artifacts and sign each once
  for ((i = 0; i <= 9; i++)); do
    pub="$testdir/mini${i}.pub"
    key="$testdir/mini${i}.key"
    blob="$testdir/blob${i}"
    minisign -GW -p "$pub" -s "$key"
    echo "test${i}" > "$blob"
    minisign -S -s "$key" -m "$blob"
    # Assign separately from `local` so a failed upload is not masked and
    # trips `set -e`.
    rekor_out=$(rekor-cli --rekor_server "$REKOR_ADDRESS" upload \
      --artifact "$blob" \
      --pki-format=minisign \
      --public-key "$pub" \
      --signature "${blob}.minisig" \
      --format json)
    uuid=$(printf '%s\n' "$rekor_out" | jq -r .Location | cut -d '/' -f 6)
    expected_keys["$pub"]=$uuid
    expected_artifacts["$blob"]=$uuid
  done
  # double-sign a few artifacts
  for ((i = 7; i <= 9; i++)); do
    # $(( )) never returns a failing status, unlike `let`, so no need to
    # toggle `set -e` around it.
    key_index=$((i - 5))
    pub="$testdir/mini${key_index}.pub"
    key="$testdir/mini${key_index}.key"
    blob="$testdir/blob${i}"
    minisign -S -s "$key" -m "$blob"
    rekor_out=$(rekor-cli --rekor_server "$REKOR_ADDRESS" upload \
      --artifact "$blob" \
      --pki-format=minisign \
      --public-key "$pub" \
      --signature "${blob}.minisig" \
      --format json)
    uuid=$(printf '%s\n' "$rekor_out" | jq -r .Location | cut -d '/' -f 6)
    expected_keys["$pub"]="${expected_keys[$pub]} $uuid"
    expected_artifacts["$blob"]="${expected_artifacts[$blob]} $uuid"
  done
  set +e
}

remove_keys() {
set -e
for i in $@ ; do
Expand All @@ -88,24 +50,6 @@ remove_keys() {
set +e
}

#######################################
# Assert that searching Rekor for an artifact finds no index entry.
# Globals:   REKOR_ADDRESS (read) Rekor server URL
# Arguments: $1 - path to the artifact to search for
# Outputs:   rekor-cli's stdout; "Unexpected index found." on failure
# Returns:   0 if the search fails as expected; exits the script with
#            status 1 if the search succeeds
#######################################
search_expect_fail() {
  local artifact=$1
  # stderr is discarded on purpose: a failed search is the expected outcome.
  if rekor-cli --rekor_server "$REKOR_ADDRESS" search --artifact "$artifact" 2>/dev/null; then
    echo "Unexpected index found."
    exit 1
  fi
}

#######################################
# Assert that searching Rekor for an artifact finds an index entry.
# Globals:   REKOR_ADDRESS (read) Rekor server URL
# Arguments: $1 - path to the artifact to search for
# Outputs:   rekor-cli's stdout; "Unexpected missing index." on failure
# Returns:   0 if the search succeeds; exits the script with status 1 if
#            the search fails
#######################################
search_expect_success() {
  local artifact=$1
  # stderr is discarded; the failure is reported by the message below.
  if ! rekor-cli --rekor_server "$REKOR_ADDRESS" search --artifact "$artifact" 2>/dev/null; then
    echo "Unexpected missing index."
    exit 1
  fi
}

check_all_entries() {
set -e
for artifact in "${!expected_artifacts[@]}" ; do
Expand Down
Loading

0 comments on commit 12c346d

Please sign in to comment.