Skip to content

Commit

Permalink
Merge branch 'hack/play-with-embeddings'
Browse files Browse the repository at this point in the history
* hack/play-with-embeddings: (29 commits)
  Commit and push to get lint
  Register multiple commands
  Space output a bit more
  Properly expose thor commands
  Update gitignore
  Add prompto2 docs
  Update go dependencies
  Update go to use github.com/pkg/errors everywhere
  Add test data for prompto.rb
  Add thirdparty vss libs
  Add idea project
  Add ruby stuff
  🎨 Elide search results
  ✨ Simplify answer question overloading
  ✨ Add real RAG
  ✨ Add indexing and querying help system
  ✨ Add index command
  ✨ Working example
  🎨 Hack on sqlite-vss example
  ✨ Get sqlite-vss go bindings to work
  ...
  • Loading branch information
wesen committed Jun 23, 2024
2 parents 4b94f08 + 7a6c0f2 commit a688263
Show file tree
Hide file tree
Showing 48 changed files with 2,254 additions and 105 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@ credentials.json
dist/

.history

.yardoc
sqlite-vss
test.db
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions .idea/cody_history.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions .idea/git_toolbox_prj.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions .idea/go-go-labs.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.2.2
4 changes: 2 additions & 2 deletions cmd/apps/bandcamp/pkg/bandcamp.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"github.com/pkg/errors"
"io"
"net/http"
)
Expand Down Expand Up @@ -100,7 +100,7 @@ func (c *Client) Search(ctx context.Context, query string, filter SearchType) (*
}(resp.Body)

if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
return nil, errors.Errorf("unexpected status code: %d", resp.StatusCode)
}

var searchResp SearchResponse
Expand Down
5 changes: 3 additions & 2 deletions cmd/apps/capture/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"github.com/alecthomas/kong"
"github.com/go-go-golems/glazed/pkg/helpers"
"github.com/pkg/errors"
"github.com/spf13/cobra"
)

Expand Down Expand Up @@ -100,14 +101,14 @@ func NewEditorOptions() (*write.Options, error) {
// but using Kong to populate the struct based on the tags.
parser, err := kong.New(&opts, vars)
if err != nil {
return nil, fmt.Errorf("failed to create parser: %w", err)
return nil, errors.Wrap(err, "failed to create parser")
}

// Use Kong to apply the default values. The following call simulates parsing an empty
// command-line input, causing all values to fall back to their defaults as specified in struct tags.
_, err = parser.Parse([]string{})
if err != nil {
return nil, fmt.Errorf("failed to parse default options: %w", err)
return nil, errors.Wrap(err, "failed to parse default options")
}

// At this point, 'opts' is populated with the default values.
Expand Down
9 changes: 5 additions & 4 deletions cmd/apps/chunker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,34 @@ import (
"fmt"
"github.com/go-go-golems/glazed/pkg/cmds"
"github.com/go-go-golems/glazed/pkg/cmds/parameters"
"github.com/pkg/errors"
"github.com/tiktoken-go/tokenizer"
"log"
)

func SplitString(input string, separators []string, model tokenizer.Model) (string, string, error) {
codec, err := tokenizer.ForModel(model)
if err != nil {
return "", "", fmt.Errorf("Error getting codec: %v", err)
return "", "", errors.Wrap(err, "Error getting codec")
}

dfa := computeDFA(separators, codec)

tokenIds, _, err := codec.Encode(input)
if err != nil {
return "", "", fmt.Errorf("Error encoding text: %v", err)
return "", "", errors.Wrap(err, "Error encoding text")
}

headIds, tailIds := splitTokenIdsByDFA(dfa, tokenIds, 10, codec)

headString, err := codec.Decode(headIds)
if err != nil {
return "", "", fmt.Errorf("Error decoding headIds: %v", err)
return "", "", errors.Wrap(err, "Error decoding headIds")
}

tailString, err := codec.Decode(tailIds)
if err != nil {
return "", "", fmt.Errorf("Error decoding tailIds: %v", err)
return "", "", errors.Wrap(err, "Error decoding tailIds")
}

return headString, tailString, nil
Expand Down
130 changes: 130 additions & 0 deletions cmd/apps/cloudwatch-access-log/cmd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
package main

import (
"bufio"
"context"
"encoding/json"
"github.com/go-go-golems/glazed/pkg/cmds"
"github.com/go-go-golems/glazed/pkg/cmds/layers"
"github.com/go-go-golems/glazed/pkg/cmds/parameters"
"github.com/go-go-golems/glazed/pkg/middlewares"
"github.com/go-go-golems/glazed/pkg/settings"
"github.com/go-go-golems/glazed/pkg/types"
"github.com/pkg/errors"
"net/url"
"os"
"strings"
"time"
)

// LogParserCommand is the "log-parser" command. It embeds the glazed
// command description built in NewLogParserCommand and emits rows through
// its RunIntoGlazeProcessor method.
type LogParserCommand struct {
*cmds.CommandDescription
}

// LogEntry is one access-log record decoded from the JSON payload of a log
// line. Fields tagged `json:"-"` are not decoded from JSON; they are derived
// from their Raw* counterparts after decoding (see RunIntoGlazeProcessor).
type LogEntry struct {
Filename string `json:"filename"`
Host string `json:"host"`
Method string `json:"method"`
Process string `json:"process"`
// Query holds the parsed form of RawQuery, keeping only the first value
// of each key. It is excluded from JSON decoding.
Query map[string]string `json:"-"`
// RawQuery is the query string as it appears under the "query" JSON key.
RawQuery string `json:"query"`
Referer string `json:"referer"`
RemoteIP string `json:"remoteIP"`
Request string `json:"request"`
Status string `json:"status"`
// Time is RawTime parsed as RFC 3339. It is excluded from JSON decoding
// and stays at the zero time when RawTime cannot be parsed.
Time time.Time `json:"-"`
// RawTime is the timestamp string as it appears under the "time" JSON key.
RawTime string `json:"time"`
UniqueID string `json:"uniqueId"`
UserAgent string `json:"userAgent"`
}

// NewLogParserCommand builds the "log-parser" command: a single required
// file flag named "log" plus the standard glazed parameter layers.
//
// It returns an error when the glazed parameter layers cannot be created.
func NewLogParserCommand() (*LogParserCommand, error) {
	glazedLayer, err := settings.NewGlazedParameterLayers()
	if err != nil {
		return nil, errors.Wrap(err, "could not create Glazed parameter layer")
	}

	// The only command-specific flag: the log file to parse.
	logFlag := parameters.NewParameterDefinition(
		"log",
		parameters.ParameterTypeFile,
		parameters.WithHelp("Path to the log file"),
		parameters.WithRequired(true),
	)

	description := cmds.NewCommandDescription(
		"log-parser",
		cmds.WithShort("Parse log files"),
		cmds.WithFlags(logFlag),
		cmds.WithLayersList(glazedLayer),
	)

	command := &LogParserCommand{CommandDescription: description}
	return command, nil
}

// LogParserSettings receives the parsed command parameters; it is filled by
// parsedLayers.InitializeStruct in RunIntoGlazeProcessor.
type LogParserSettings struct {
// LogFile is bound to the "log" flag via its glazed.parameter tag.
LogFile *parameters.FileData `glazed.parameter:"log"`
}

// RunIntoGlazeProcessor reads the file given by the "log" parameter line by
// line and emits one row per successfully decoded access-log entry.
//
// Every line is split on its first two spaces; the third part is decoded as
// JSON into a LogEntry. (The two leading fields are ignored here; presumably
// they are the CloudWatch timestamp and stream prefix, TODO confirm.) Lines
// with fewer than three parts or with an invalid JSON payload are skipped.
//
// It returns an error when the settings cannot be initialized, no log file
// was provided, the file cannot be opened, reading the file fails (including
// a line exceeding the scanner's maximum token size), or gp.AddRow fails.
func (c *LogParserCommand) RunIntoGlazeProcessor(
	ctx context.Context,
	parsedLayers *layers.ParsedLayers,
	gp middlewares.Processor,
) error {
	s := &LogParserSettings{}
	if err := parsedLayers.InitializeStruct(layers.DefaultSlug, s); err != nil {
		return errors.Wrap(err, "could not initialize log parser settings")
	}
	// Guard against a missing parameter instead of panicking on a nil pointer.
	if s.LogFile == nil {
		return errors.New("no log file provided")
	}

	file, err := os.Open(s.LogFile.Path)
	if err != nil {
		return errors.Wrapf(err, "could not open log file %q", s.LogFile.Path)
	}
	defer func(file *os.File) {
		// The file is only read, so a close error carries no useful information.
		_ = file.Close()
	}(file)

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.SplitN(line, " ", 3)
		if len(parts) != 3 {
			// Not in the "<field> <field> <json>" shape; skip.
			continue
		}

		var entry LogEntry
		if err := json.Unmarshal([]byte(parts[2]), &entry); err != nil {
			// Non-JSON payloads are expected noise in the log; skip them.
			continue
		}

		// Best effort: an unparsable timestamp leaves entry.Time at its zero value.
		entry.Time, _ = time.Parse(time.RFC3339, entry.RawTime)
		// Best effort: on a malformed query string, ParseQuery still returns
		// the pairs it managed to decode, and those are kept.
		query, _ := url.ParseQuery(strings.TrimPrefix(entry.RawQuery, "?"))
		entry.Query = make(map[string]string, len(query))
		for k, v := range query {
			// Only the first value of a repeated key is kept.
			entry.Query[k] = v[0]
		}

		row := types.NewRow(
			types.MRP("filename", entry.Filename),
			types.MRP("host", entry.Host),
			types.MRP("method", entry.Method),
			types.MRP("process", entry.Process),
			types.MRP("query", entry.Query),
			types.MRP("referer", entry.Referer),
			types.MRP("remoteIP", entry.RemoteIP),
			types.MRP("request", entry.Request),
			types.MRP("status", entry.Status),
			types.MRP("time", entry.Time),
			types.MRP("uniqueID", entry.UniqueID),
			types.MRP("userAgent", entry.UserAgent),
		)

		if err := gp.AddRow(ctx, row); err != nil {
			return err
		}
	}

	// Scan returns false on both EOF and failure; without this check a read
	// error or an over-long line would silently truncate the output.
	if err := scanner.Err(); err != nil {
		return errors.Wrapf(err, "could not read log file %q", s.LogFile.Path)
	}

	return nil
}
Loading

0 comments on commit a688263

Please sign in to comment.