Skip to content

Commit

Permalink
feat: add youtube cmd (video -> mapping_table_row)
Browse files Browse the repository at this point in the history
```bash
dbctl youtube --youtube-link https://www.youtube.com/watch?v=rtuJqQDWmIA >> ../server/internal/data/mapping_table.pbtxt
```

This appends one row to mapping_table.pbtxt, for example:

```prototext
rows:  {
  pr_id:  327
  paper_arxiv_ids:  "2010.14701"
  paper_arxiv_ids:  "2001.08361"
  paper_arxiv_ids:  "2106.09488"
  youtube_video_id:  "2irqTp-3hQ0"
}
```
  • Loading branch information
kkweon committed Jun 30, 2021
1 parent 0cf4cd3 commit ac230a7
Show file tree
Hide file tree
Showing 3 changed files with 217 additions and 0 deletions.
118 changes: 118 additions & 0 deletions dbctl/cmd/youtube.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package cmd

import (
"fmt"
"time"

"github.com/codingpot/pr12er/dbctl/internal/transform"
"github.com/codingpot/pr12er/server/pkg/pr12er"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"golang.org/x/time/rate"
"google.golang.org/protobuf/encoding/prototext"
)

// envNameYouTubeLink is the viper key under which the value of the
// --youtube-link flag is bound and later read by the youtube command.
// NOTE(review): the name suggests it also doubles as an environment variable
// name; that depends on viper configuration outside this file — confirm.
const envNameYouTubeLink = "YOUTUBE_LINK"

// youtubeCmd represents the youtube command.
//
// It turns one YouTube link into a single-row MappingTable (PR ID, paper
// arXiv IDs and YouTube video ID) and prints it to STDOUT as multiline
// prototext, so the output can be appended to the mapping table file.
var youtubeCmd = &cobra.Command{
	Use:   "youtube",
	Short: "Generate a single GenMapping to STDOUT",
	Long: `You can run this file and redirect to the mapping file.
For example,
go run main.go youtube --youtube-link https://www.youtube.com/watch?v=rtuJqQDWmIA >> ../server/internal/data/mapping_table.pbtxt
`,
	RunE: func(cmd *cobra.Command, args []string) error {
		apiKey := viper.GetString(envNameYouTubeAPIKey)
		youTubeLink := viper.GetString(envNameYouTubeLink)
		cx := viper.GetString(envNameCx)

		// Never write the API key itself to the log: it is a credential.
		// Only record whether one was supplied.
		loggedAPIKey := ""
		if apiKey != "" {
			loggedAPIKey = "<redacted>"
		}

		log.WithFields(log.Fields{
			envNameYouTubeAPIKey: loggedAPIKey,
			envNameYouTubeLink:   youTubeLink,
			envNameCx:            cx,
		}).Info("binding variables")

		youtubeID, err := transform.ExtractYouTubeID(youTubeLink)
		if err != nil {
			return fmt.Errorf("extracting video ID from link %q: %w", youTubeLink, err)
		}

		client, err := fetcherClient(apiKey)
		if err != nil {
			return fmt.Errorf("creating YouTube client: %w", err)
		}

		// Get video metadata.
		youTubeVideos, err := client.FetchMultiYouTubeVideoByIDs([]string{youtubeID})
		if err != nil {
			return fmt.Errorf("fetching YouTube video %q: %w", youtubeID, err)
		}

		// Exactly one ID was requested, so anything else is unexpected.
		if len(youTubeVideos) != 1 {
			return fmt.Errorf("expected one video but got %v", youTubeVideos)
		}

		vid := youTubeVideos[0]

		// Get PR ID from video.
		prID, err := transform.ExtractPRID(vid.GetVideoTitle())
		if err != nil {
			return fmt.Errorf("extracting PR ID from title %q: %w", vid.GetVideoTitle(), err)
		}

		var paperIDs []string

		// Get paper IDs. Without a search engine ID (cx) the default
		// extractor is used; otherwise the programmable search variant is
		// called with a limiter allowing one event per second.
		if cx == "" {
			paperIDs, err = transform.ExtractPaperIDs(vid.GetVideoTitle())
		} else {
			rateLimiter := rate.NewLimiter(rate.Every(time.Second), 1)
			paperIDs, err = transform.
				ExtractPaperIDsViaProgrammableSearch(
					vid.GetVideoTitle(),
					cx,
					apiKey,
					rateLimiter)
		}
		if err != nil {
			return fmt.Errorf("extracting paper IDs from title %q: %w", vid.GetVideoTitle(), err)
		}

		data := &pr12er.MappingTable{
			Rows: []*pr12er.MappingTableRow{
				{
					PrId:           prID,
					PaperArxivIds:  paperIDs,
					YoutubeVideoId: vid.GetVideoId(),
				},
			},
		}

		bs, err := prototext.MarshalOptions{
			Multiline: true,
			Indent:    " ",
		}.Marshal(data)
		if err != nil {
			return fmt.Errorf("marshaling mapping table row: %w", err)
		}

		// Print to stdout
		fmt.Print(string(bs))

		return nil
	},
}

// init attaches the youtube subcommand to the root command, declares its
// string flags and binds each flag to the viper key read by the command.
func init() {
	rootCmd.AddCommand(youtubeCmd)

	// One entry per flag: flag name, viper key it is bound to, help text.
	flagBindings := []struct {
		flag  string
		key   string
		usage string
	}{
		{
			flag:  "youtube-api-key",
			key:   envNameYouTubeAPIKey,
			usage: "YouTube API Key (required)",
		},
		{
			flag:  "youtube-link",
			key:   envNameYouTubeLink,
			usage: "YouTube Link to fetch",
		},
		{
			flag:  "cx",
			key:   envNameCx,
			usage: "Search Engine ID from https://programmablesearchengine.google.com/ If this is not set, it will use googlesearch (free)",
		},
	}

	for _, fb := range flagBindings {
		youtubeCmd.Flags().String(fb.flag, "", fb.usage)
		// The bind error is intentionally ignored: the flag was registered
		// on the line above, so the lookup is not expected to come back nil.
		_ = viper.BindPFlag(fb.key, youtubeCmd.Flag(fb.flag))
	}
}
43 changes: 43 additions & 0 deletions dbctl/internal/transform/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package transform
import (
"context"
"fmt"
"net/url"
"regexp"
"strconv"
"strings"
Expand Down Expand Up @@ -171,3 +172,45 @@ func ExtractPRID(title string) (int32, error) {
}
return int32(atoi), nil
}

// ErrNotYouTubeLink is returned by ExtractYouTubeID when the given link
// parses as a URL but no YouTube video ID can be found in it.
type ErrNotYouTubeLink struct {
	url string
}

// Error implements the error interface and reports the offending link.
func (e ErrNotYouTubeLink) Error() string {
	return fmt.Sprintf("no valid YouTubeID is found in %s", e.url)
}

// ExtractYouTubeID extracts videoID from YouTube link
//
// For example,
//
//	https://www.youtube.com/watch?v=rtuJqQDWmIA  => rtuJqQDWmIA
//	https://youtube.com/watch?v=rtuJqQDWmIA      => rtuJqQDWmIA
//	https://youtu.be/rtuJqQDWmIA                 => rtuJqQDWmIA
//	https://www.youtube.com/embed/rtuJqQDWmIA    => rtuJqQDWmIA
//	https://www.youtube.com/shorts/rtuJqQDWmIA   => rtuJqQDWmIA
//
// The host is compared case-insensitively and must be youtube.com,
// youtube-nocookie.com or youtu.be (or a subdomain of one of them); a host
// that merely contains one of those names is rejected.
//
// It returns the error from url.Parse when the link is not a valid URL, and
// ErrNotYouTubeLink when the URL is valid but holds no video ID.
func ExtractYouTubeID(link string) (string, error) {
	parsed, err := url.Parse(link)
	if err != nil {
		return "", err
	}

	host := strings.ToLower(parsed.Hostname())
	// Trimming both ends makes a trailing slash harmless; Split always yields
	// at least one (possibly empty) element.
	segments := strings.Split(strings.Trim(parsed.Path, "/"), "/")

	var youtubeID string
	switch {
	case isDomainOrSubdomain(host, "youtube.com"),
		isDomainOrSubdomain(host, "youtube-nocookie.com"):
		youtubeID = parsed.Query().Get("v")
		// Fall back to the path-based forms (/embed/ID, /shorts/ID, ...).
		if youtubeID == "" && len(segments) >= 2 {
			switch segments[0] {
			case "embed", "shorts", "live", "v":
				youtubeID = segments[1]
			}
		}
	case isDomainOrSubdomain(host, "youtu.be"):
		// Short links carry the ID as the first path segment.
		youtubeID = segments[0]
	}

	if youtubeID == "" {
		return "", ErrNotYouTubeLink{link}
	}
	return youtubeID, nil
}

// isDomainOrSubdomain reports whether host is domain itself or a subdomain of it.
func isDomainOrSubdomain(host, domain string) bool {
	return host == domain || strings.HasSuffix(host, "."+domain)
}
56 changes: 56 additions & 0 deletions dbctl/internal/transform/transform_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,59 @@ func TestExtractArxivIDFromURL(t *testing.T) {
})
}
}

// TestExtractYouTubeID runs ExtractYouTubeID over a table of links: the three
// supported YouTube URL shapes must yield the video ID, and a non-YouTube
// input must yield an error.
func TestExtractYouTubeID(t *testing.T) {
	const videoID = "rtuJqQDWmIA"

	cases := []struct {
		name    string
		link    string
		want    string
		wantErr bool
	}{
		{
			name: "https://www.youtube.com/watch?v=rtuJqQDWmIA => rtuJqQDWmIA",
			link: "https://www.youtube.com/watch?v=rtuJqQDWmIA",
			want: videoID,
		},
		{
			name: "https://youtube.com/watch?v=rtuJqQDWmIA => rtuJqQDWmIA",
			link: "https://youtube.com/watch?v=rtuJqQDWmIA",
			want: videoID,
		},
		{
			name: "https://youtu.be/rtuJqQDWmIA => rtuJqQDWmIA",
			link: "https://youtu.be/rtuJqQDWmIA",
			want: videoID,
		},
		{
			name:    "invalid url returns an error",
			link:    "www.gooogle.com",
			wantErr: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			id, err := ExtractYouTubeID(tc.link)

			// Failure cases only check that an error was reported.
			if tc.wantErr {
				assert.Error(t, err)
				return
			}

			assert.NoError(t, err)
			assert.Equal(t, tc.want, id)
		})
	}
}

0 comments on commit ac230a7

Please sign in to comment.