Skip to content

Commit

Permalink
Merge pull request #3 from sysnapse/master
Browse files Browse the repository at this point in the history
Fixed unstable YouTube scraping that was causing spam
  • Loading branch information
eric2788 authored Oct 17, 2022
2 parents 93128ef + c474396 commit 57198db
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 6 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/.idea/
**/config/**
**/youtube/result/**
**/youtube/result/**
**/youtube/debug/**
15 changes: 10 additions & 5 deletions crawlers/bilibili/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ import (
"context"
"encoding/json"
"fmt"
mapset "github.com/deckarep/golang-set"
"github.com/eric2788/PlatformsCrawler/crawling"
"net/http"
"net/url"
"strings"
"time"

mapset "github.com/deckarep/golang-set"
"github.com/eric2788/PlatformsCrawler/crawling"
)

const id = "platforms_crawler"
Expand Down Expand Up @@ -57,14 +58,14 @@ func handleMessage(b []byte) {

publisher(fmt.Sprintf("%d", int64(roomId)), b)
} else {
logger.Debugf("推送方式為 null已略過")
logger.Debugf("推送方式為 null, 已略過")
}

// 僅作為 logging
if data["command"] == "LIVE" {
logger.Infof("檢測到 %s(%d) 在 B站 開播了。", info["name"], int64(roomId))
} else {
logger.Debugf("Received %s command from room %d", data["command"], int64(roomId))
} else if data["command"] == "HEARTBEAT_REPLY" {
logger.Debugf("成功接收來自房間 %s 的 HEARTBEAT_REPLY", int64(roomId))
}
} else {
logger.Warnf("未知的房間 %+v", data["live_info"])
Expand Down Expand Up @@ -101,6 +102,10 @@ func doSubscribeRequest(room []string) (url.URL, error) {

resp, err := http.DefaultClient.Do(req)

if err != nil {
return httpUrl, err
}

defer resp.Body.Close()

if err != nil {
Expand Down
59 changes: 59 additions & 0 deletions crawlers/youtube/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"io"
"net/http"
"os"
"regexp"

"github.com/corpix/uarand"
Expand Down Expand Up @@ -71,3 +72,61 @@ func GetChannelStatus(channelId string) (*ChannelStatus, error) {
return status, nil
}
}

// getChannelStatusDebug requests the /live page of the given YouTube channel,
// dumps the raw response body to debug/<channelId>_<times>.html, and derives
// the channel status from the page content.
//
// times is only used as a suffix of the dump file name, so that repeated calls
// for the same channel produce separate files.
//
// The returned status has Type None when idRegex does not match the page,
// UpComing when both idRegex and upcomingRegex match, and Live otherwise.
//
// An error is returned when the request cannot be built or sent, when the
// server answers 404, when the body cannot be read, or when the debug
// directory cannot be created. Failing to write the dump file is logged but
// does not abort the call.
func getChannelStatusDebug(channelId string, times int) (*ChannelStatus, error) {
	req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("https://youtube.com/channel/%s/live", channelId), nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", uarand.GetRandom())

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	// Register the close before any status check so the body is released on
	// every return path, including the 404 early return below.
	defer res.Body.Close()

	if res.StatusCode == http.StatusNotFound {
		return nil, fmt.Errorf("not found channel %s", channelId)
	}

	content, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	if err := os.MkdirAll("debug", 0775); err != nil {
		return nil, err
	}

	// The dump is a best-effort debugging aid: report create/write failures
	// but still evaluate the page.
	dumpPath := fmt.Sprintf("debug/%s_%d.html", channelId, times)
	if err := os.WriteFile(dumpPath, content, 0666); err != nil {
		logger.Error(err)
	}

	if !idRegex.Match(content) {
		return &ChannelStatus{Type: None}, nil // no streaming or upcoming
	}

	find := regex.GetParams(idRegex, string(content))
	status := &ChannelStatus{Id: find["id"], Type: Live}
	if upcomingRegex.Match(content) {
		status.Type = UpComing
	}

	return status, nil
}
31 changes: 31 additions & 0 deletions crawlers/youtube/scrape_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,37 @@ func TestGetChannelLiveResponse(t *testing.T) {
}
}

// TestGetOneChannelStatus fetches the status of the "music" channel five times
// in a row through getChannelStatusDebug and prints each result as indented
// JSON. Any fetch or marshal error fails the test immediately.
//
// When a status carries a video id and an API key is present in youtubeYaml,
// the id is additionally passed to showVideoContent, whose error also fails
// the test.
func TestGetOneChannelStatus(t *testing.T) {
	logrus.SetLevel(logrus.DebugLevel)
	// load youtube yaml
	file.LoadYaml("youtube", youtubeYaml)
	initKeywordRegexp()

	const channelName = "music"
	channelID := channels[channelName]

	for attempt := 0; attempt < 5; attempt++ {
		status, err := getChannelStatusDebug(channelID, attempt)
		if err != nil {
			t.Fatalf("GetChannelStatus Error: %v", err)
		}

		encoded, err := json.MarshalIndent(status, "", "\t")
		if err != nil {
			t.Fatalf("Json Marshal Error: %v", err)
		}

		fmt.Printf("%s 的直播狀態 \n", channelName)
		fmt.Println(string(encoded))

		// Only look up the video when there is one and an API key is set.
		if status.Id == "" || youtubeYaml.Api.Key == "" {
			continue
		}
		if err := showVideoContent(status.Id); err != nil {
			t.Fatal(err)
		}
	}
}

func TestGetChannelStatus(t *testing.T) {

logrus.SetLevel(logrus.DebugLevel)
Expand Down

0 comments on commit 57198db

Please sign in to comment.