From 944d3723cd0d1196e33ecb8f89e02b6093760196 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Sat, 23 Mar 2024 21:36:25 +0100 Subject: [PATCH 1/9] [YouTube] Do not get twice runs array in YoutubeParsingHelper The runs object was computed twice in getTextFromObject and getUrlFromObject methods, leading to unneeded search costs. This has been avoided by storing the array in method variables. --- .../services/youtube/YoutubeParsingHelper.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index b6ea901d3e..4a63ab0829 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -934,12 +934,13 @@ public static String getTextFromObject(final JsonObject textObject, final boolea return textObject.getString("simpleText"); } - if (textObject.getArray("runs").isEmpty()) { + final JsonArray runs = textObject.getArray("runs"); + if (runs.isEmpty()) { return null; } final StringBuilder textBuilder = new StringBuilder(); - for (final Object o : textObject.getArray("runs")) { + for (final Object o : runs) { final JsonObject run = (JsonObject) o; String text = run.getString("text"); @@ -1017,11 +1018,12 @@ public static String getUrlFromObject(final JsonObject textObject) { return null; } - if (textObject.getArray("runs").isEmpty()) { + final JsonArray runs = textObject.getArray("runs"); + if (runs.isEmpty()) { return null; } - for (final Object textPart : textObject.getArray("runs")) { + for (final Object textPart : runs) { final String url = getUrlFromNavigationEndpoint(((JsonObject) textPart) .getObject("navigationEndpoint")); if (!isNullOrEmpty(url)) { From 
5f0faf34d7219e8bb5baf8ced96e2a6bc1c150e1 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Sat, 23 Mar 2024 23:44:38 +0100 Subject: [PATCH 2/9] [YouTube] Support playlists as URL navigation endpoints --- .../services/youtube/YoutubeParsingHelper.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index 4a63ab0829..ab1a237a81 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -872,9 +872,15 @@ public static String getUrlFromNavigationEndpoint( final String canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl"); final String browseId = browseEndpoint.getString("browseId"); - // All channel ids are prefixed with UC - if (browseId != null && browseId.startsWith("UC")) { - return "https://www.youtube.com/channel/" + browseId; + if (browseId != null) { + if (browseId.startsWith("UC")) { + // All channel IDs are prefixed with UC + return "https://www.youtube.com/channel/" + browseId; + } else if (browseId.startsWith("VL")) { + // All playlist IDs are prefixed with VL, which needs to be removed from the + // playlist ID + return "https://www.youtube.com/playlist?list=" + browseId.substring(2); + } } if (!isNullOrEmpty(canonicalBaseUrl)) { From 65dfc26f137c4f833bff3cb483c5eaf52bed4542 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Sun, 24 Mar 2024 00:07:50 +0100 Subject: [PATCH 3/9] [YouTube] Move channel header's verified status code to YoutubeChannelHelper This code will be used by YoutubeChannelTabExtractor to return whether the channel is verified in the corresponding property of all InfoItems from 
this channel. Also throw an exception when we cannot get the verified status of a channel in YoutubeChannelExtractor due to a missing channelHeader, if the channel has no channelAgeGateRenderer. --- .../youtube/YoutubeChannelHelper.java | 31 +++++++++++++++++++ .../extractors/YoutubeChannelExtractor.java | 25 ++------------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java index 31cfa28565..dd338900c3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java @@ -320,4 +320,35 @@ public static Optional getChannelHeader( return Optional.empty(); } } + + /** + * Check if a channel is verified by using its header. + * + *

+ * The header is mandatory, so the verified status of age-restricted channels with a + * {@code channelAgeGateRenderer} cannot be checked. + *

+ * + * @param channelHeader the {@link ChannelHeader} of a non age-restricted channel + * @return whether the channel is verified + */ + public static boolean isChannelVerified(@Nonnull final ChannelHeader channelHeader) { + // carouselHeaderRenderer and pageHeaderRenderer does not contain any verification + // badges + // Since they are only shown on YouTube internal channels or on channels of large + // organizations broadcasting live events, we can assume the channel to be verified + if (channelHeader.headerType == ChannelHeader.HeaderType.CAROUSEL + || channelHeader.headerType == ChannelHeader.HeaderType.PAGE) { + return true; + } + + if (channelHeader.headerType == ChannelHeader.HeaderType.INTERACTIVE_TABBED) { + // If the header has an autoGenerated property, it should mean that the channel has + // been auto generated by YouTube: we can assume the channel to be verified in this + // case + return channelHeader.json.has("autoGenerated"); + } + + return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges")); + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index fb24254103..0dd79e531d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -350,31 +350,12 @@ public List getParentChannelAvatars() { public boolean isVerified() throws ParsingException { assertPageFetched(); if (channelAgeGateRenderer != null) { + // Verified status is unknown with channelAgeGateRenderers, return false in this case return false; } - if (channelHeader.isPresent()) { - final ChannelHeader header = channelHeader.get(); - - // carouselHeaderRenderer and pageHeaderRenderer does not contain any 
verification - // badges - // Since they are only shown on YouTube internal channels or on channels of large - // organizations broadcasting live events, we can assume the channel to be verified - if (header.headerType == HeaderType.CAROUSEL || header.headerType == HeaderType.PAGE) { - return true; - } - - if (header.headerType == HeaderType.INTERACTIVE_TABBED) { - // If the header has an autoGenerated property, it should mean that the channel has - // been auto generated by YouTube: we can assume the channel to be verified in this - // case - return header.json.has("autoGenerated"); - } - - return YoutubeParsingHelper.isVerified(header.json.getArray("badges")); - } - - return false; + return YoutubeChannelHelper.isChannelVerified(channelHeader.orElseThrow(() -> + new ParsingException("Could not get verified status"))); } @Nonnull From 3760bd70a88b49b456840ae655598a23892f3eca Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Sun, 24 Mar 2024 00:17:22 +0100 Subject: [PATCH 4/9] [YouTube] Add base implementation for show InfoItems As there are multiple show UI elements which share a lot of common data, a base implementation, an abstract class named YoutubeBaseShowInfoItemExtractor, has been created to handle common cases. 
--- .../YoutubeBaseShowInfoItemExtractor.java | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeBaseShowInfoItemExtractor.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeBaseShowInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeBaseShowInfoItemExtractor.java new file mode 100644 index 0000000000..67254302fc --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeBaseShowInfoItemExtractor.java @@ -0,0 +1,65 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import com.grack.nanojson.JsonObject; +import org.schabi.newpipe.extractor.Image; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor; +import org.schabi.newpipe.extractor.utils.Utils; + +import javax.annotation.Nonnull; +import java.util.List; + +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailsFromInfoItem; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint; + +/** + * The base {@link PlaylistInfoItemExtractor} for shows playlists UI elements. 
+ */ +abstract class YoutubeBaseShowInfoItemExtractor implements PlaylistInfoItemExtractor { + + @Nonnull + protected final JsonObject showRenderer; + + YoutubeBaseShowInfoItemExtractor(@Nonnull final JsonObject showRenderer) { + this.showRenderer = showRenderer; + } + + @Override + public String getName() throws ParsingException { + return showRenderer.getString("title"); + } + + @Override + public String getUrl() throws ParsingException { + return getUrlFromNavigationEndpoint(showRenderer.getObject("navigationEndpoint")); + } + + @Nonnull + @Override + public List getThumbnails() throws ParsingException { + return getThumbnailsFromInfoItem(showRenderer.getObject("thumbnailRenderer") + .getObject("showCustomThumbnailRenderer")); + } + + @Override + public long getStreamCount() throws ParsingException { + // The stream count should be always returned in the first text object for English + // localizations, but the complete text is parsed for reliability purposes + final String streamCountText = getTextFromObject( + showRenderer.getObject("thumbnailOverlays") + .getObject("thumbnailOverlayBottomPanelRenderer") + .getObject("text")); + if (streamCountText == null) { + throw new ParsingException("Could not get stream count"); + } + + try { + // The data returned could be a human/shortened number, but no show with more than 1000 + // videos has been found at the time this code was written + return Long.parseLong(Utils.removeNonDigitCharacters(streamCountText)); + } catch (final NumberFormatException e) { + throw new ParsingException("Could not convert stream count to a long", e); + } + } +} From adcb689cfa6f0e15ef7cb3d9b9019bab25095408 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Sun, 24 Mar 2024 00:19:30 +0100 Subject: [PATCH 5/9] [YouTube] Add support for showRenderers in search results --- .../extractors/YoutubeSearchExtractor.java | 11 +++- .../YoutubeShowRendererInfoItemExtractor.java | 57 +++++++++++++++++++ 2 files 
changed, 65 insertions(+), 3 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeShowRendererInfoItemExtractor.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 90d5cab019..f19d8fca09 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -239,9 +239,14 @@ private void collectStreamsFrom(final MultiInfoItemsCollector collector, } else if (extractChannelResults && item.has("channelRenderer")) { collector.commit(new YoutubeChannelInfoItemExtractor( item.getObject("channelRenderer"))); - } else if (extractPlaylistResults && item.has("playlistRenderer")) { - collector.commit(new YoutubePlaylistInfoItemExtractor( - item.getObject("playlistRenderer"))); + } else if (extractPlaylistResults) { + if (item.has("playlistRenderer")) { + collector.commit(new YoutubePlaylistInfoItemExtractor( + item.getObject("playlistRenderer"))); + } else if (item.has("showRenderer")) { + collector.commit(new YoutubeShowRendererInfoItemExtractor( + item.getObject("showRenderer"))); + } } } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeShowRendererInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeShowRendererInfoItemExtractor.java new file mode 100644 index 0000000000..c7119907e0 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeShowRendererInfoItemExtractor.java @@ -0,0 +1,57 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import 
com.grack.nanojson.JsonObject; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +import javax.annotation.Nonnull; + +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromObject; +import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; + +/** + * A {@link YoutubeBaseShowInfoItemExtractor} implementation for {@code showRenderer}s. + */ +class YoutubeShowRendererInfoItemExtractor extends YoutubeBaseShowInfoItemExtractor { + + @Nonnull + private final JsonObject shortBylineText; + @Nonnull + private final JsonObject longBylineText; + + YoutubeShowRendererInfoItemExtractor(@Nonnull final JsonObject showRenderer) { + super(showRenderer); + this.shortBylineText = showRenderer.getObject("shortBylineText"); + this.longBylineText = showRenderer.getObject("longBylineText"); + } + + @Override + public String getUploaderName() throws ParsingException { + String name = getTextFromObject(longBylineText); + if (isNullOrEmpty(name)) { + name = getTextFromObject(shortBylineText); + if (isNullOrEmpty(name)) { + throw new ParsingException("Could not get uploader name"); + } + } + return name; + } + + @Override + public String getUploaderUrl() throws ParsingException { + String uploaderUrl = getUrlFromObject(longBylineText); + if (uploaderUrl == null) { + uploaderUrl = getUrlFromObject(shortBylineText); + if (uploaderUrl == null) { + throw new ParsingException("Could not get uploader URL"); + } + } + return uploaderUrl; + } + + @Override + public boolean isUploaderVerified() throws ParsingException { + // We do not have this information in showRenderers + return false; + } +} From 675a7a71aa5375c4f0afe2d248dcd39db67e83c1 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Sat, 30 Mar 2024 15:55:59 +0100 Subject: [PATCH 6/9] [YouTube] Support shows in channels and provide verified 
status to items Also fix naming of info items' collection methods. --- .../YoutubeChannelTabExtractor.java | 240 +++++++++++++----- 1 file changed, 181 insertions(+), 59 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java index 49049457ac..6df7538f53 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java @@ -38,8 +38,8 @@ * A {@link ChannelTabExtractor} implementation for the YouTube service. * *

- * It currently supports {@code Videos}, {@code Shorts}, {@code Live}, {@code Playlists} and - * {@code Channels} tabs. + * It currently supports {@code Videos}, {@code Shorts}, {@code Live}, {@code Playlists}, + * {@code Albums} and {@code Channels} tabs. *

*/ public class YoutubeChannelTabExtractor extends ChannelTabExtractor { @@ -61,6 +61,8 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor { private String channelId; @Nullable private String visitorData; + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") + private Optional channelHeader; public YoutubeChannelTabExtractor(final StreamingService service, final ListLinkHandler linkHandler) { @@ -90,14 +92,15 @@ private String getChannelTabsParameters() throws ParsingException { @Override public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException { - channelId = resolveChannelId(super.getId()); + final String channelIdFromId = resolveChannelId(super.getId()); final String params = getChannelTabsParameters(); - final YoutubeChannelHelper.ChannelResponseData data = getChannelResponse(channelId, + final YoutubeChannelHelper.ChannelResponseData data = getChannelResponse(channelIdFromId, params, getExtractorLocalization(), getExtractorContentCountry()); jsonResponse = data.jsonResponse; + channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse); channelId = data.channelId; if (useVisitorData) { visitorData = jsonResponse.getObject("responseContext").getString("visitorData"); @@ -205,18 +208,27 @@ public InfoItemsPage getInitialPage() throws IOException, ExtractionEx } } + final VerifiedStatus verifiedStatus = channelHeader.flatMap(header -> + YoutubeChannelHelper.isChannelVerified(header) + ? Optional.of(VerifiedStatus.VERIFIED) + : Optional.of(VerifiedStatus.UNVERIFIED)) + .orElse(VerifiedStatus.UNKNOWN); + // If a channel tab is fetched, the next page requires channel ID and name, as channel // streams don't have their channel specified. // We also need to set the visitor data here when it should be enabled, as it is required // to get continuations on some channel tabs, and we need a way to pass it between pages - final List channelIds = useVisitorData && !isNullOrEmpty(visitorData) - ? 
List.of(getChannelName(), getUrl(), visitorData) - : List.of(getChannelName(), getUrl()); + final String channelName = getChannelName(); + final String channelUrl = getUrl(); - final JsonObject continuation = collectItemsFrom(collector, items, channelIds) + final JsonObject continuation = collectItemsFrom(collector, items, verifiedStatus, + channelName, channelUrl) .orElse(null); - final Page nextPage = getNextPageFrom(continuation, channelIds); + final Page nextPage = getNextPageFrom(continuation, + useVisitorData && !isNullOrEmpty(visitorData) + ? List.of(channelName, channelUrl, verifiedStatus.toString(), visitorData) + : List.of(channelName, channelUrl, verifiedStatus.toString())); return new InfoItemsPage<>(collector, nextPage); } @@ -282,16 +294,48 @@ Optional getTabData() { private Optional collectItemsFrom(@Nonnull final MultiInfoItemsCollector collector, @Nonnull final JsonArray items, @Nonnull final List channelIds) { + final String channelName; + final String channelUrl; + VerifiedStatus verifiedStatus; + + if (channelIds.size() >= 3) { + channelName = channelIds.get(0); + channelUrl = channelIds.get(1); + try { + verifiedStatus = VerifiedStatus.valueOf(channelIds.get(2)); + } catch (final IllegalArgumentException e) { + // An IllegalArgumentException can be thrown if someone passes a third channel ID + // which is not of the enum type in the getPage method, use the UNKNOWN + // VerifiedStatus enum value in this case + verifiedStatus = VerifiedStatus.UNKNOWN; + } + } else { + channelName = null; + channelUrl = null; + verifiedStatus = VerifiedStatus.UNKNOWN; + } + + return collectItemsFrom(collector, items, verifiedStatus, channelName, channelUrl); + } + + private Optional collectItemsFrom(@Nonnull final MultiInfoItemsCollector collector, + @Nonnull final JsonArray items, + @Nonnull final VerifiedStatus verifiedStatus, + @Nullable final String channelName, + @Nullable final String channelUrl) { return items.stream() .filter(JsonObject.class::isInstance) 
.map(JsonObject.class::cast) - .map(item -> collectItem(collector, item, channelIds)) + .map(item -> collectItem( + collector, item, verifiedStatus, channelName, channelUrl)) .reduce(Optional.empty(), (c1, c2) -> c1.or(() -> c2)); } private Optional collectItem(@Nonnull final MultiInfoItemsCollector collector, @Nonnull final JsonObject item, - @Nonnull final List channelIds) { + @Nonnull final VerifiedStatus channelVerifiedStatus, + @Nullable final String channelName, + @Nullable final String channelUrl) { final TimeAgoParser timeAgoParser = getTimeAgoParser(); if (item.has("richItemRenderer")) { @@ -299,33 +343,37 @@ private Optional collectItem(@Nonnull final MultiInfoItemsCollector .getObject("content"); if (richItem.has("videoRenderer")) { - getCommitVideoConsumer(collector, timeAgoParser, channelIds, - richItem.getObject("videoRenderer")); + commitVideo(collector, timeAgoParser, richItem.getObject("videoRenderer"), + channelVerifiedStatus, channelName, channelUrl); } else if (richItem.has("reelItemRenderer")) { - getCommitReelItemConsumer(collector, channelIds, - richItem.getObject("reelItemRenderer")); + commitReel(collector, richItem.getObject("reelItemRenderer"), + channelVerifiedStatus, channelName, channelUrl); } else if (richItem.has("playlistRenderer")) { - getCommitPlaylistConsumer(collector, channelIds, - richItem.getObject("playlistRenderer")); + commitPlaylist(collector, richItem.getObject("playlistRenderer"), + channelVerifiedStatus, channelName, channelUrl); } } else if (item.has("gridVideoRenderer")) { - getCommitVideoConsumer(collector, timeAgoParser, channelIds, - item.getObject("gridVideoRenderer")); + commitVideo(collector, timeAgoParser, item.getObject("gridVideoRenderer"), + channelVerifiedStatus, channelName, channelUrl); } else if (item.has("gridPlaylistRenderer")) { - getCommitPlaylistConsumer(collector, channelIds, - item.getObject("gridPlaylistRenderer")); + commitPlaylist(collector, item.getObject("gridPlaylistRenderer"), + 
channelVerifiedStatus, channelName, channelUrl); + } else if (item.has("gridShowRenderer")) { + collector.commit(new YoutubeGridShowRendererChannelInfoItemExtractor( + item.getObject("gridShowRenderer"), channelVerifiedStatus, channelName, + channelUrl)); } else if (item.has("shelfRenderer")) { return collectItem(collector, item.getObject("shelfRenderer") - .getObject("content"), channelIds); + .getObject("content"), channelVerifiedStatus, channelName, channelUrl); } else if (item.has("itemSectionRenderer")) { return collectItemsFrom(collector, item.getObject("itemSectionRenderer") - .getArray("contents"), channelIds); + .getArray("contents"), channelVerifiedStatus, channelName, channelUrl); } else if (item.has("horizontalListRenderer")) { return collectItemsFrom(collector, item.getObject("horizontalListRenderer") - .getArray("items"), channelIds); + .getArray("items"), channelVerifiedStatus, channelName, channelUrl); } else if (item.has("expandedShelfContentsRenderer")) { return collectItemsFrom(collector, item.getObject("expandedShelfContentsRenderer") - .getArray("items"), channelIds); + .getArray("items"), channelVerifiedStatus, channelName, channelUrl); } else if (item.has("continuationItemRenderer")) { return Optional.ofNullable(item.getObject("continuationItemRenderer")); } @@ -333,72 +381,91 @@ private Optional collectItem(@Nonnull final MultiInfoItemsCollector return Optional.empty(); } - private void getCommitVideoConsumer(@Nonnull final MultiInfoItemsCollector collector, - @Nonnull final TimeAgoParser timeAgoParser, - @Nonnull final List channelIds, - @Nonnull final JsonObject jsonObject) { + private static void commitReel(@Nonnull final MultiInfoItemsCollector collector, + @Nonnull final JsonObject reelItemRenderer, + @Nonnull final VerifiedStatus channelVerifiedStatus, + @Nullable final String channelName, + @Nullable final String channelUrl) { collector.commit( - new YoutubeStreamInfoItemExtractor(jsonObject, timeAgoParser) { + new 
YoutubeReelInfoItemExtractor(reelItemRenderer) { @Override public String getUploaderName() throws ParsingException { - if (channelIds.size() >= 2) { - return channelIds.get(0); - } - return super.getUploaderName(); + return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName; } @Override public String getUploaderUrl() throws ParsingException { - if (channelIds.size() >= 2) { - return channelIds.get(1); - } - return super.getUploaderUrl(); + return isNullOrEmpty(channelUrl) ? super.getUploaderUrl() : channelUrl; + } + + @Override + public boolean isUploaderVerified() { + return channelVerifiedStatus == VerifiedStatus.VERIFIED; } }); } - private void getCommitReelItemConsumer(@Nonnull final MultiInfoItemsCollector collector, - @Nonnull final List channelIds, - @Nonnull final JsonObject jsonObject) { + private void commitVideo(@Nonnull final MultiInfoItemsCollector collector, + @Nonnull final TimeAgoParser timeAgoParser, + @Nonnull final JsonObject jsonObject, + @Nonnull final VerifiedStatus channelVerifiedStatus, + @Nullable final String channelName, + @Nullable final String channelUrl) { collector.commit( - new YoutubeReelInfoItemExtractor(jsonObject) { + new YoutubeStreamInfoItemExtractor(jsonObject, timeAgoParser) { @Override public String getUploaderName() throws ParsingException { - if (channelIds.size() >= 2) { - return channelIds.get(0); - } - return super.getUploaderName(); + return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName; } @Override public String getUploaderUrl() throws ParsingException { - if (channelIds.size() >= 2) { - return channelIds.get(1); + return isNullOrEmpty(channelUrl) ? 
super.getUploaderUrl() : channelUrl; + } + + @SuppressWarnings("DuplicatedCode") + @Override + public boolean isUploaderVerified() throws ParsingException { + switch (channelVerifiedStatus) { + case VERIFIED: + return true; + case UNVERIFIED: + return false; + default: + return super.isUploaderVerified(); } - return super.getUploaderUrl(); } }); } - private void getCommitPlaylistConsumer(@Nonnull final MultiInfoItemsCollector collector, - @Nonnull final List channelIds, - @Nonnull final JsonObject jsonObject) { + private void commitPlaylist(@Nonnull final MultiInfoItemsCollector collector, + @Nonnull final JsonObject jsonObject, + @Nonnull final VerifiedStatus channelVerifiedStatus, + @Nullable final String channelName, + @Nullable final String channelUrl) { collector.commit( new YoutubePlaylistInfoItemExtractor(jsonObject) { @Override public String getUploaderName() throws ParsingException { - if (channelIds.size() >= 2) { - return channelIds.get(0); - } - return super.getUploaderName(); + return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName; } @Override public String getUploaderUrl() throws ParsingException { - if (channelIds.size() >= 2) { - return channelIds.get(1); + return isNullOrEmpty(channelUrl) ? 
super.getUploaderUrl() : channelUrl; + } + + @SuppressWarnings("DuplicatedCode") + @Override + public boolean isUploaderVerified() throws ParsingException { + switch (channelVerifiedStatus) { + case VERIFIED: + return true; + case UNVERIFIED: + return false; + default: + return super.isUploaderVerified(); } - return super.getUploaderUrl(); } }); } @@ -476,4 +543,59 @@ Optional getTabData() { return Optional.of(tabRenderer); } } + + /** + * Enum representing the verified state of a channel + */ + private enum VerifiedStatus { + VERIFIED, + UNVERIFIED, + UNKNOWN + } + + private static final class YoutubeGridShowRendererChannelInfoItemExtractor + extends YoutubeBaseShowInfoItemExtractor { + + @Nonnull + private final VerifiedStatus verifiedStatus; + + @Nullable + private final String channelName; + + @Nullable + private final String channelUrl; + + private YoutubeGridShowRendererChannelInfoItemExtractor( + @Nonnull final JsonObject gridShowRenderer, + @Nonnull final VerifiedStatus verifiedStatus, + @Nullable final String channelName, + @Nullable final String channelUrl) { + super(gridShowRenderer); + this.verifiedStatus = verifiedStatus; + this.channelName = channelName; + this.channelUrl = channelUrl; + } + + @Override + public String getUploaderName() { + return channelName; + } + + @Override + public String getUploaderUrl() { + return channelUrl; + } + + @Override + public boolean isUploaderVerified() throws ParsingException { + switch (verifiedStatus) { + case VERIFIED: + return true; + case UNVERIFIED: + return false; + default: + throw new ParsingException("Could not get uploader verification status"); + } + } + } } From 584866be5eb197d4ad421102bda6efd528d6a714 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Thu, 4 Apr 2024 19:36:31 +0200 Subject: [PATCH 7/9] [YouTube] Add common methods to get ID, name and age gate object of channels Also move duplicate strings into constants and support pageHeader channel header in
user channels on YoutubeChannelHelper methods. --- .../youtube/YoutubeChannelHelper.java | 256 ++++++++++++++++-- 1 file changed, 229 insertions(+), 27 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java index dd338900c3..74335017ae 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java @@ -4,16 +4,19 @@ import com.grack.nanojson.JsonWriter; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.ContentCountry; import org.schabi.newpipe.extractor.localization.Localization; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Optional; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.defaultAlertsCheck; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; @@ -21,6 +24,19 @@ * Shared functions for extracting YouTube channel pages and tabs. 
*/ public final class YoutubeChannelHelper { + + private static final String BROWSE_ENDPOINT = "browseEndpoint"; + private static final String BROWSE_ID = "browseId"; + private static final String CAROUSEL_HEADER_RENDERER = "carouselHeaderRenderer"; + private static final String C4_TABBED_HEADER_RENDERER = "c4TabbedHeaderRenderer"; + private static final String CONTENT = "content"; + private static final String CONTENTS = "contents"; + private static final String HEADER = "header"; + private static final String PAGE_HEADER_VIEW_MODEL = "pageHeaderViewModel"; + private static final String TAB_RENDERER = "tabRenderer"; + private static final String TITLE = "title"; + private static final String TOPIC_CHANNEL_DETAILS_RENDERER = "topicChannelDetailsRenderer"; + private YoutubeChannelHelper() { } @@ -64,8 +80,8 @@ public static String resolveChannelId(@Nonnull final String idOrPath) .getObject("webCommandMetadata") .getString("webPageType", ""); - final JsonObject browseEndpoint = endpoint.getObject("browseEndpoint"); - final String browseId = browseEndpoint.getString("browseId", ""); + final JsonObject browseEndpoint = endpoint.getObject(BROWSE_ENDPOINT); + final String browseId = browseEndpoint.getString(BROWSE_ID, ""); if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE") || webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL") @@ -140,7 +156,7 @@ public static ChannelResponseData getChannelResponse(@Nonnull final String chann while (level < 3) { final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder( localization, country) - .value("browseId", id) + .value(BROWSE_ID, id) .value("params", parameters) .done()) .getBytes(StandardCharsets.UTF_8); @@ -159,8 +175,8 @@ public static ChannelResponseData getChannelResponse(@Nonnull final String chann .getObject("webCommandMetadata") .getString("webPageType", ""); - final String browseId = endpoint.getObject("browseEndpoint") - .getString("browseId", ""); + final String browseId = 
endpoint.getObject(BROWSE_ENDPOINT) + .getString(BROWSE_ID, ""); if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE") || webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL") @@ -257,7 +273,7 @@ public enum HeaderType { * A {@code pageHeaderRenderer} channel header type. * *

- * This header returns only the channel's name and its avatar. + * This header returns only the channel's name and its avatar for system channels. *

*/ PAGE @@ -294,20 +310,20 @@ private ChannelHeader(@Nonnull final JsonObject json, final HeaderType headerTyp @Nonnull public static Optional getChannelHeader( @Nonnull final JsonObject channelResponse) { - final JsonObject header = channelResponse.getObject("header"); + final JsonObject header = channelResponse.getObject(HEADER); - if (header.has("c4TabbedHeaderRenderer")) { - return Optional.of(header.getObject("c4TabbedHeaderRenderer")) + if (header.has(C4_TABBED_HEADER_RENDERER)) { + return Optional.of(header.getObject(C4_TABBED_HEADER_RENDERER)) .map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.C4_TABBED)); - } else if (header.has("carouselHeaderRenderer")) { - return header.getObject("carouselHeaderRenderer") - .getArray("contents") + } else if (header.has(CAROUSEL_HEADER_RENDERER)) { + return header.getObject(CAROUSEL_HEADER_RENDERER) + .getArray(CONTENTS) .stream() .filter(JsonObject.class::isInstance) .map(JsonObject.class::cast) - .filter(item -> item.has("topicChannelDetailsRenderer")) + .filter(item -> item.has(TOPIC_CHANNEL_DETAILS_RENDERER)) .findFirst() - .map(item -> item.getObject("topicChannelDetailsRenderer")) + .map(item -> item.getObject(TOPIC_CHANNEL_DETAILS_RENDERER)) .map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.CAROUSEL)); } else if (header.has("pageHeaderRenderer")) { return Optional.of(header.getObject("pageHeaderRenderer")) @@ -333,22 +349,208 @@ public static Optional getChannelHeader( * @return whether the channel is verified */ public static boolean isChannelVerified(@Nonnull final ChannelHeader channelHeader) { - // carouselHeaderRenderer and pageHeaderRenderer does not contain any verification - // badges - // Since they are only shown on YouTube internal channels or on channels of large - // organizations broadcasting live events, we can assume the channel to be verified - if (channelHeader.headerType == ChannelHeader.HeaderType.CAROUSEL - || channelHeader.headerType == ChannelHeader.HeaderType.PAGE) { 
- return true; + switch (channelHeader.headerType) { + // carouselHeaderRenderers do not contain any verification badges + // Since they are only shown on YouTube internal channels or on channels of large + // organizations broadcasting live events, we can assume the channel to be verified + case CAROUSEL: + return true; + case PAGE: + final JsonObject pageHeaderViewModel = channelHeader.json.getObject(CONTENT) + .getObject(PAGE_HEADER_VIEW_MODEL); + + final boolean hasCircleOrMusicIcon = pageHeaderViewModel.getObject(TITLE) + .getObject("dynamicTextViewModel") + .getObject("text") + .getArray("attachmentRuns") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .anyMatch(attachmentRun -> attachmentRun.getObject("element") + .getObject("type") + .getObject("imageType") + .getObject("image") + .getArray("sources") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .anyMatch(source -> { + final String imageName = source.getObject("clientResource") + .getString("imageName"); + return "CHECK_CIRCLE_FILLED".equals(imageName) + || "MUSIC_FILLED".equals(imageName); + })); + if (!hasCircleOrMusicIcon && pageHeaderViewModel.getObject("image") + .has("contentPreviewImageViewModel")) { + // If a pageHeaderRenderer has no object in which a check verified may be + // contained and if it has a contentPreviewImageViewModel, it should mean + // that the header is coming from a system channel, which we can assume to + // be verified + return true; + } + + return hasCircleOrMusicIcon; + case INTERACTIVE_TABBED: + // If the header has an autoGenerated property, it should mean that the channel has + // been auto generated by YouTube: we can assume the channel to be verified in this + // case + return channelHeader.json.has("autoGenerated"); + default: + return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges")); } + } - if (channelHeader.headerType == ChannelHeader.HeaderType.INTERACTIVE_TABBED) { - 
// If the header has an autoGenerated property, it should mean that the channel has - // been auto generated by YouTube: we can assume the channel to be verified in this - // case - return channelHeader.json.has("autoGenerated"); + /** + * Get the ID of a channel from its response. + * + *

+ * For {@link ChannelHeader.HeaderType#C4_TABBED c4TabbedHeaderRenderer} and + * {@link ChannelHeader.HeaderType#CAROUSEL carouselHeaderRenderer} channel headers, the ID is + * obtained from the header. + *

+ * + *

+ * For other headers or if it cannot be got, the ID from the {@code channelMetadataRenderer} + * in the channel response is used. + *

+ * + *

+ * If the ID still cannot be obtained, the fallback channel ID, if provided, will be used. + *

+ * + * @param header the channel header + * @param fallbackChannelId the fallback channel ID, which can be null + * @return the ID of the channel + * @throws ParsingException if the channel ID cannot be got from the channel header, the + * channel response and the fallback channel ID + */ + @Nonnull + public static String getChannelId( + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") + @Nonnull final Optional header, + @Nonnull final JsonObject jsonResponse, + @Nullable final String fallbackChannelId) throws ParsingException { + if (header.isPresent()) { + final ChannelHeader channelHeader = header.get(); + switch (channelHeader.headerType) { + case C4_TABBED: + final String channelId = channelHeader.json.getObject(HEADER) + .getObject(C4_TABBED_HEADER_RENDERER) + .getString("channelId", ""); + if (!isNullOrEmpty(channelId)) { + return channelId; + } + final String navigationC4TabChannelId = channelHeader.json + .getObject("navigationEndpoint") + .getObject(BROWSE_ENDPOINT) + .getString(BROWSE_ID); + if (!isNullOrEmpty(navigationC4TabChannelId)) { + return navigationC4TabChannelId; + } + break; + case CAROUSEL: + final String navigationCarouselChannelId = channelHeader.json.getObject(HEADER) + .getObject(CAROUSEL_HEADER_RENDERER) + .getArray(CONTENTS) + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .filter(item -> item.has(TOPIC_CHANNEL_DETAILS_RENDERER)) + .findFirst() + .orElse(new JsonObject()) + .getObject(TOPIC_CHANNEL_DETAILS_RENDERER) + .getObject("navigationEndpoint") + .getObject(BROWSE_ENDPOINT) + .getString(BROWSE_ID); + if (!isNullOrEmpty(navigationCarouselChannelId)) { + return navigationCarouselChannelId; + } + break; + default: + break; + } + } + + final String externalChannelId = jsonResponse.getObject("metadata") + .getObject("channelMetadataRenderer") + .getString("externalChannelId"); + if (!isNullOrEmpty(externalChannelId)) { + return externalChannelId; + } + + if (!isNullOrEmpty(fallbackChannelId)) 
{ + return fallbackChannelId; + } else { + throw new ParsingException("Could not get channel ID"); + } + } + + @Nonnull + public static String getChannelName(@SuppressWarnings("OptionalUsedAsFieldOrParameterType") + @Nonnull final Optional channelHeader, + @Nonnull final JsonObject jsonResponse, + @Nullable final JsonObject channelAgeGateRenderer) + throws ParsingException { + if (channelAgeGateRenderer != null) { + final String title = channelAgeGateRenderer.getString("channelTitle"); + if (isNullOrEmpty(title)) { + throw new ParsingException("Could not get channel name"); + } + return title; } - return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges")); + final String metadataRendererTitle = jsonResponse.getObject("metadata") + .getObject("channelMetadataRenderer") + .getString(TITLE); + if (!isNullOrEmpty(metadataRendererTitle)) { + return metadataRendererTitle; + } + + return channelHeader.map(header -> { + final JsonObject channelJson = header.json; + switch (header.headerType) { + case PAGE: + return channelJson.getObject(CONTENT) + .getObject(PAGE_HEADER_VIEW_MODEL) + .getObject(TITLE) + .getObject("dynamicTextViewModel") + .getObject("text") + .getString(CONTENT, channelJson.getString("pageTitle")); + case CAROUSEL: + case INTERACTIVE_TABBED: + return getTextFromObject(channelJson.getObject(TITLE)); + case C4_TABBED: + default: + return channelJson.getString(TITLE); + } + }) + // The channel name from a microformatDataRenderer may be different from the one + // displayed, especially for auto-generated channels, depending on the language + // requested for the interface (hl parameter of InnerTube requests' payload) + .or(() -> Optional.ofNullable(jsonResponse.getObject("microformat") + .getObject("microformatDataRenderer") + .getString(TITLE))) + .orElseThrow(() -> new ParsingException("Could not get channel name")); + } + + @Nullable + public static JsonObject getChannelAgeGateRenderer(@Nonnull final JsonObject jsonResponse) { + return 
jsonResponse.getObject(CONTENTS) + .getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .flatMap(tab -> tab.getObject(TAB_RENDERER) + .getObject(CONTENT) + .getObject("sectionListRenderer") + .getArray(CONTENTS) + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast)) + .filter(content -> content.has("channelAgeGateRenderer")) + .map(content -> content.getObject("channelAgeGateRenderer")) + .findFirst() + .orElse(null); } } From 88df7472080842a6dc02573063372480f1f61841 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Thu, 4 Apr 2024 19:41:30 +0200 Subject: [PATCH 8/9] [YouTube] Support pageHeader on user channels Also move duplicate strings into constants and add a missing default switch case. --- .../extractors/YoutubeChannelExtractor.java | 216 +++++++++--------- .../YoutubeChannelTabExtractor.java | 58 +---- 2 files changed, 118 insertions(+), 156 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 0dd79e531d..9facac9b5b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -23,7 +23,6 @@ import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.getChannelResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.resolveChannelId; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; -import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import com.grack.nanojson.JsonArray; import 
com.grack.nanojson.JsonObject; @@ -59,6 +58,18 @@ public class YoutubeChannelExtractor extends ChannelExtractor { + // Constants of objects used multiples from channel responses + private static final String IMAGE = "image"; + private static final String CONTENTS = "contents"; + private static final String CONTENT_PREVIEW_IMAGE_VIEW_MODEL = "contentPreviewImageViewModel"; + private static final String PAGE_HEADER_VIEW_MODEL = "pageHeaderViewModel"; + private static final String TAB_RENDERER = "tabRenderer"; + private static final String CONTENT = "content"; + private static final String METADATA = "metadata"; + private static final String AVATAR = "avatar"; + private static final String THUMBNAILS = "thumbnails"; + private static final String SOURCES = "sources"; + private JsonObject jsonResponse; @SuppressWarnings("OptionalUsedAsFieldOrParameterType") @@ -95,28 +106,7 @@ public void onFetchPage(@Nonnull final Downloader downloader) jsonResponse = data.jsonResponse; channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse); channelId = data.channelId; - channelAgeGateRenderer = getChannelAgeGateRenderer(); - } - - @Nullable - private JsonObject getChannelAgeGateRenderer() { - return jsonResponse.getObject("contents") - .getObject("twoColumnBrowseResultsRenderer") - .getArray("tabs") - .stream() - .filter(JsonObject.class::isInstance) - .map(JsonObject.class::cast) - .flatMap(tab -> tab.getObject("tabRenderer") - .getObject("content") - .getObject("sectionListRenderer") - .getArray("contents") - .stream() - .filter(JsonObject.class::isInstance) - .map(JsonObject.class::cast)) - .filter(content -> content.has("channelAgeGateRenderer")) - .map(content -> content.getObject("channelAgeGateRenderer")) - .findFirst() - .orElse(null); + channelAgeGateRenderer = YoutubeChannelHelper.getChannelAgeGateRenderer(jsonResponse); } @Nonnull @@ -133,62 +123,15 @@ public String getUrl() throws ParsingException { @Override public String getId() throws ParsingException { 
assertPageFetched(); - return channelHeader.map(header -> header.json) - .flatMap(header -> Optional.ofNullable(header.getString("channelId")) - .or(() -> Optional.ofNullable(header.getObject("navigationEndpoint") - .getObject("browseEndpoint") - .getString("browseId")) - )) - .or(() -> Optional.ofNullable(channelId)) - .orElseThrow(() -> new ParsingException("Could not get channel ID")); + return YoutubeChannelHelper.getChannelId(channelHeader, jsonResponse, channelId); } @Nonnull @Override public String getName() throws ParsingException { assertPageFetched(); - if (channelAgeGateRenderer != null) { - final String title = channelAgeGateRenderer.getString("channelTitle"); - if (isNullOrEmpty(title)) { - throw new ParsingException("Could not get channel name"); - } - return title; - } - - final String metadataRendererTitle = jsonResponse.getObject("metadata") - .getObject("channelMetadataRenderer") - .getString("title"); - if (!isNullOrEmpty(metadataRendererTitle)) { - return metadataRendererTitle; - } - - return channelHeader.map(header -> { - final JsonObject channelJson = header.json; - switch (header.headerType) { - case PAGE: - return channelJson.getObject("content") - .getObject("pageHeaderViewModel") - .getObject("title") - .getObject("dynamicTextViewModel") - .getObject("text") - .getString("content", channelJson.getString("pageTitle")); - - case CAROUSEL: - case INTERACTIVE_TABBED: - return getTextFromObject(channelJson.getObject("title")); - - case C4_TABBED: - default: - return channelJson.getString("title"); - } - }) - // The channel name from a microformatDataRenderer may be different from the one displayed, - // especially for auto-generated channels, depending on the language requested for the - // interface (hl parameter of InnerTube requests' payload) - .or(() -> Optional.ofNullable(jsonResponse.getObject("microformat") - .getObject("microformatDataRenderer") - .getString("title"))) - .orElseThrow(() -> new ParsingException("Could not get channel 
name")); + return YoutubeChannelHelper.getChannelName( + channelHeader, jsonResponse, channelAgeGateRenderer); } @Nonnull @@ -196,8 +139,8 @@ public String getName() throws ParsingException { public List getAvatars() throws ParsingException { assertPageFetched(); if (channelAgeGateRenderer != null) { - return Optional.ofNullable(channelAgeGateRenderer.getObject("avatar") - .getArray("thumbnails")) + return Optional.ofNullable(channelAgeGateRenderer.getObject(AVATAR) + .getArray(THUMBNAILS)) .map(YoutubeParsingHelper::getImagesFromThumbnailsArray) .orElseThrow(() -> new ParsingException("Could not get avatars")); } @@ -205,22 +148,31 @@ public List getAvatars() throws ParsingException { return channelHeader.map(header -> { switch (header.headerType) { case PAGE: - return header.json.getObject("content") - .getObject("pageHeaderViewModel") - .getObject("image") - .getObject("contentPreviewImageViewModel") - .getObject("image") - .getArray("sources"); - + final JsonObject imageObj = header.json.getObject(CONTENT) + .getObject(PAGE_HEADER_VIEW_MODEL) + .getObject(IMAGE); + if (imageObj.has(CONTENT_PREVIEW_IMAGE_VIEW_MODEL)) { + return imageObj.getObject(CONTENT_PREVIEW_IMAGE_VIEW_MODEL) + .getObject(IMAGE) + .getArray(SOURCES); + } + if (imageObj.has("decoratedAvatarViewModel")) { + return imageObj.getObject("decoratedAvatarViewModel") + .getObject(AVATAR).getObject("avatarViewModel") + .getObject(IMAGE) + .getArray(SOURCES); + } + // Return an empty avatar array as a fallback + return new JsonArray(); case INTERACTIVE_TABBED: return header.json.getObject("boxArt") - .getArray("thumbnails"); + .getArray(THUMBNAILS); case C4_TABBED: case CAROUSEL: default: - return header.json.getObject("avatar") - .getArray("thumbnails"); + return header.json.getObject(AVATAR) + .getArray(THUMBNAILS); } }) .map(YoutubeParsingHelper::getImagesFromThumbnailsArray) @@ -235,10 +187,24 @@ public List getBanners() { return List.of(); } - // No banner is available on pageHeaderRenderer headers - return channelHeader.filter(header
-> header.headerType != HeaderType.PAGE) - .map(header -> header.json.getObject("banner") - .getArray("thumbnails")) + return channelHeader.map(header -> { + if (header.headerType == HeaderType.PAGE) { + final JsonObject pageHeaderViewModel = header.json.getObject(CONTENT) + .getObject(PAGE_HEADER_VIEW_MODEL); + if (pageHeaderViewModel.has("banner")) { + return pageHeaderViewModel.getObject("banner") + .getObject("imageBannerViewModel").getObject(IMAGE) + .getArray(SOURCES); + } + // No banner is available (this should happen on pageHeaderRenderers of + // system channels), use an empty JsonArray instead + return new JsonArray(); + } + + return header.json + .getObject("banner") + .getArray(THUMBNAILS); + }) .map(YoutubeParsingHelper::getImagesFromThumbnailsArray) .orElse(List.of()); } @@ -264,14 +230,16 @@ public long getSubscriberCount() throws ParsingException { if (channelHeader.isPresent()) { final ChannelHeader header = channelHeader.get(); - if (header.headerType == HeaderType.INTERACTIVE_TABBED - || header.headerType == HeaderType.PAGE) { - // No subscriber count is available on interactiveTabbedHeaderRenderer and - // pageHeaderRenderer headers + if (header.headerType == HeaderType.INTERACTIVE_TABBED) { + // No subscriber count is available on interactiveTabbedHeaderRenderer header return UNKNOWN_SUBSCRIBER_COUNT; } final JsonObject headerJson = header.json; + if (header.headerType == HeaderType.PAGE) { + return getSubscriberCountFromPageChannelHeader(headerJson); + } + JsonObject textObject = null; if (headerJson.has("subscriberCountText")) { @@ -292,6 +260,51 @@ public long getSubscriberCount() throws ParsingException { return UNKNOWN_SUBSCRIBER_COUNT; } + private long getSubscriberCountFromPageChannelHeader(@Nonnull final JsonObject headerJson) + throws ParsingException { + final JsonObject metadataObject = headerJson.getObject(CONTENT) + .getObject(PAGE_HEADER_VIEW_MODEL) + .getObject(METADATA); + if (metadataObject.has("contentMetadataViewModel")) { + final JsonArray
metadataPart = metadataObject.getObject("contentMetadataViewModel") + .getArray("metadataRows") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .map(metadataRow -> metadataRow.getArray("metadataParts")) + /* + Find metadata parts which have two elements: channel handle and subscriber + count. + + On autogenerated music channels, the subscriber count is not shown with this + header. + + Use the first metadata parts object found. + */ + .filter(metadataParts -> metadataParts.size() == 2) + .findFirst() + .orElse(null); + if (metadataPart == null) { + // As the parsing of the metadata parts object needed to get the subscriber count + // is fragile, return UNKNOWN_SUBSCRIBER_COUNT when it cannot be got + return UNKNOWN_SUBSCRIBER_COUNT; + } + + try { + // The subscriber count is at the same position for all languages as of 02/03/2024 + return Utils.mixedNumberWordToLong(metadataPart.getObject(0) + .getObject("text") + .getString(CONTENT)); + } catch (final NumberFormatException e) { + throw new ParsingException("Could not get subscriber count", e); + } + } + + // If the channel header has no contentMetadataViewModel (which is the case for system + // channels using this header), return UNKNOWN_SUBSCRIBER_COUNT + return UNKNOWN_SUBSCRIBER_COUNT; + } + @Override public String getDescription() throws ParsingException { assertPageFetched(); @@ -302,12 +315,6 @@ public String getDescription() throws ParsingException { try { if (channelHeader.isPresent()) { final ChannelHeader header = channelHeader.get(); - - if (header.headerType == HeaderType.PAGE) { - // A pageHeaderRenderer doesn't contain a description - return null; - } - if (header.headerType == HeaderType.INTERACTIVE_TABBED) { /* In an interactiveTabbedHeaderRenderer, the real description, is only available @@ -322,7 +329,7 @@ public String getDescription() throws ParsingException { } // The description is cut and the original one can be only accessed from the About tab - return 
jsonResponse.getObject("metadata") + return jsonResponse.getObject(METADATA) .getObject("channelMetadataRenderer") .getString("description"); } catch (final Exception e) { @@ -371,7 +378,7 @@ public List getTabs() throws ParsingException { @Nonnull private List getTabsForNonAgeRestrictedChannels() throws ParsingException { - final JsonArray responseTabs = jsonResponse.getObject("contents") + final JsonArray responseTabs = jsonResponse.getObject(CONTENTS) .getObject("twoColumnBrowseResultsRenderer") .getArray("tabs"); @@ -392,8 +399,8 @@ private List getTabsForNonAgeRestrictedChannels() throws Parsin responseTabs.stream() .filter(JsonObject.class::isInstance) .map(JsonObject.class::cast) - .filter(tab -> tab.has("tabRenderer")) - .map(tab -> tab.getObject("tabRenderer")) + .filter(tab -> tab.has(TAB_RENDERER)) + .map(tab -> tab.getObject(TAB_RENDERER)) .forEach(tabRenderer -> { final String tabUrl = tabRenderer.getObject("endpoint") .getObject("commandMetadata") @@ -432,6 +439,9 @@ private List getTabsForNonAgeRestrictedChannels() throws Parsin case "playlists": addNonVideosTab.accept(ChannelTabs.PLAYLISTS); break; + default: + // Unsupported channel tab, ignore it + break; } } }); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java index 6df7538f53..29813c6076 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java @@ -30,7 +30,6 @@ import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; import static
org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; @@ -121,60 +120,13 @@ public String getUrl() throws ParsingException { @Nonnull @Override public String getId() throws ParsingException { - final String id = jsonResponse.getObject("header") - .getObject("c4TabbedHeaderRenderer") - .getString("channelId", ""); - - if (!id.isEmpty()) { - return id; - } - - final Optional carouselHeaderId = jsonResponse.getObject("header") - .getObject("carouselHeaderRenderer") - .getArray("contents") - .stream() - .filter(JsonObject.class::isInstance) - .map(JsonObject.class::cast) - .filter(item -> item.has("topicChannelDetailsRenderer")) - .findFirst() - .flatMap(item -> - Optional.ofNullable(item.getObject("topicChannelDetailsRenderer") - .getObject("navigationEndpoint") - .getObject("browseEndpoint") - .getString("browseId"))); - if (carouselHeaderId.isPresent()) { - return carouselHeaderId.get(); - } - - if (!isNullOrEmpty(channelId)) { - return channelId; - } else { - throw new ParsingException("Could not get channel ID"); - } + return YoutubeChannelHelper.getChannelId(channelHeader, jsonResponse, channelId); } - protected String getChannelName() { - final String metadataName = jsonResponse.getObject("metadata") - .getObject("channelMetadataRenderer") - .getString("title"); - if (!isNullOrEmpty(metadataName)) { - return metadataName; - } - - return YoutubeChannelHelper.getChannelHeader(jsonResponse) - .map(header -> { - final Object title = header.json.get("title"); - if (title instanceof String) { - return (String) title; - } else if (title instanceof JsonObject) { - final String headerName = getTextFromObject((JsonObject) title); - if (!isNullOrEmpty(headerName)) { - return 
headerName; - } - } - return ""; - }) - .orElse(""); + protected String getChannelName() throws ParsingException { + return YoutubeChannelHelper.getChannelName( + channelHeader, jsonResponse, + YoutubeChannelHelper.getChannelAgeGateRenderer(jsonResponse)); } @Nonnull From 72e7d61dc4836adc240d329ce3cc0cfe01f2a543 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Fri, 5 Apr 2024 19:22:38 +0200 Subject: [PATCH 9/9] [YouTube] Fix uploader name, URL and verified status for some channels' content Gaming topic channels do not upload any content, all the content is coming from other channels. Also, uploader name, URL and verified status come are now got as much as possible from items instead of the channel info. --- .../YoutubeChannelTabExtractor.java | 136 ++++++++++++------ 1 file changed, 92 insertions(+), 44 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java index 29813c6076..c2898a2f0b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelTabExtractor.java @@ -21,10 +21,17 @@ import javax.annotation.Nullable; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.List; import java.util.Optional; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.ChannelHeader; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.ChannelResponseData; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.getChannelAgeGateRenderer; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.getChannelHeader; +import 
static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.getChannelId; import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.getChannelResponse; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.isChannelVerified; import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.resolveChannelId; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL; @@ -61,7 +68,7 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor { @Nullable private String visitorData; @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - private Optional channelHeader; + private Optional channelHeader; public YoutubeChannelTabExtractor(final StreamingService service, final ListLinkHandler linkHandler) { @@ -95,11 +102,11 @@ public void onFetchPage(@Nonnull final Downloader downloader) throws IOException final String params = getChannelTabsParameters(); - final YoutubeChannelHelper.ChannelResponseData data = getChannelResponse(channelIdFromId, + final ChannelResponseData data = getChannelResponse(channelIdFromId, params, getExtractorLocalization(), getExtractorContentCountry()); jsonResponse = data.jsonResponse; - channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse); + channelHeader = getChannelHeader(jsonResponse); channelId = data.channelId; if (useVisitorData) { visitorData = jsonResponse.getObject("responseContext").getString("visitorData"); @@ -120,13 +127,13 @@ public String getUrl() throws ParsingException { @Nonnull @Override public String getId() throws ParsingException { - return YoutubeChannelHelper.getChannelId(channelHeader, jsonResponse, channelId); + return getChannelId(channelHeader, jsonResponse, channelId); } protected String getChannelName() throws ParsingException { return 
YoutubeChannelHelper.getChannelName( channelHeader, jsonResponse, - YoutubeChannelHelper.getChannelAgeGateRenderer(jsonResponse)); + getChannelAgeGateRenderer(jsonResponse)); } @Nonnull @@ -161,7 +168,7 @@ public InfoItemsPage getInitialPage() throws IOException, ExtractionEx } final VerifiedStatus verifiedStatus = channelHeader.flatMap(header -> - YoutubeChannelHelper.isChannelVerified(header) + isChannelVerified(header) ? Optional.of(VerifiedStatus.VERIFIED) : Optional.of(VerifiedStatus.UNVERIFIED)) .orElse(VerifiedStatus.UNKNOWN); @@ -170,17 +177,40 @@ public InfoItemsPage getInitialPage() throws IOException, ExtractionEx // streams don't have their channel specified. // We also need to set the visitor data here when it should be enabled, as it is required // to get continuations on some channel tabs, and we need a way to pass it between pages - final String channelName = getChannelName(); - final String channelUrl = getUrl(); + final String channelName; + final String channelUrl; + final VerifiedStatus channelVerifiedStatus; + if (channelHeader.isPresent()) { + final ChannelHeader header = channelHeader.get(); + // Auto-generated channels which have an interactive tabbed channel header do not host + // any content and aggregate the one of multiple channels, so we do not set a channel + // name or a URL and the verification status in this case in order to not provide wrong + // information + if (header.headerType == ChannelHeader.HeaderType.INTERACTIVE_TABBED + && verifiedStatus == VerifiedStatus.VERIFIED) { + channelName = null; + channelUrl = null; + channelVerifiedStatus = VerifiedStatus.UNKNOWN; + } else { + channelName = getChannelName(); + channelUrl = getUrl(); + channelVerifiedStatus = verifiedStatus; + } + } else { + channelName = getChannelName(); + channelUrl = getUrl(); + channelVerifiedStatus = verifiedStatus; + } - final JsonObject continuation = collectItemsFrom(collector, items, verifiedStatus, + final JsonObject continuation = 
collectItemsFrom(collector, items, channelVerifiedStatus, channelName, channelUrl) .orElse(null); final Page nextPage = getNextPageFrom(continuation, useVisitorData && !isNullOrEmpty(visitorData) - ? List.of(channelName, channelUrl, verifiedStatus.toString(), visitorData) - : List.of(channelName, channelUrl, verifiedStatus.toString())); + ? Arrays.asList(channelName, channelUrl, channelVerifiedStatus.toString(), + visitorData) + : Arrays.asList(channelName, channelUrl, channelVerifiedStatus.toString())); return new InfoItemsPage<>(collector, nextPage); } @@ -342,82 +372,100 @@ private static void commitReel(@Nonnull final MultiInfoItemsCollector collector, new YoutubeReelInfoItemExtractor(reelItemRenderer) { @Override public String getUploaderName() throws ParsingException { - return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName; + final String superUploaderName = super.getUploaderName(); + if (isNullOrEmpty(superUploaderName)) { + return channelName; + } + return superUploaderName; } @Override public String getUploaderUrl() throws ParsingException { - return isNullOrEmpty(channelUrl) ? 
super.getUploaderName() : channelUrl; + final String superUploaderUrl = super.getUploaderUrl(); + if (isNullOrEmpty(superUploaderUrl)) { + return channelUrl; + } + return superUploaderUrl; } @Override public boolean isUploaderVerified() { + // Verification status is not provided by short items, so always rely on + // channel verified status in this case return channelVerifiedStatus == VerifiedStatus.VERIFIED; } }); } - private void commitVideo(@Nonnull final MultiInfoItemsCollector collector, - @Nonnull final TimeAgoParser timeAgoParser, - @Nonnull final JsonObject jsonObject, - @Nonnull final VerifiedStatus channelVerifiedStatus, - @Nullable final String channelName, - @Nullable final String channelUrl) { + private static void commitVideo(@Nonnull final MultiInfoItemsCollector collector, + @Nonnull final TimeAgoParser timeAgoParser, + @Nonnull final JsonObject jsonObject, + @Nonnull final VerifiedStatus channelVerifiedStatus, + @Nullable final String channelName, + @Nullable final String channelUrl) { collector.commit( new YoutubeStreamInfoItemExtractor(jsonObject, timeAgoParser) { @Override public String getUploaderName() throws ParsingException { - return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName; + final String superUploaderName = super.getUploaderName(); + if (isNullOrEmpty(superUploaderName)) { + return channelName; + } + return superUploaderName; } @Override public String getUploaderUrl() throws ParsingException { - return isNullOrEmpty(channelUrl) ? 
super.getUploaderName() : channelUrl; + final String superUploaderUrl = super.getUploaderUrl(); + if (isNullOrEmpty(superUploaderUrl)) { + return channelUrl; + } + return superUploaderUrl; } - @SuppressWarnings("DuplicatedCode") @Override public boolean isUploaderVerified() throws ParsingException { - switch (channelVerifiedStatus) { - case VERIFIED: - return true; - case UNVERIFIED: - return false; - default: - return super.isUploaderVerified(); + final boolean superIsUploaderVerified = super.isUploaderVerified(); + if (superIsUploaderVerified) { + return true; } + return channelVerifiedStatus == VerifiedStatus.VERIFIED; } }); } - private void commitPlaylist(@Nonnull final MultiInfoItemsCollector collector, - @Nonnull final JsonObject jsonObject, - @Nonnull final VerifiedStatus channelVerifiedStatus, - @Nullable final String channelName, - @Nullable final String channelUrl) { + private static void commitPlaylist(@Nonnull final MultiInfoItemsCollector collector, + @Nonnull final JsonObject jsonObject, + @Nonnull final VerifiedStatus channelVerifiedStatus, + @Nullable final String channelName, + @Nullable final String channelUrl) { collector.commit( new YoutubePlaylistInfoItemExtractor(jsonObject) { @Override public String getUploaderName() throws ParsingException { - return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName; + final String superUploaderName = super.getUploaderName(); + if (isNullOrEmpty(superUploaderName)) { + return channelName; + } + return superUploaderName; } @Override public String getUploaderUrl() throws ParsingException { - return isNullOrEmpty(channelUrl) ? 
super.getUploaderName() : channelUrl; + final String superUploaderUrl = super.getUploaderUrl(); + if (isNullOrEmpty(superUploaderUrl)) { + return channelUrl; + } + return superUploaderUrl; } - @SuppressWarnings("DuplicatedCode") @Override public boolean isUploaderVerified() throws ParsingException { - switch (channelVerifiedStatus) { - case VERIFIED: - return true; - case UNVERIFIED: - return false; - default: - return super.isUploaderVerified(); + final boolean superIsUploaderVerified = super.isUploaderVerified(); + if (superIsUploaderVerified) { + return true; } + return channelVerifiedStatus == VerifiedStatus.VERIFIED; } }); }