From 04394eecea622261f867a207022e7e0f07dc8d88 Mon Sep 17 00:00:00 2001 From: FireMasterK <20838718+FireMasterK@users.noreply.github.com> Date: Tue, 8 Feb 2022 10:44:55 +0000 Subject: [PATCH] Add support to extract total comment count. --- .../extractor/comments/CommentsExtractor.java | 7 ++++ .../extractor/comments/CommentsInfo.java | 28 +++++++++++++- .../extractors/YoutubeCommentsExtractor.java | 38 +++++++++++++++++-- .../youtube/YoutubeCommentsExtractorTest.java | 7 ++++ 4 files changed, 74 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java index ac4792fc07..5f34ffbfdc 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java @@ -22,6 +22,13 @@ public boolean isCommentsDisabled() throws ExtractionException { return false; } + /** + * @return the total number of comments, or {@code -1} if the count is unknown (the default)
+ */ + public int getCommentsCount() throws ExtractionException { + return -1; + } + @Nonnull @Override public String getName() throws ParsingException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 439ade4316..bd4f17f577 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -48,6 +48,11 @@ public static CommentsInfo getInfo(final CommentsExtractor commentsExtractor) ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor); commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled()); commentsInfo.setRelatedItems(initialCommentsPage.getItems()); + try { + commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount()); + } catch (Exception e) { + commentsInfo.addError(e); + } commentsInfo.setNextPage(initialCommentsPage.getNextPage()); return commentsInfo; @@ -72,6 +77,7 @@ public static InfoItemsPage getMoreItems( private transient CommentsExtractor commentsExtractor; private boolean commentsDisabled = false; + private int commentsCount = -1; public CommentsExtractor getCommentsExtractor() { return commentsExtractor; @@ -82,8 +88,8 @@ public void setCommentsExtractor(final CommentsExtractor commentsExtractor) { } /** - * @apiNote Warning: This method is experimental and may get removed in a future release. * @return true if the comments are disabled otherwise false (default) + * @apiNote Warning: This method is experimental and may get removed in a future release. * @see CommentsExtractor#isCommentsDisabled() */ public boolean isCommentsDisabled() { @@ -91,10 +97,28 @@ public boolean isCommentsDisabled() { } /** - * @apiNote Warning: This method is experimental and may get removed in a future release. 
* @param commentsDisabled true if the comments are disabled otherwise false + * @apiNote Warning: This method is experimental and may get removed in a future release. */ public void setCommentsDisabled(final boolean commentsDisabled) { this.commentsDisabled = commentsDisabled; } + + /** + * Returns the total number of comments. + * + * @return the total number of comments, or {@code -1} if it has not been set (the default) + */ + public int getCommentsCount() { + return commentsCount; + } + + /** + * Sets the total number of comments. + * + * @param commentsCount the total number of comments, or {@code -1} if it is unknown + */ + public void setCommentsCount(final int commentsCount) { + this.commentsCount = commentsCount; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index d5f65264ef..1e5c39a995 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -1,7 +1,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; import static org.schabi.newpipe.extractor.utils.Utils.UTF_8; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; @@ -29,6 +30,7 @@ import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonWriter; +import org.schabi.newpipe.extractor.utils.Utils; public class YoutubeCommentsExtractor extends 
CommentsExtractor { @@ -45,6 +47,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { */ 
@SuppressWarnings("OptionalUsedAsFieldOrParameterType") private Optional optCommentsDisabled = Optional.empty(); + /** Response of the latest "next" request made by getPage; {@code null} until then. */ + private JsonObject ajaxJson; public YoutubeCommentsExtractor( final StreamingService service, @@ -175,16 +179,15 @@ public InfoItemsPage getPage(final Page page) .done()) .getBytes(UTF_8); - final JsonObject ajaxJson = getJsonPostResponse("next", body, localization); + this.ajaxJson = getJsonPostResponse("next", body, localization); final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( getServiceId()); - collectCommentsFrom(collector, ajaxJson); + collectCommentsFrom(collector); return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); } - private void collectCommentsFrom(final CommentsInfoItemsCollector collector, - @Nonnull final JsonObject ajaxJson) throws ParsingException { + private void collectCommentsFrom(final CommentsInfoItemsCollector collector) throws ParsingException { final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray( "onResponseReceivedEndpoints"); @@ -257,4 +260,31 @@ public boolean isCommentsDisabled() throws ExtractionException { return optCommentsDisabled.get(); } + + /** + * Reads the total comment count from the header of the most recently fetched response. + * + * @return the total number of comments, or {@code -1} if no response has been fetched yet + * or the response holds no parsable count + */ + @Override + public int getCommentsCount() throws ExtractionException { + if (ajaxJson == null) { + return -1; + } + + final JsonObject commentsHeaderRenderer = ajaxJson + .getArray("onResponseReceivedEndpoints").getObject(0) + .getObject("reloadContinuationItemsCommand") + .getArray("continuationItems").getObject(0) + .getObject("commentsHeaderRenderer"); + + final String text = getTextFromObject(commentsHeaderRenderer.getObject("countText")); + if (text == null || text.isEmpty()) { + return -1; + } + + final String digits = Utils.removeNonDigitCharacters(text); + return digits.isEmpty() ? -1 : Integer.parseInt(digits); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 6ea11a4d4a..a371a22a60 100644 --- 
a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -87,6 +87,7 @@ private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException @Test public void testGetCommentsAllData() throws IOException, ExtractionException { InfoItemsPage comments = extractor.getInitialPage(); + assertTrue(extractor.getCommentsCount() > 5); // more than 5 comments DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors()); for (CommentsInfoItem c : comments.getItems()) { @@ -336,5 +337,11 @@ public void testGetCommentsFirstReplies() throws IOException, ExtractionExceptio assertEquals("First", replies.getItems().get(0).getCommentText(), "First reply comment did not match"); } + + @Test + public void testCommentsCount() throws IOException, ExtractionException { + extractor.getInitialPage(); // Needs to be called first + assertTrue(extractor.getCommentsCount() > 18800); + } } }