11import { getPossibleElementByQuerySelector } from './get-possible-element-by-query-selector.mjs'
2-
3- function getArea ( e ) {
4- const rect = e . getBoundingClientRect ( )
5- return rect . width * rect . height
6- }
2+ import { Readability , isProbablyReaderable } from '@mozilla/readability'
73
84const adapters = {
95 'scholar.google' : [ '#gs_res_ccl_mid' ] ,
@@ -17,6 +13,11 @@ const adapters = {
1713 'new.qq.com' : [ '.content-article' ] ,
1814}
1915
/**
 * Computes the on-screen area of an element's bounding box.
 *
 * @param {Element} e - Object exposing `getBoundingClientRect()`.
 * @returns {number} `width * height` of the rectangle returned by
 *   `e.getBoundingClientRect()`.
 */
function getArea(e) {
  const rect = e.getBoundingClientRect()
  return rect.width * rect.height
}
20+
2021function findLargestElement ( e ) {
2122 if ( ! e ) {
2223 return null
@@ -42,22 +43,39 @@ function findLargestElement(e) {
4243 return largestElement
4344}
4445
45- export function getCoreContentText ( ) {
46- function getTextFrom ( e ) {
47- return e . innerText || e . textContent
48- }
/**
 * Reads the text of an element.
 *
 * @param {HTMLElement} e - Element (or element-like object) to read from.
 * @returns {string} `e.innerText` when it is truthy; otherwise `e.textContent`
 *   (so an empty or missing `innerText` falls back to `textContent`).
 */
function getTextFrom(e) {
  return e.innerText || e.textContent
}
4949
/**
 * Compacts extracted page text by trimming it and deleting a fixed set of
 * literal substrings.
 *
 * Steps, in order: trim both ends, then remove every occurrence of the space
 * literal below, every tab, every pair of consecutive newlines, and every
 * pair of consecutive commas.
 *
 * @param {string|null|undefined} text - Raw text to clean.
 * @returns {string} The compacted text; `''` when `text` is null or undefined.
 */
function postProcessText(text) {
  // Nothing to clean when no text was extracted; avoids a TypeError on `.trim()`.
  if (text == null) return ''

  return (
    text
      .trim()
      // NOTE(review): the capture this was recovered from appears to collapse
      // runs of whitespace, so this literal may originally have been wider
      // than one space. Confirm against the repository before relying on it.
      .replaceAll(' ', '')
      .replaceAll('\t', '')
      // Single newlines are kept; only doubled ones are dropped.
      .replaceAll('\n\n', '')
      .replaceAll(',,', '')
  )
}
58+
59+ export function getCoreContentText ( ) {
5060 for ( const [ siteName , selectors ] of Object . entries ( adapters ) ) {
5161 if ( location . hostname . includes ( siteName ) ) {
5262 const element = getPossibleElementByQuerySelector ( selectors )
53- if ( element ) return getTextFrom ( element )
63+ if ( element ) return postProcessText ( getTextFrom ( element ) )
5464 break
5565 }
5666 }
5767
5868 const element = document . querySelector ( 'article' )
5969 if ( element ) {
60- return getTextFrom ( element )
70+ return postProcessText ( getTextFrom ( element ) )
71+ }
72+
73+ if ( isProbablyReaderable ( document ) ) {
74+ let article = new Readability ( document . cloneNode ( true ) , {
75+ keepClasses : true ,
76+ } ) . parse ( )
77+ console . log ( 'readerable' )
78+ return postProcessText ( article . textContent )
6179 }
6280
6381 const largestElement = findLargestElement ( document . body )
@@ -79,5 +97,5 @@ export function getCoreContentText() {
7997 ret = getTextFrom ( largestElement )
8098 console . log ( 'use first' )
8199 }
82- return ret . trim ( ) . replaceAll ( ' ' , '' ) . replaceAll ( '\n\n' , '' ) . replaceAll ( ',,' , '' )
100+ return postProcessText ( ret )
83101}
0 commit comments