Skip to content

Commit

Permalink
[metadata] add option of configuring ua of async metadata (#74594)
Browse files Browse the repository at this point in the history
### What

Introduce an experimental config `experimental.htmlLimitedBots` (type
`RegExp`) which can be used to control the metadata streaming behavior
for different bots: if a bot's user agent matches the regex, Next.js
will always serve blocking metadata to it. This gives users some control to
opt out of streaming metadata for clients or bots that cannot handle
it.


Output another manifest, `response-config-manifest.json`, containing the
customized UA, which can potentially be used by the platform serving
Next.js to control the response returned for certain UAs. This is mostly
for the PPR case, which we'll tackle later.

Closes NDX-635
Closes NDX-599
  • Loading branch information
huozhi authored Jan 10, 2025
1 parent 36a138a commit d483dfa
Show file tree
Hide file tree
Showing 18 changed files with 228 additions and 12 deletions.
23 changes: 23 additions & 0 deletions packages/next/src/build/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ import {
UNDERSCORE_NOT_FOUND_ROUTE_ENTRY,
UNDERSCORE_NOT_FOUND_ROUTE,
DYNAMIC_CSS_MANIFEST,
RESPONSE_CONFIG_MANIFEST,
} from '../shared/lib/constants'
import {
getSortedRoutes,
Expand Down Expand Up @@ -214,6 +215,7 @@ import {
getParsedNodeOptionsWithoutInspect,
} from '../server/lib/utils'
import { InvariantError } from '../shared/lib/invariant-error'
import { HTML_LIMITED_BOT_UA_RE_STRING } from '../shared/lib/router/utils/is-bot'

type Fallback = null | boolean | string

Expand Down Expand Up @@ -1307,6 +1309,24 @@ export default async function build(
NextBuildContext.clientRouterFilters = clientRouterFilters
}

if (config.experimental.streamingMetadata) {
// Write html limited bots config to response-config-manifest
const responseConfigManifestPath = path.join(
distDir,
RESPONSE_CONFIG_MANIFEST
)
const responseConfigManifest: {
version: number
htmlLimitedBots: string
} = {
version: 0,
htmlLimitedBots:
config.experimental.htmlLimitedBots ||
HTML_LIMITED_BOT_UA_RE_STRING,
}
await writeManifest(responseConfigManifestPath, responseConfigManifest)
}

// Ensure commonjs handling is used for files in the distDir (generally .next)
// Files outside of the distDir can be "type": "module"
await writeFileUtf8(
Expand Down Expand Up @@ -2487,6 +2507,9 @@ export default async function build(
PRERENDER_MANIFEST,
path.join(SERVER_DIRECTORY, MIDDLEWARE_MANIFEST),
path.join(SERVER_DIRECTORY, MIDDLEWARE_BUILD_MANIFEST + '.js'),
...(config.experimental.streamingMetadata
? [RESPONSE_CONFIG_MANIFEST]
: []),
...(!process.env.TURBOPACK
? [
path.join(
Expand Down
4 changes: 2 additions & 2 deletions packages/next/src/build/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { NextConfig, NextConfigComplete } from '../server/config-shared'
import type { NextConfigComplete } from '../server/config-shared'
import type { ExperimentalPPRConfig } from '../server/lib/experimental/ppr'
import type { AppBuildManifest } from './webpack/plugins/app-build-manifest-plugin'
import type { AssetBinding } from './webpack/loaders/get-module-build-info'
Expand Down Expand Up @@ -1466,7 +1466,7 @@ export async function copyTracedFiles(
pageKeys: readonly string[],
appPageKeys: readonly string[] | undefined,
tracingRoot: string,
serverConfig: NextConfig,
serverConfig: NextConfigComplete,
middlewareManifest: MiddlewareManifest,
hasInstrumentationHook: boolean,
staticPages: Set<string>
Expand Down
1 change: 1 addition & 0 deletions packages/next/src/export/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ async function exportAppImpl(
inlineCss: nextConfig.experimental.inlineCss ?? false,
authInterrupts: !!nextConfig.experimental.authInterrupts,
streamingMetadata: !!nextConfig.experimental.streamingMetadata,
htmlLimitedBots: nextConfig.experimental.htmlLimitedBots,
},
reactMaxHeadersLength: nextConfig.reactMaxHeadersLength,
}
Expand Down
1 change: 1 addition & 0 deletions packages/next/src/server/app-render/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ export interface RenderOptsPartial {
inlineCss: boolean
authInterrupts: boolean
streamingMetadata: boolean
htmlLimitedBots: string | undefined
}
postponed?: string

Expand Down
14 changes: 8 additions & 6 deletions packages/next/src/server/base-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ import {
} from './lib/revalidate'
import { execOnce } from '../shared/lib/utils'
import { isBlockedPage } from './utils'
import { isBot, isHtmlLimitedBotUA } from '../shared/lib/router/utils/is-bot'
import { isBot } from '../shared/lib/router/utils/is-bot'
import RenderResult from './render-result'
import { removeTrailingSlash } from '../shared/lib/router/utils/remove-trailing-slash'
import { denormalizePagePath } from '../shared/lib/page-path/denormalize-page-path'
Expand Down Expand Up @@ -175,6 +175,7 @@ import type { RouteModule } from './route-modules/route-module'
import { FallbackMode, parseFallbackField } from '../lib/fallback'
import { toResponseCacheEntry } from './response-cache/utils'
import { scheduleOnNextTick } from '../lib/scheduler'
import { shouldServeStreamingMetadata } from './lib/streaming-metadata'

export type FindComponentsResult = {
components: LoadComponentsReturnType
Expand Down Expand Up @@ -595,6 +596,7 @@ export default abstract class Server<
inlineCss: this.nextConfig.experimental.inlineCss ?? false,
authInterrupts: !!this.nextConfig.experimental.authInterrupts,
streamingMetadata: !!this.nextConfig.experimental.streamingMetadata,
htmlLimitedBots: this.nextConfig.experimental.htmlLimitedBots,
},
onInstrumentationRequestError:
this.instrumentationOnRequestError.bind(this),
Expand Down Expand Up @@ -1675,11 +1677,13 @@ export default abstract class Server<
renderOpts: {
...this.renderOpts,
supportsDynamicResponse: !isBotRequest,
serveStreamingMetadata:
this.renderOpts.experimental.streamingMetadata &&
!isHtmlLimitedBotUA(ua),
serveStreamingMetadata: shouldServeStreamingMetadata(
ua,
this.renderOpts.experimental
),
},
}

const payload = await fn(ctx)
if (payload === null) {
return
Expand Down Expand Up @@ -2181,8 +2185,6 @@ export default abstract class Server<
// cache if there are no dynamic data requirements
opts.supportsDynamicResponse =
!isSSG && !isBotRequest && !query.amp && isSupportedDocument
opts.serveStreamingMetadata =
opts.experimental.streamingMetadata && !isHtmlLimitedBotUA(ua)
}

// In development, we always want to generate dynamic HTML.
Expand Down
1 change: 1 addition & 0 deletions packages/next/src/server/config-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ export const configSchema: zod.ZodType<NextConfig> = z.lazy(() =>
authInterrupts: z.boolean().optional(),
newDevOverlay: z.boolean().optional(),
streamingMetadata: z.boolean().optional(),
htmlLimitedBots: z.instanceof(RegExp).optional(),
})
.optional(),
exportPathMap: z
Expand Down
12 changes: 12 additions & 0 deletions packages/next/src/server/config-shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ export type NextConfigComplete = Required<NextConfig> & {
configOrigin?: string
configFile?: string
configFileName: string
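// Override `experimental.htmlLimitedBots` to be a string here: users provide a
// RegExp in `ExperimentalConfig`, but it is serialized to its source string
// when the config is loaded, so the completed config only carries a string.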
experimental: Omit<ExperimentalConfig, 'htmlLimitedBots'> & {
htmlLimitedBots: string | undefined
}
}

export type I18NDomains = readonly DomainLocale[]
Expand Down Expand Up @@ -577,6 +582,12 @@ export interface ExperimentalConfig {
* When enabled will cause async metadata calls to stream rather than block the render.
*/
streamingMetadata?: boolean

/**
* User agents of bots that cannot handle streaming metadata.
* Next.js serves metadata differently to bots based on their capability: requests whose user agent matches this regex receive blocking metadata instead of streamed metadata.
*/
htmlLimitedBots?: RegExp
}

export type ExportPathMap = {
Expand Down Expand Up @@ -1203,6 +1214,7 @@ export const defaultConfig: NextConfig = {
inlineCss: false,
newDevOverlay: false,
streamingMetadata: false,
htmlLimitedBots: undefined,
},
bundlePagesRouterDependencies: false,
}
Expand Down
12 changes: 12 additions & 0 deletions packages/next/src/server/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import { hasNextSupport } from '../server/ci-info'
import { transpileConfig } from '../build/next-config-ts/transpile-config'
import { dset } from '../shared/lib/dset'
import { normalizeZodErrors } from '../shared/lib/zod'
import { HTML_LIMITED_BOT_UA_RE_STRING } from '../shared/lib/router/utils/is-bot'

export { normalizeConfig } from './config-shared'
export type { DomainLocale, NextConfig } from './config-shared'
Expand Down Expand Up @@ -1004,6 +1005,11 @@ function assignDefaults(
]),
]

if (!result.experimental.htmlLimitedBots) {
// @ts-expect-error: override the htmlLimitedBots with default string, type conversion: RegExp -> string
result.experimental.htmlLimitedBots = HTML_LIMITED_BOT_UA_RE_STRING
}

return result
}

Expand Down Expand Up @@ -1222,6 +1228,12 @@ export default async function loadConfig(
}
}

// serialize the regex config into string
if (userConfig.experimental?.htmlLimitedBots instanceof RegExp) {
userConfig.experimental.htmlLimitedBots =
userConfig.experimental.htmlLimitedBots.source
}

onLoadUserConfig?.(userConfig)
const completeConfig = assignDefaults(
dir,
Expand Down
22 changes: 22 additions & 0 deletions packages/next/src/server/lib/streaming-metadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { HTML_LIMITED_BOT_UA_RE_STRING } from '../../shared/lib/router/utils/is-bot'

// Compiled user-agent matchers, keyed by their pattern source. The pattern
// comes from the build-time config (or the built-in default), so this holds
// only a handful of entries and spares recompiling the regex on every request.
const blockingMetadataUARegexCache = new Map<string, RegExp>()

/**
 * Decides whether metadata may be streamed for a request.
 *
 * @param userAgent - The user agent string of the incoming request.
 * @param options - The relevant experimental options:
 *   `streamingMetadata` toggles the feature as a whole, and `htmlLimitedBots`
 *   is the source of a regex describing user agents that must receive blocking
 *   metadata. When `htmlLimitedBots` is empty or undefined, the built-in
 *   `HTML_LIMITED_BOT_UA_RE_STRING` pattern is used instead.
 * @returns `false` when streaming metadata is disabled or the user agent
 *   matches the html-limited bots pattern; `true` otherwise.
 */
export function shouldServeStreamingMetadata(
  userAgent: string,
  {
    streamingMetadata,
    htmlLimitedBots,
  }: {
    streamingMetadata: boolean
    htmlLimitedBots: string | undefined
  }
): boolean {
  if (!streamingMetadata) {
    return false
  }

  const pattern = htmlLimitedBots || HTML_LIMITED_BOT_UA_RE_STRING

  let blockingMetadataUARegex = blockingMetadataUARegexCache.get(pattern)
  if (blockingMetadataUARegex === undefined) {
    // Only the pattern source is available here, so matching is always
    // case-insensitive. The regex is not global/sticky, which keeps `test`
    // stateless and makes the compiled instance safe to reuse across calls.
    blockingMetadataUARegex = new RegExp(pattern, 'i')
    blockingMetadataUARegexCache.set(pattern, blockingMetadataUARegex)
  }

  return !blockingMetadataUARegex.test(userAgent)
}
1 change: 1 addition & 0 deletions packages/next/src/shared/lib/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export const EXPORT_DETAIL = 'export-detail.json'
export const PRERENDER_MANIFEST = 'prerender-manifest.json'
export const ROUTES_MANIFEST = 'routes-manifest.json'
export const IMAGES_MANIFEST = 'images-manifest.json'
export const RESPONSE_CONFIG_MANIFEST = 'response-config-manifest.json'
export const SERVER_FILES_MANIFEST = 'required-server-files.json'
export const DEV_CLIENT_PAGES_MANIFEST = '_devPagesManifest.json'
export const MIDDLEWARE_MANIFEST = 'middleware-manifest.json'
Expand Down
11 changes: 8 additions & 3 deletions packages/next/src/shared/lib/router/utils/is-bot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@ const HEADLESS_BROWSER_BOT_UA_RE =

// This regex contains the bots that we need to do a blocking render for and can't safely stream the response
// due to how they parse the DOM. For example, they might explicitly check for metadata in the `head` tag, so we can't stream metadata tags after the `head` was sent.
const HTML_LIMITED_BOT_UA_RE =
/Mediapartners-Google|Slurp|DuckDuckBot|baiduspider|yandex|sogou|bitlybot|tumblr|vkShare|quora link preview|redditbot|ia_archiver|Bingbot|BingPreview|applebot|facebookexternalhit|facebookcatalog|Twitterbot|LinkedInBot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview/i
export const HTML_LIMITED_BOT_UA_RE_STRING =
'Mediapartners-Google|Slurp|DuckDuckBot|baiduspider|yandex|sogou|bitlybot|tumblr|vkShare|quora link preview|redditbot|ia_archiver|Bingbot|BingPreview|applebot|facebookexternalhit|facebookcatalog|Twitterbot|LinkedInBot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview'

export const HTML_LIMITED_BOT_UA_RE = new RegExp(
HTML_LIMITED_BOT_UA_RE_STRING,
'i'
)

/**
 * Tells whether the given user agent matches `HEADLESS_BROWSER_BOT_UA_RE`.
 */
function isHeadlessBrowserBotUA(userAgent: string) {
  const matchesHeadlessBrowserBot = HEADLESS_BROWSER_BOT_UA_RE.test(userAgent)
  return matchesHeadlessBrowserBot
}

export function isHtmlLimitedBotUA(userAgent: string) {
function isHtmlLimitedBotUA(userAgent: string) {
return HTML_LIMITED_BOT_UA_RE.test(userAgent)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { nextTestSetup } from 'e2e-utils'

// Verifies that a customized `experimental.htmlLimitedBots` rule decides which
// user agents receive blocking metadata and which receive streamed metadata.
describe('app-dir - metadata-streaming-customized-rule', () => {
  const { next } = nextTestSetup({
    files: __dirname,
    overrideFiles: {
      'next.config.js': `
        module.exports = {
          experimental: {
            streamingMetadata: true,
            htmlLimitedBots: /Minibot/i,
          }
        }
      `,
    },
  })

  // Renders the index route while identifying as the given user agent.
  const renderIndexAs = (userAgent: string) =>
    next.render$(
      '/',
      undefined, // no query
      {
        headers: {
          'user-agent': userAgent,
        },
      }
    )

  it('should send the blocking response for html limited bots', async () => {
    // "Minibot" matches the customized rule, so the title is expected in the
    // rendered document.
    const $ = await renderIndexAs('Minibot')
    expect(await $('title').text()).toBe('index page')
  })

  it('should send streaming response for user agents not matching the customized rule', async () => {
    // "Weebot" does not match the customized rule, so no title is expected in
    // the rendered document.
    const $ = await renderIndexAs('Weebot')
    expect(await $('title').length).toBe(0)
  })
})
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { nextTestSetup } from 'e2e-utils'
import { retry, createMultiDomMatcher } from 'next-test-utils'

describe('metadata-streaming', () => {
describe('app-dir - metadata-streaming', () => {
const { next } = nextTestSetup({
files: __dirname,
})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { ReactNode } from 'react'
/**
 * Root layout: wraps the given children in a bare `<html>`/`<body>` document.
 */
export default function Root(props: { children: ReactNode }) {
  const { children } = props

  return (
    <html>
      <body>{children}</body>
    </html>
  )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/**
 * Page component that renders a static "hello world" paragraph.
 */
export default function Page() {
  const content = <p>hello world</p>
  return content
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { nextTestSetup } from 'e2e-utils'

// Verifies that a customized `experimental.htmlLimitedBots` regex is
// serialized to its source string in the build output.
describe('app-dir - metadata-streaming-config-customized', () => {
  const { next } = nextTestSetup({
    files: __dirname,
    overrideFiles: {
      'next.config.js': `
        module.exports = {
          experimental: {
            streamingMetadata: true,
            htmlLimitedBots: /MyBot/i,
          }
        }
      `,
    },
  })

  it('should output the customized htmlLimitedBots config in required-server-files.json and response-config-manifest.json', async () => {
    // The response config manifest must be listed as a required server file,
    // and the serialized config must carry the regex source.
    const requiredServerFiles = JSON.parse(
      await next.readFile('.next/required-server-files.json')
    )
    expect(requiredServerFiles.files).toContain(
      '.next/response-config-manifest.json'
    )
    expect(
      requiredServerFiles.config.experimental.htmlLimitedBots
    ).toMatchInlineSnapshot(`"MyBot"`)

    // The manifest itself carries the same regex source.
    const responseConfigManifest = JSON.parse(
      await next.readFile('.next/response-config-manifest.json')
    )

    expect(responseConfigManifest).toMatchInlineSnapshot(`
      {
        "htmlLimitedBots": "MyBot",
        "version": 0,
      }
    `)
  })
})
Loading

0 comments on commit d483dfa

Please sign in to comment.