Skip to content

Commit

Permalink
feat(new_split_chunks): support reuseExistingChunk (#3000)
Browse files Browse the repository at this point in the history
* feat(new_split_chunks): support `reuseExistingChunk`

* Fix

* Fix
  • Loading branch information
hyf0 authored May 4, 2023
1 parent c567571 commit e5d33eb
Show file tree
Hide file tree
Showing 17 changed files with 222 additions and 21 deletions.
5 changes: 5 additions & 0 deletions .changeset/strange-pans-raise.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@rspack/binding": patch
---

feat(new_split_chunks): support `reuseExistingChunk`
1 change: 1 addition & 0 deletions crates/node_binding/binding.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ export interface RawCacheGroupOptions {
chunks?: string
minChunks?: number
name?: string
reuseExistingChunk?: boolean
}
export interface RawStatsOptions {
colors: boolean
Expand Down
2 changes: 2 additions & 0 deletions crates/rspack_binding_options/src/options/raw_split_chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ pub struct RawCacheGroupOptions {
// pub max_initial_size: usize,
pub name: Option<String>,
// used_exports: bool,
pub reuse_existing_chunk: Option<bool>,
}

use rspack_plugin_split_chunks_new as new_split_chunks_plugin;
Expand Down Expand Up @@ -161,6 +162,7 @@ impl From<RawSplitChunksOptions> for new_split_chunks_plugin::PluginOptions {
&default_size_types,
overall_min_size,
),
reuse_existing_chunk: v.reuse_existing_chunk.unwrap_or(true),
}),
);

Expand Down
1 change: 1 addition & 0 deletions crates/rspack_plugin_split_chunks_new/src/cache_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub struct CacheGroup {
pub name: ChunkNameGetter,
pub priority: f64,
pub min_size: SplitChunkSizes,
pub reuse_existing_chunk: bool,
/// number of referenced chunks
pub min_chunks: u32,
}
1 change: 1 addition & 0 deletions crates/rspack_plugin_split_chunks_new/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![feature(map_many_mut)]
#![feature(let_chains)]

pub(crate) mod cache_group;
pub(crate) mod common;
Expand Down
1 change: 1 addition & 0 deletions crates/rspack_plugin_split_chunks_new/src/module_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub(crate) struct ModuleGroup {
pub modules: IdentifierSet,
pub cache_group_index: usize,
pub cache_group_priority: f64,
pub cache_group_reuse_existing_chunk: bool,
/// If the `ModuleGroup` is going to create a chunk, that chunk will be named using `chunk_name`.
/// A module
pub chunk_name: Option<String>,
Expand Down
146 changes: 126 additions & 20 deletions crates/rspack_plugin_split_chunks_new/src/plugin.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::fmt::Debug;
use std::{borrow::Cow, fmt::Debug};

use async_scoped::TokioScope;
use dashmap::DashMap;
Expand Down Expand Up @@ -34,25 +34,39 @@ impl SplitChunksPlugin {
self.ensure_min_size_fit(compilation, &mut module_group_map);

while !module_group_map.is_empty() {
let (_module_group_key, module_group) = self.find_best_module_group(&mut module_group_map);
let (_module_group_key, mut module_group) =
self.find_best_module_group(&mut module_group_map);

let new_chunk = self.get_corresponding_chunk(compilation, &module_group);
let mut is_reuse_existing_chunk = false;
let mut is_reuse_existing_chunk_with_all_modules = false;
let new_chunk = self.get_corresponding_chunk(
compilation,
&mut module_group,
&mut is_reuse_existing_chunk,
&mut is_reuse_existing_chunk_with_all_modules,
);

if is_reuse_existing_chunk {
// The chunk is not new but was already created in code splitting. We need to remove `new_chunk` from
// `ModuleGroup.chunks`, since the modules in this `ModuleGroup` will be removed from the other chunks, which are the ones stored in `ModuleGroup.chunks`.
module_group.chunks.remove(&new_chunk);
}

let original_chunks = &module_group.chunks;
let used_chunks = Cow::Borrowed(&module_group.chunks);

self.move_modules_to_new_chunk_and_remove_from_old_chunks(
&module_group,
new_chunk,
original_chunks,
&used_chunks,
compilation,
);

self.split_from_original_chunks(&module_group, original_chunks, new_chunk, compilation);
self.split_from_original_chunks(&module_group, &used_chunks, new_chunk, compilation);

self.remove_all_modules_from_other_module_groups(
&module_group,
&mut module_group_map,
original_chunks,
&used_chunks,
compilation,
)
}
Expand Down Expand Up @@ -123,34 +137,125 @@ impl SplitChunksPlugin {
});
}

/// Searches `module_group.chunks` for an existing chunk that can be reused as the chunk of this
/// `ModuleGroup`. Used for `splitChunks.cacheGroups.{cacheGroup}.reuseExistingChunk`.
///
/// With that option, a chunk that already contains the modules split out from the main bundle
/// is reused instead of a new one being generated. This can affect the resulting file name of
/// the chunk.
///
/// A chunk qualifies only when it holds exactly as many modules as the `ModuleGroup` and
/// contains every one of them. When several chunks qualify, `prefer_reusable_chunk` decides
/// which one wins.
///
/// Returns the ukey of the chosen chunk, or `None` when no chunk qualifies.
fn find_the_best_reusable_chunk(
    &self,
    compilation: &mut Compilation,
    module_group: &mut ModuleGroup,
) -> Option<ChunkUkey> {
    /// Compares two reusable candidates and returns the preferred one.
    ///
    /// Ported from <https://github.com/webpack/webpack/blob/b471a6bfb71020f6d8f136ef10b7efb239ef5bbf/lib/optimize/SplitChunksPlugin.js#L1360-L1373>
    fn prefer_reusable_chunk<'a>(current: &'a Chunk, challenger: &'a Chunk) -> &'a Chunk {
        match (&current.name, &challenger.name) {
            // Both are named: the shorter name wins, equal lengths fall back to string order.
            (Some(current_name), Some(challenger_name)) => {
                let challenger_wins = match challenger_name.len().cmp(&current_name.len()) {
                    std::cmp::Ordering::Less => true,
                    std::cmp::Ordering::Equal => challenger_name.cmp(current_name).is_lt(),
                    std::cmp::Ordering::Greater => false,
                };
                if challenger_wins {
                    challenger
                } else {
                    current
                }
            }
            // A named challenger replaces an unnamed current pick.
            (None, Some(_)) => challenger,
            // In every remaining case the current pick is kept.
            (None, None) | (Some(_), None) => current,
        }
    }

    let wanted_module_count = module_group.modules.len();
    let spans_multiple_chunks = module_group.chunks.len() > 1;

    module_group
        .chunks
        .par_iter()
        .map(|chunk_ukey| chunk_ukey.as_ref(&compilation.chunk_by_ukey))
        .filter(|candidate| {
            // Fast path: a chunk whose module count differs from the group's cannot be an
            // exact match, so the per-module check below is skipped.
            if compilation
                .chunk_graph
                .get_number_of_chunk_modules(&candidate.ukey)
                != wanted_module_count
            {
                return false;
            }

            // When the group is related to more than one chunk, a chunk that holds entry
            // modules is never reused.
            // NOTE(review): the rationale is not visible here; presumably an entry chunk must
            // keep its role as the application's entry and so cannot double as a shared chunk.
            if spans_multiple_chunks
                && compilation
                    .chunk_graph
                    .get_number_of_entry_modules(&candidate.ukey)
                    > 0
            {
                return false;
            }

            // Same count plus full membership means the chunk holds exactly the group's modules.
            module_group.modules.par_iter().all(|module| {
                compilation
                    .chunk_graph
                    .is_module_in_chunk(module, candidate.ukey)
            })
        })
        .reduce_with(|best, each| prefer_reusable_chunk(best, each))
        .map(|chosen| chosen.ukey)
}

/// Resolves the chunk that will hold the modules of `module_group` and returns its ukey.
///
/// NOTE(review): this span is a rendered diff, so removed and added lines are interleaved
/// (for example both `module_group` parameter lines). The description follows the added lines.
///
/// - If `module_group.chunk_name` is already registered in `compilation.named_chunks`, that
///   chunk is returned and `*is_reuse_existing_chunk` is set to `true`.
/// - Otherwise, if `find_the_best_reusable_chunk` yields a chunk and
///   `module_group.cache_group_reuse_existing_chunk` is `true`, that chunk is returned and both
///   `*is_reuse_existing_chunk` and `*is_reuse_existing_chunk_with_all_modules` are set to `true`.
/// - Otherwise a new chunk is created (named when `chunk_name` is set), tagged with the
///   "Create by split chunks" reason and added to `compilation.chunk_graph`.
fn get_corresponding_chunk(
&self,
compilation: &mut Compilation,
module_group: &ModuleGroup,
module_group: &mut ModuleGroup,
is_reuse_existing_chunk: &mut bool,
is_reuse_existing_chunk_with_all_modules: &mut bool,
) -> ChunkUkey {
// A chunk already registered under the requested name is reused as-is.
if let Some(chunk) = module_group
.chunk_name
.as_ref()
.and_then(|chunk_name| compilation.named_chunks.get(chunk_name))
{
*is_reuse_existing_chunk = true;
return *chunk;
}

let chunk = if let Some(chunk_name) = &module_group.chunk_name {
Compilation::add_named_chunk(
// NOTE(review): the reusable-chunk search is evaluated before the
// `cache_group_reuse_existing_chunk` flag is tested, so it runs even when reuse is disabled.
if let Some(reusable_chunk) = self.find_the_best_reusable_chunk(compilation, module_group) && module_group.cache_group_reuse_existing_chunk {
*is_reuse_existing_chunk = true;
*is_reuse_existing_chunk_with_all_modules = true;
reusable_chunk
} else if let Some(chunk_name) = &module_group.chunk_name {
// No reusable chunk: create a chunk registered under `chunk_name`.
let new_chunk = Compilation::add_named_chunk(
chunk_name.clone(),
&mut compilation.chunk_by_ukey,
&mut compilation.named_chunks,
)
} else {
Compilation::add_chunk(&mut compilation.chunk_by_ukey)
};

chunk
.chunk_reasons
.push("Create by split chunks".to_string());
compilation.chunk_graph.add_chunk(chunk.ukey);
chunk.ukey
);
new_chunk
.chunk_reasons
.push("Create by split chunks".to_string());
compilation.chunk_graph.add_chunk(new_chunk.ukey);
new_chunk.ukey
} else {
// No reusable chunk and no name: create an anonymous chunk.
let new_chunk = Compilation::add_chunk(&mut compilation.chunk_by_ukey);
new_chunk
.chunk_reasons
.push("Create by split chunks".to_string());
compilation.chunk_graph.add_chunk(new_chunk.ukey);
new_chunk.ukey
}
}

fn remove_all_modules_from_other_module_groups(
Expand Down Expand Up @@ -342,6 +447,7 @@ impl SplitChunksPlugin {
modules: Default::default(),
cache_group_index,
cache_group_priority: cache_group.priority,
cache_group_reuse_existing_chunk: cache_group.reuse_existing_chunk,
sizes: Default::default(),
chunks: Default::default(),
chunk_name,
Expand Down
3 changes: 2 additions & 1 deletion packages/rspack/src/config/adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,8 @@ function getRawSplitChunksOptions(
name: group.name,
priority: group.priority,
minChunks: group.minChunks,
chunks: group.chunks
chunks: group.chunks,
reuseExistingChunk: group.reuseExistingChunk
};
return [key, normalizedGroup];
})
Expand Down
5 changes: 5 additions & 0 deletions packages/rspack/src/config/schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,11 @@ module.exports = {
$ref: "#/definitions/OptimizationSplitChunksSizes"
}
]
},
reuseExistingChunk: {
description:
"If the current chunk contains modules already split out from the main bundle, it will be reused instead of a new one being generated. This can affect the resulting file name of the chunk.",
type: "boolean"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export default "foo-2.js";
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import "./foo-2";
export default "foo.js";
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// This arrow function is never invoked. Presumably the dynamic import is only here so the
// bundler emits a separate chunk for "./foo" without loading it at runtime (TODO confirm).
() => import("./foo");

import fs from "fs";
import path from "path";

export default "index.js";

// Checks the files next to this module: "splittedFoo.js" must exist and "foo_js.js" must not.
it("disable-reuse-existing-chunk-simple", () => {
expect(fs.existsSync(path.resolve(__dirname, "./splittedFoo.js"))).toBe(true);
expect(fs.existsSync(path.resolve(__dirname, "./foo_js.js"))).toBe(false);
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Fixture configuration for the case where `reuseExistingChunk` is disabled on the cache group.
/** @type {import("../../../../").Configuration} */
module.exports = {
target: "node",
output: {
// Output files are named after their chunk via the `[name]` placeholder.
filename: "[name].js"
},
entry: "./index.js",
experiments: {
// Presumably opts in to the new split-chunks implementation (TODO confirm).
newSplitChunks: true
},
optimization: {
splitChunks: {
minSize: 1,
cacheGroups: {
// Cache group for modules whose path matches "foo.js" or "foo-2.js"; its chunk is named "splittedFoo".
splittedFoo: {
name: "splittedFoo",
test: /(foo|foo-2)\.js/,
priority: 0,
// Reuse of an existing chunk is switched off for this cache group.
reuseExistingChunk: false
}
}
}
}
};
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export default "foo-2.js";
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import "./foo-2";
export default "foo.js";
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// This arrow function is never invoked. Presumably the dynamic import is only here so the
// bundler emits a separate chunk for "./foo" without loading it at runtime (TODO confirm).
() => import("./foo");

import fs from "fs";
import path from "path";

export default "index.js";

// Checks the files next to this module: "splittedFoo.js" must not exist and "foo_js.js" must.
it("reuse-existing-chunk-simple", () => {
expect(fs.existsSync(path.resolve(__dirname, "./splittedFoo.js"))).toBe(
false
);
expect(fs.existsSync(path.resolve(__dirname, "./foo_js.js"))).toBe(true);
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Fixture configuration for the case where `reuseExistingChunk` is enabled on the cache group.
/** @type {import("../../../../").Configuration} */
module.exports = {
target: "node",
entry: "./index.js",
output: {
// Output files are named after their chunk via the `[name]` placeholder.
filename: "[name].js"
},
experiments: {
// Presumably opts in to the new split-chunks implementation (TODO confirm).
newSplitChunks: true
},
optimization: {
splitChunks: {
minSize: 1,
cacheGroups: {
// Cache group for modules whose path matches "foo.js" or "foo-2.js"; its configured chunk name is "splittedFoo".
splittedFoo: {
name: "splittedFoo",
test: /(foo|foo-2)\.js/,
priority: 0,
// An existing chunk that already contains the matched modules is reused instead of generating a new one.
reuseExistingChunk: true
}
}
}
}
};

0 comments on commit e5d33eb

Please sign in to comment.