
Commit

[format] applied code formatting on changed files in pull request 4441 (#4445)

Co-authored-by: github-actions <github-actions@github.com>
github-actions[bot] and github-actions authored Aug 16, 2023
1 parent 5d4efdf commit d20dceb
Showing 2 changed files with 41 additions and 41 deletions.
80 changes: 40 additions & 40 deletions colossalai/shardformer/policies/vit.py
@@ -40,53 +40,53 @@ def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDescription]:
suffix="dropout",
target_module=DropoutForReplicatedInput,
)
])
])

policy[ViTLayer] = ModulePolicyDescription(attribute_replacement={
"attention.attention.num_attention_heads":
self.model.config.num_attention_heads // self.shard_config.tensor_parallel_size,
"attention.attention.all_head_size":
self.model.config.hidden_size // self.shard_config.tensor_parallel_size,
},
param_replacement=[],
sub_module_replacement=[
SubModuleReplacementDescription(
suffix="attention.attention.query",
target_module=col_nn.Linear1D_Col,
),
SubModuleReplacementDescription(
suffix="attention.attention.key",
target_module=col_nn.Linear1D_Col,
),
SubModuleReplacementDescription(
suffix="attention.attention.value",
target_module=col_nn.Linear1D_Col,
),
SubModuleReplacementDescription(
suffix="attention.attention.dropout",
target_module=col_nn.DropoutForParallelInput,
),
SubModuleReplacementDescription(
suffix="attention.output.dense",
target_module=col_nn.Linear1D_Row,
),
SubModuleReplacementDescription(
suffix="attention.output.dropout",
target_module=col_nn.DropoutForReplicatedInput,
),
SubModuleReplacementDescription(
suffix="intermediate.dense",
target_module=col_nn.Linear1D_Col,
),
SubModuleReplacementDescription(
suffix="output.dense",
target_module=col_nn.Linear1D_Row,
),
SubModuleReplacementDescription(
suffix="output.dropout",
target_module=col_nn.DropoutForReplicatedInput,
),
])
param_replacement=[],
sub_module_replacement=[
SubModuleReplacementDescription(
suffix="attention.attention.query",
target_module=col_nn.Linear1D_Col,
),
SubModuleReplacementDescription(
suffix="attention.attention.key",
target_module=col_nn.Linear1D_Col,
),
SubModuleReplacementDescription(
suffix="attention.attention.value",
target_module=col_nn.Linear1D_Col,
),
SubModuleReplacementDescription(
suffix="attention.attention.dropout",
target_module=col_nn.DropoutForParallelInput,
),
SubModuleReplacementDescription(
suffix="attention.output.dense",
target_module=col_nn.Linear1D_Row,
),
SubModuleReplacementDescription(
suffix="attention.output.dropout",
target_module=col_nn.DropoutForReplicatedInput,
),
SubModuleReplacementDescription(
suffix="intermediate.dense",
target_module=col_nn.Linear1D_Col,
),
SubModuleReplacementDescription(
suffix="output.dense",
target_module=col_nn.Linear1D_Row,
),
SubModuleReplacementDescription(
suffix="output.dropout",
target_module=col_nn.DropoutForReplicatedInput,
),
])

# use flash attention
if self.shard_config.enable_flash_attention:
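For context on the policy reformatted above: the Linear1D_Col / Linear1D_Row replacements follow the usual Megatron-style tensor-parallel split, with query/key/value and intermediate.dense sharded along their output dimension, and attention.output.dense and output.dense sharded along their input dimension so that partial results can be summed across ranks. Below is a minimal single-process sketch of that split in plain PyTorch; the shapes, the two-way chunking, and the explicit sum standing in for an all-reduce are illustrative assumptions, not ColossalAI's implementation.

import torch

tp_size = 2                                                    # assumed tensor-parallel degree
hidden, intermediate = 8, 16

dense_in = torch.nn.Linear(hidden, intermediate, bias=False)   # role of intermediate.dense (column-parallel)
dense_out = torch.nn.Linear(intermediate, hidden, bias=False)  # role of output.dense (row-parallel)

x = torch.randn(4, hidden)
reference = dense_out(dense_in(x))

# Column parallel: shard the first weight along its output dimension, so each
# shard produces a slice of the intermediate activation.
col_shards = dense_in.weight.chunk(tp_size, dim=0)
# Row parallel: shard the second weight along its input dimension, so each shard
# consumes only its own activation slice and emits a partial output.
row_shards = dense_out.weight.chunk(tp_size, dim=1)

# Summing the partial outputs recovers the unsharded result; in a real
# tensor-parallel run this sum is an all-reduce over the tensor-parallel group.
partial_outputs = [(x @ w_col.t()) @ w_row.t() for w_col, w_row in zip(col_shards, row_shards)]
sharded = sum(partial_outputs)

print(torch.allclose(reference, sharded, atol=1e-5))           # True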
2 changes: 1 addition & 1 deletion tests/test_pipeline/test_stage_manager.py
@@ -21,7 +21,7 @@ def check_stage_manager():
        1: [0, 1],
        2: [2, 3],
        3: [2, 3],
    }
    pg_mesh = ProcessGroupMesh(DP_SIZE, PP_SIZE)
    stage_manager = PipelineStageManager(pg_mesh, PP_DIM)
    rank = dist.get_rank()
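The dictionary in this hunk reads as the expected mapping from a global rank to the ranks that share its pipeline-parallel group on the (DP, PP) process-group mesh. A small stand-alone sketch of how that mapping arises, assuming DP_SIZE = PP_SIZE = 2 with a row-major mesh layout and the pipeline dimension last (assumptions for illustration, not taken from the test itself):

# Reconstruct the expected rank -> pipeline-group mapping under the assumptions above.
DP_SIZE, PP_SIZE = 2, 2

def pp_group_of(rank: int) -> list:
    """Ranks that lie on the same pipeline as `rank` for a row-major (DP, PP) mesh."""
    dp_index = rank // PP_SIZE
    return [dp_index * PP_SIZE + pp_index for pp_index in range(PP_SIZE)]

expected = {rank: pp_group_of(rank) for rank in range(DP_SIZE * PP_SIZE)}
print(expected)   # {0: [0, 1], 1: [0, 1], 2: [2, 3], 3: [2, 3]}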
