Formatting fixes
loadams committed Oct 28, 2024
1 parent ab6e29b commit e95086f
Showing 9 changed files with 34 additions and 32 deletions.
5 changes: 2 additions & 3 deletions csrc/aio/common/deepspeed_aio_common.cpp
@@ -301,9 +301,8 @@ int regular_read(const char* filename, std::vector<char>& buffer)
} while (r > 0);

if (read_bytes != num_bytes) {
std::cerr << "read error "
<< " read_bytes (read) = " << read_bytes << " num_bytes (fstat) = " << num_bytes
<< std::endl;
std::cerr << "read error " << " read_bytes (read) = " << read_bytes
<< " num_bytes (fstat) = " << num_bytes << std::endl;
}
assert(read_bytes == num_bytes);
close(fd);
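For context, the read_bytes / num_bytes check above comes from a loop that calls read() until it returns 0 and compares the accumulated total against the size reported by fstat(). A minimal, self-contained sketch of that pattern (illustrative only, not the repository's exact code):

#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cassert>
#include <vector>

// Read an entire file and verify the byte count against fstat's size,
// mirroring the read_bytes/num_bytes consistency check shown above.
static long read_whole_file(const char* filename, std::vector<char>& buffer)
{
    const int fd = open(filename, O_RDONLY);
    if (fd < 0) { return -1; }

    struct stat st;
    if (fstat(fd, &st) < 0) {
        close(fd);
        return -1;
    }
    const long num_bytes = st.st_size;  // size reported by fstat
    buffer.resize(num_bytes);

    long read_bytes = 0;
    ssize_t r = 0;
    do {  // accumulate until read() reports EOF or an error
        r = read(fd, buffer.data() + read_bytes, num_bytes - read_bytes);
        if (r > 0) { read_bytes += r; }
    } while (r > 0);

    close(fd);
    assert(read_bytes == num_bytes);
    return read_bytes;
}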
10 changes: 4 additions & 6 deletions csrc/aio/py_lib/deepspeed_py_aio.cpp
@@ -72,9 +72,8 @@ int deepspeed_py_aio_write(const torch::Tensor& buffer,

const std::chrono::duration<double> fn_time =
std::chrono::high_resolution_clock::now() - start_time;
std::cout << "Elapsed time(usec): "
<< "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
<< std::endl;
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
<< " call = " << fn_time.count() * 1e6 << std::endl;
return 0;
}

@@ -118,8 +117,7 @@ int deepspeed_py_aio_read(torch::Tensor& buffer,

const std::chrono::duration<double> fn_time =
std::chrono::high_resolution_clock::now() - start_time;
std::cout << "Elapsed time(usec): "
<< "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
<< std::endl;
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
<< " call = " << fn_time.count() * 1e6 << std::endl;
return 0;
}
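The "Elapsed time(usec)" lines being reflowed in this file all follow the same pattern: keep the duration as std::chrono::duration<double> (seconds) and multiply by 1e6 when printing microseconds. A small standalone sketch of that pattern, with a placeholder for the work being timed:

#include <chrono>
#include <iostream>

int main()
{
    const auto start_time = std::chrono::high_resolution_clock::now();

    // ... the aio read/write work being measured would go here ...

    const std::chrono::duration<double> fn_time =
        std::chrono::high_resolution_clock::now() - start_time;
    std::cout << "Elapsed time(usec): call = " << fn_time.count() * 1e6 << std::endl;
    return 0;
}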
10 changes: 4 additions & 6 deletions csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
@@ -93,9 +93,8 @@ int deepspeed_aio_handle_t::read(torch::Tensor& buffer, const char* filename, co
if (validate) { validate_aio_operation(true, filename, read_buffer, num_file_bytes); }
const std::chrono::duration<double> fn_time =
std::chrono::high_resolution_clock::now() - start_time;
std::cout << "Elapsed time(usec): "
<< "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
<< std::endl;
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
<< " call = " << fn_time.count() * 1e6 << std::endl;
return 0;
}

@@ -128,9 +127,8 @@ int deepspeed_aio_handle_t::write(const torch::Tensor& buffer,

const std::chrono::duration<double> fn_time =
std::chrono::high_resolution_clock::now() - start_time;
std::cout << "Elapsed time(usec): "
<< "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
<< std::endl;
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
<< " call = " << fn_time.count() * 1e6 << std::endl;
return 0;
}

2 changes: 1 addition & 1 deletion csrc/aio/py_lib/deepspeed_py_copy.cpp
@@ -10,7 +10,7 @@ Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
#include "deepspeed_py_copy.h"
#include <omp.h>

- #define ROUND_DOWN(size, step) ((size) & ~((step)-1))
+ #define ROUND_DOWN(size, step) ((size) & ~((step) - 1))

#if defined(__AVX512__) or defined(__AVX256__)
union AVX_Data {
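The only change in this file adds a space inside the ROUND_DOWN macro. The macro itself rounds size down to a multiple of step by clearing the low bits, which works only when step is a power of two. A tiny illustrative example:

#include <cstdio>

#define ROUND_DOWN(size, step) ((size) & ~((step) - 1))

int main()
{
    std::printf("%d\n", ROUND_DOWN(1000, 16));  // prints 992 (62 * 16)
    std::printf("%d\n", ROUND_DOWN(1024, 16));  // prints 1024 (already aligned)
    return 0;
}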
9 changes: 4 additions & 5 deletions csrc/deepspeed4science/evoformer_attn/gemm_kernel_utils.h
@@ -125,11 +125,10 @@ struct CheckArch {
std::cerr << #PTR " is not correctly aligned\n"; \
return false; \
}
- #define EVOFORMER_CHECK(COND, ERR) \
- if (!(COND)) { \
- std::cerr << "[Evoformer Attention]" \
- << "'" #COND "' failed: " << ERR << "\n"; \
- return false; \
+ #define EVOFORMER_CHECK(COND, ERR) \
+ if (!(COND)) { \
+ std::cerr << "[Evoformer Attention]" << "'" #COND "' failed: " << ERR << "\n"; \
+ return false; \
}
#endif

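For readers unfamiliar with the macro being reformatted above: #COND stringifies the failing condition, and the macro returns false from the enclosing bool-returning function. A hedged usage sketch (the caller's name and condition below are illustrative, not from the repository):

#include <iostream>

#define EVOFORMER_CHECK(COND, ERR)                                                     \
    if (!(COND)) {                                                                     \
        std::cerr << "[Evoformer Attention]" << "'" #COND "' failed: " << ERR << "\n"; \
        return false;                                                                  \
    }

// Illustrative caller: validates an argument and reports a readable error on failure.
bool check_head_dim(int head_dim)
{
    EVOFORMER_CHECK(head_dim % 8 == 0, "head_dim must be a multiple of 8");
    return true;
}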
2 changes: 1 addition & 1 deletion csrc/includes/simd.h
@@ -27,7 +27,7 @@ inline void writeAs(void* dst, const T& val)
std::memcpy(dst, &val, sizeof(T));
}

- #define ROUND_DOWN(size, step) ((size) & ~((step)-1))
+ #define ROUND_DOWN(size, step) ((size) & ~((step) - 1))

#if defined(__AVX512__)
#define SIMD_STORE(a, d) _mm512_storeu_ps(a, d)
2 changes: 1 addition & 1 deletion csrc/xpu/includes/simd.h
File mode changed from 100755 to 100644
@@ -13,7 +13,7 @@
#define TILE (128 * 1024 * 1024)
#if defined(__AVX512__) or defined(__AVX256__)

- #define ROUND_DOWN(size, step) ((size) & ~((step)-1))
+ #define ROUND_DOWN(size, step) ((size) & ~((step) - 1))

#if defined(__AVX512__)
#define SIMD_STORE(a, d) _mm512_storeu_ps(a, d)
10 changes: 5 additions & 5 deletions csrc/xpu/includes/type_shim.h
@@ -82,11 +82,11 @@
}

template <typename T>
- __inline__ __attribute__((always_inline)) T reduce_block_into_lanes(
- T* x,
- T val,
- int lanes = 1,
- bool share_result = false) // lanes is intended to be <= 32.
+ __inline__ __attribute__((always_inline)) T
+ reduce_block_into_lanes(T* x,
+ T val,
+ int lanes = 1,
+ bool share_result = false) // lanes is intended to be <= 32.
{
auto item_ct1 = sycl::ext::oneapi::experimental::this_nd_item<3>();
int tid = item_ct1.get_local_id(2) + item_ct1.get_local_id(1) * item_ct1.get_local_range(2);
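The signature being rewrapped here belongs to a block-level reduction helper. As a rough mental model only (this is an assumption about the typical semantics of such a helper, not code taken from the SYCL kernel above), a serial equivalent folds every value whose index is congruent to the same lane modulo lanes:

#include <cstddef>
#include <vector>

// Serial model: out[lane] accumulates vals[i] for every i with i % lanes == lane.
// With lanes == 1 this reduces to a plain sum of all values.
template <typename T>
std::vector<T> reduce_values_into_lanes(const std::vector<T>& vals, int lanes = 1)
{
    std::vector<T> out(static_cast<std::size_t>(lanes), T(0));
    for (std::size_t i = 0; i < vals.size(); ++i) {
        out[i % static_cast<std::size_t>(lanes)] += vals[i];
    }
    return out;
}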
16 changes: 12 additions & 4 deletions deepspeed/inference/v2/model_implementations/phi3small/model.py
@@ -112,7 +112,6 @@ def positional_embedding_config(self) -> Optional[RotateHalfConfig]:
def mup_embedding_multiplier(self) -> float:
return 10.0


"""
Forward implementations
"""
@@ -160,7 +159,10 @@ def _forward_transformer_layer(self, layer_idx: int, residual: torch.Tensor, hid
if self.tp_size > 1:
dist.all_reduce(hidden_states, group=self._base_mp_group)

- residual, hidden_states = self.norm(residual, hidden_states, cur_params.mlp_norm_gamma, beta=cur_params.mlp_norm_beta)
+ residual, hidden_states = self.norm(residual,
+ hidden_states,
+ cur_params.mlp_norm_gamma,
+ beta=cur_params.mlp_norm_beta)

hidden_states = self.mlp_1(hidden_states, cur_params.mlp_1_w, b=None)
hidden_states = self.mlp_2(hidden_states, cur_params.mlp_2_w, b=None)
@@ -170,7 +172,10 @@ def _forward_transformer_layer(self, layer_idx: int, residual: torch.Tensor, hid

if layer_idx != self.num_layers - 1:
next_params = self._transformer[layer_idx + 1]
- residual, hidden_states = self.norm(residual, hidden_states, next_params.attn_norm_gamma, beta=next_params.attn_norm_beta)
+ residual, hidden_states = self.norm(residual,
+ hidden_states,
+ next_params.attn_norm_gamma,
+ beta=next_params.attn_norm_beta)
else:
# On last layer, we just need to perform the residual add. Adding into the residual
# here is safe.
@@ -205,7 +210,10 @@ def _forward_unembed(self, hidden_states: torch.Tensor, ragged_batch_info: Ragge
def forward(self, wrapped_batch: RaggedBatchWrapper) -> torch.Tensor:
residual = self._forward_embed(wrapped_batch)

- residual, hidden_states = self.norm(residual, None, gamma=self._transformer[0].attn_norm_gamma, beta=self._transformer[0].attn_norm_beta)
+ residual, hidden_states = self.norm(residual,
+ None,
+ gamma=self._transformer[0].attn_norm_gamma,
+ beta=self._transformer[0].attn_norm_beta)

for layer_idx in range(self.num_layers):
residual, hidden_states = self._forward_transformer_layer(layer_idx, residual, hidden_states,
