Skip to content

Commit

Permalink
[from mpich] make ADIOI_GEN_WriteStrided not step on itself
Browse files Browse the repository at this point in the history
Merging a romio commit that was accepted by mpich:

> make ADIOI_GEN_WriteStrided not step on itself
>
> The ADIOI_GEN_WriteStrided funcion uses data sieving on non-contiguous
> types. That is, if it wants to write data at locations
>     [W...X...Y...Z...]
> it reads the whole buffer
>     [dddddddddddddddd]
> changes the locations it wants to write to
>     [WdddXdddYdddZddd]
> then writes the whole thing back. It uses locks to make this safe, but
> the problem is this only protects against other parts of the product that
> are using locks. And without this PR a peer who is simultaneously making
> a simple non-contiguous write wouldn't have locked.
>
> A testcase to demonstrate the original problem is here:
> https://gist.github.com/markalle/d7da240c19e57f095c5d1b13240dae24
>
> % mpicc -o x romio_write_timing.c
> % mpirun -np 4 ./x
>
> Note: you need to use a filesystem that uses ADIOI_GEN_WriteStrided to
> hit the bug. I was using GPFS.
>
> This commit is pulled from wkliao after discussing where to put the
> new lock.  It adds locks to contiguous writes in independent write
> functions when data sieving write is not disabled

Signed-off-by: Mark Allen <markalle@us.ibm.com>
  • Loading branch information
markalle committed Aug 11, 2021
1 parent adda190 commit ecd3e05
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 24 deletions.
28 changes: 17 additions & 11 deletions ompi/mca/io/romio321/romio/adio/ad_lustre/ad_lustre_wrstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, writebuf_off, \
&status1, error_code); \
if (!(fd->atomicity)) \
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (*error_code != MPI_SUCCESS) { \
*error_code = MPIO_Err_create_code(*error_code, \
Expand All @@ -35,7 +35,7 @@
writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
(writebuf_off / stripe_size + 1) * \
stripe_size - writebuf_off); \
if (!(fd->atomicity)) \
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \
ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, \
Expand All @@ -58,7 +58,7 @@
while (write_sz != req_len) { \
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
if (!(fd->atomicity)) \
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (*error_code != MPI_SUCCESS) { \
*error_code = MPIO_Err_create_code(*error_code, \
Expand All @@ -75,7 +75,7 @@
writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
(writebuf_off / stripe_size + 1) * \
stripe_size - writebuf_off); \
if (!(fd->atomicity)) \
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \
ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, \
Expand Down Expand Up @@ -221,8 +221,9 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count,
writebuf_off = 0;
writebuf_len = 0;

/* if atomicity is true, lock the region to be accessed */
if (fd->atomicity)
/* if atomicity is true or data sieving is not disable, lock the region
* to be accessed */
if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize);

for (j = 0; j < count; j++) {
Expand All @@ -241,7 +242,7 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count,
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
error_code);

if (fd->atomicity)
if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, bufsize);
if (*error_code != MPI_SUCCESS) {
ADIOI_Free(writebuf);
Expand Down Expand Up @@ -325,9 +326,13 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count,
userbuf_off = 0;
ADIOI_BUFFERED_WRITE_WITHOUT_READ
/* write the buffer out finally */
if (fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
error_code);
if (fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);

if (file_ptr_type == ADIO_INDIVIDUAL) {
/* update MPI-IO file pointer to point to the first byte
Expand Down Expand Up @@ -378,8 +383,9 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count,
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}

/* if atomicity is true, lock the region to be accessed */
if (fd->atomicity)
/* if atomicity is true or data sieving is not disable, lock the region
* to be accessed */
if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

writebuf_off = 0;
Expand Down Expand Up @@ -502,11 +508,11 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count,
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
ADIO_EXPLICIT_OFFSET,
writebuf_off, &status1, error_code);
if (!(fd->atomicity))
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE)
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
if (*error_code != MPI_SUCCESS) return;
}
if (fd->atomicity)
if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

ADIOI_Free(writebuf);
Expand Down
34 changes: 21 additions & 13 deletions ompi/mca/io/romio321/romio/adio/common/ad_write_str.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
if (writebuf_len) { \
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (*error_code != MPI_SUCCESS) { \
*error_code = MPIO_Err_create_code(*error_code, \
MPIR_ERR_RECOVERABLE, myname, \
Expand All @@ -25,7 +26,8 @@
} \
writebuf_off = req_off; \
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \
ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
if (*error_code != MPI_SUCCESS) { \
Expand All @@ -42,7 +44,8 @@
while (write_sz != req_len) { \
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (*error_code != MPI_SUCCESS) { \
*error_code = MPIO_Err_create_code(*error_code, \
MPIR_ERR_RECOVERABLE, myname, \
Expand All @@ -54,7 +57,8 @@
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \
ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
if (*error_code != MPI_SUCCESS) { \
Expand Down Expand Up @@ -191,9 +195,10 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count,
writebuf = (char *) ADIOI_Malloc(max_bufsize);
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1));

/* if atomicity is true, lock the region to be accessed */
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
/* if atomicity is true or data sieving is not disable, lock the region
* to be accessed */
if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1);

for (j=0; j<count; j++)
{
Expand All @@ -210,7 +215,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count,
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
writebuf_off, &status1, error_code);

if (fd->atomicity)
if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

if (*error_code != MPI_SUCCESS) goto fn_exit;
Expand Down Expand Up @@ -287,8 +292,10 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count,
* datatypes, instead of a count of bytes (which might overflow)
* Other WriteContig calls in this path are operating on data
* sieving buffer */
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, bufsize);
ADIO_WriteContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET,
offset, status, error_code);
ADIOI_UNLOCK(fd, offset, SEEK_SET, bufsize);

if (file_ptr_type == ADIO_INDIVIDUAL) {
/* update MPI-IO file pointer to point to the first byte
Expand Down Expand Up @@ -338,9 +345,10 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count,
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}

/* if atomicity is true, lock the region to be accessed */
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
/* if atomicity is true or data sieving is not disable, lock the region
* to be accessed */
if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1);

writebuf_off = 0;
writebuf_len = 0;
Expand Down Expand Up @@ -460,11 +468,11 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count,
if (writebuf_len) {
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
writebuf_off, &status1, error_code);
if (!(fd->atomicity))
if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE)
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
if (*error_code != MPI_SUCCESS) goto fn_exit;
}
if (fd->atomicity)
if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
Expand Down

0 comments on commit ecd3e05

Please sign in to comment.