Skip to content

Commit

Permalink
Fix an issue with checkpoint storage region
Browse files — browse the repository at this point in the history
  • Loading branch information
houjun committed May 19, 2021
1 parent 0cec315 commit f27393d
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 4 deletions.
35 changes: 32 additions & 3 deletions src/server/pdc_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ double total_mem_usage_g = 0.0;
/* File-scope list heads; each starts out NULL, i.e. as an empty list.
 * NOTE(review): judging by the names these hold data-server read requests,
 * write requests and storage-metadata updates -- confirm against the users. */
pdc_data_server_io_list_t *pdc_data_server_read_list_head_g = NULL;
pdc_data_server_io_list_t *pdc_data_server_write_list_head_g = NULL;
update_storage_meta_list_t *pdc_update_storage_meta_list_head_g = NULL;
/* Declared extern, so the definition lives in another translation unit.
 * PDC_Server_restart() in this file DL_APPENDs one data_server_region_t per
 * restored object onto this list.
 * NOTE(review): an extern in a .c file can drift from the real definition;
 * check whether a shared header should carry this declaration instead. */
extern data_server_region_t *dataserver_region_g;


/*
Expand Down Expand Up @@ -1147,12 +1148,27 @@ perr_t PDC_Server_checkpoint()
}
}

region_count += n_region;
if (n_write_region != n_region) {
printf("==PDC_SERVER[%d]: %s - ERROR with number of regions", pdc_server_rank_g, __func__);
ret_value = FAIL;
goto done;
}

// Write storage region info
data_server_region_t *region = NULL;
region = PDC_Server_get_obj_region(elt->obj_id);
if(region) {
DL_COUNT(region->region_storage_head, region_elt, n_region);
fwrite(&n_region, sizeof(int), 1, file);
DL_FOREACH(region->region_storage_head, region_elt) {
fwrite(region_elt, sizeof(region_list_t), 1, file);
}
}
else {
fwrite(&n_region, sizeof(int), 1, file);
}

metadata_size++;
region_count += n_region;
}
Expand Down Expand Up @@ -1315,8 +1331,8 @@ perr_t PDC_Server_restart(char *filename)
goto done;
}

if (n_region == 0)
continue;
/* if (n_region == 0) */
/* continue; */

total_region += n_region;

Expand Down Expand Up @@ -1381,6 +1397,20 @@ perr_t PDC_Server_restart(char *filename)

DL_APPEND((metadata+i)->storage_region_list_head, region_list);
} // For j

// read storage region info
fread(&n_region, sizeof(int), 1, file);
data_server_region_t *new_obj_reg = (data_server_region_t *)calloc(1, sizeof(struct data_server_region_t));
DL_APPEND(dataserver_region_g, new_obj_reg);
new_obj_reg->obj_id = (metadata+i)->obj_id;
for (j = 0; j < n_region; j++) {
region_list_t *new_region_list = (region_list_t*)malloc(sizeof(region_list_t));
fread(new_region_list, sizeof(region_list_t), 1, file);
DL_APPEND(new_obj_reg->region_storage_head, new_region_list);
}

total_region += n_region;

DL_SORT((metadata+i)->storage_region_list_head, region_cmp);
} // For i

Expand Down Expand Up @@ -1866,7 +1896,6 @@ int main(int argc, char *argv[])

// Exit from the loop, start finalize process
#ifndef DISABLE_CHECKPOINT
#else
char *tmp_env_char = getenv("PDC_DISABLE_CHECKPOINT");
if (tmp_env_char != NULL && strcmp(tmp_env_char, "TRUE")==0) {
if (pdc_server_rank_g == 0) printf("==PDC_SERVER[0]: checkpoint disabled!\n");
Expand Down
4 changes: 3 additions & 1 deletion src/server/pdc_server_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -4450,7 +4450,7 @@ int PDC_region_cache_free() {
perr_t PDC_Server_data_write_out2(uint64_t obj_id, struct pdc_region_info *region_info, void *buf, size_t unit)
{
perr_t ret_value = SUCCEED;
ssize_t write_bytes = -1;
uint64_t write_bytes = -1;
data_server_region_t *region = NULL;

FUNC_ENTER(NULL);
Expand Down Expand Up @@ -4485,6 +4485,7 @@ perr_t PDC_Server_data_write_out2(uint64_t obj_id, struct pdc_region_info *regio
}
storage_region->unit_size = unit;
storage_region->offset = lseek(region->fd, 0, SEEK_END);
strcpy(storage_region->storage_location, region->storage_location);

/* time_t t; */
/* struct tm tm; */
Expand Down Expand Up @@ -4526,6 +4527,7 @@ perr_t PDC_Server_data_write_out2(uint64_t obj_id, struct pdc_region_info *regio
storage_region->data_size = write_bytes;
DL_APPEND(region->region_storage_head, storage_region);

printf("==PDC_SERVER[%d]: write region %llu bytes\n", pdc_server_rank_g, storage_region->data_size);
done:
fflush(stdout);
FUNC_LEAVE(ret_value);
Expand Down

0 comments on commit f27393d

Please sign in to comment.