mpi - How to write in chunks by multiple ranks using parallel HDF5
I have a 20x20 dataset and I want to write it in chunks of 2x2 from 4 ranks in parallel, using parallel HDF5. Each rank has 25 chunks to write. I don't understand how to code this: when I use plain chunking, every rank writes the entire 20x20 dataset, and when I use a hyperslab, I don't know how to set up the multiple chunks written by each rank. Does anyone have pointers for me? I'm stuck.
I'm not sure I understand what you are asking. Here's how I interpret the question:
- Global domain of 20x20
- 4 MPI ranks
- Chunking of 2x2
You don't have to set up the write around the chunking, and in fact I don't: the 2x2 chunking is just a storage property of the dataset, and each rank writes its whole local block with a single hyperslab selection. With a 20x20 domain on 4 ranks, each block is 10x10, which covers exactly 25 of the 2x2 chunks.
Here's how I would do it:
- Do the domain decomposition with MPI.
- Generate the local matrix.
- Create the memory hyperslab (based on the local matrix).
- Create the file hyperslab (based on the global matrix); see the sketch right after this list.
- Create the dataset with a chunking property.
- Write the dataset collectively.
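The two hyperslab selections are the part that usually causes the confusion, so here is a condensed sketch of just that piece (same variable names as the full program below; halo and error checking omitted). Each rank selects its own block of the global file space, offset by its Cartesian coordinates; the 2x2 chunking never enters into the selection:

    ! Sketch only: each rank's block size and offset in the global array
    do i = 1, ndims
        s_size(i)  = g_N / m_dims(i)        ! local block size (10x10 here)
        g_start(i) = s_size(i) * coords(i)  ! this rank's offset in the file
    enddo

    ! Memory space: the whole local array (no halo in this sketch)
    call h5screate_simple_f(ndims, s_size, memspace, ierr)

    ! File space: the global 20x20 array, with only this rank's block selected
    g_size = g_N
    call h5screate_simple_f(ndims, g_size, filespace, ierr)
    call h5sselect_hyperslab_f(filespace, H5S_SELECT_SET_F, &
                               g_start, s_size, ierr)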
Here's what the full program looks like:
! Program to use MPI_Cart and Parallel HDF5
!
program hdf_pwrite

    use mpi
    use hdf5
    use kinds, only : r_dp

    implicit none

    ! Local array size with halo
    integer, parameter :: g_N   = 20
    integer, parameter :: ndims = 2
    integer, parameter :: halo  = 0

    integer :: argc          ! number of command line arguments
    integer :: ierr          ! error status
    integer :: id            ! rank/id
    integer :: np            ! number of processors
    integer :: iunit         ! file descriptor
    integer :: i, j          ! loop indexers
    integer :: n(ndims)      ! local N in the i and j directions
    integer :: total(ndims)  ! local total dimension size

    ! MPI IO/Lustre file striping
    integer :: lcount        ! Lustre count size
    integer :: lsize         ! Lustre stripe size
    character(len=1024) :: clcount, clsize   ! strings of LFS

    integer :: info                ! MPI IO info
    integer :: m_dims(ndims)       ! MPI cart dims
    integer :: coords(ndims)       ! co-ords of procs in the grid
    logical :: is_periodic(ndims)  ! periodic boundary conditions
    logical :: reorder             ! reorder the MPI structure
    integer :: MPI_COMM_2D         ! new communicator

    character(len=1024) :: filename

    integer(kind=hid_t) :: p_id, f_id, x_id, d_id, c_id
    integer(kind=hid_t) :: memspace, filespace

    ! chunk sizes
    integer(kind=hsize_t) :: c_size(ndims)

    ! local hyperslab info
    integer(kind=hsize_t) :: d_size(ndims), s_size(ndims), h_size(ndims), &
                             stride(ndims), block(ndims)

    ! global hyperslab info
    integer(kind=hsize_t) :: g_size(ndims), g_start(ndims)

    ! local data array
    real(kind=r_dp), allocatable :: ld(:,:)

    argc = 0
    ierr = 0

    m_dims      = (/ 0, 0 /)
    is_periodic = .false.   ! non-periodic
    reorder     = .false.   ! not allowed to reorder

    call mpi_init(ierr)

    ! Set up the MPI cartesian topology
    call mpi_comm_size(MPI_COMM_WORLD, np, ierr)
    call mpi_dims_create(np, ndims, m_dims, ierr)
    call mpi_cart_create(MPI_COMM_WORLD, ndims, m_dims, is_periodic, &
                         reorder, MPI_COMM_2D, ierr)
    call mpi_comm_rank(MPI_COMM_2D, id, ierr)
    call mpi_cart_coords(MPI_COMM_2D, id, ndims, coords, ierr)

    if (id .eq. 0) then
        if (mod(g_N, np) .ne. 0) then
            write(0,*) 'Must use a divisible number of procs.'
            call mpi_abort(MPI_COMM_WORLD, 1, ierr)
        endif

        ! Get the filename
        argc = command_argument_count()
        if (argc .lt. 1) then
            write(0,*) 'Must supply a filename'
            call exit(1)
        endif
        call get_command_argument(1, filename)
    endif

    ! Broadcast the filename
    call mpi_bcast(filename, len(filename), MPI_CHAR, 0, &
                   MPI_COMM_WORLD, ierr)

    ! Init the HDF5 library
    call h5open_f(ierr)

    ! Set a stripe count of 4 and a stripe size of 4MB
    lcount = 4
    lsize  = 4 * 1024 * 1024
    write(clcount, '(I4)') lcount
    write(clsize,  '(I8)') lsize

    call mpi_info_create(info, ierr)
    call mpi_info_set(info, "striping_factor", trim(clcount), ierr)
    call mpi_info_set(info, "striping_unit",   trim(clsize),  ierr)

    ! Set up the access properties
    call h5pcreate_f(H5P_FILE_ACCESS_F, p_id, ierr)
    call h5pset_fapl_mpio_f(p_id, MPI_COMM_2D, info, ierr)

    ! Open the file
    call h5fcreate_f(filename, H5F_ACC_TRUNC_F, f_id, ierr, &
                     access_prp = p_id)
    if (ierr .ne. 0) then
        write(0,*) 'Unable to open: ', trim(filename), ': ', ierr
        call mpi_abort(MPI_COMM_WORLD, 1, ierr)
    endif

    ! Generate our local matrix
    do i = 1, ndims
        n(i)     = g_N / m_dims(i)
        total(i) = n(i) + (2 * halo)
    end do

    if (halo .ne. 0) then
        allocate(ld(0:total(1)-1, 0:total(2)-1), stat=ierr)
    else
        allocate(ld(total(1), total(2)), stat=ierr)
    end if
    if (ierr .ne. 0) then
        write(0,*) 'Unable to allocate local data array: ', ierr
        call mpi_abort(MPI_COMM_WORLD, 1, ierr)
    end if

    ld = -99.99

    ! Init the local data
    do j = 1, n(2)
        do i = 1, n(1)
            ld(i,j) = id
        enddo
    enddo

    ! Create the local memory space and hyperslab
    do i = 1, ndims
        d_size(i) = total(i)
        s_size(i) = n(i)
        h_size(i) = halo
        stride(i) = 1
        block(i)  = 1
    enddo

    call h5screate_simple_f(ndims, d_size, memspace, ierr)
    call h5sselect_hyperslab_f(memspace, H5S_SELECT_SET_F, &
                               h_size, s_size, ierr,       &
                               stride, block)

    ! Create the global file space and hyperslab
    g_size = g_N
    do i = 1, ndims
        g_start(i) = n(i) * coords(i)
    enddo

    call h5screate_simple_f(ndims, g_size, filespace, ierr)
    call h5sselect_hyperslab_f(filespace, H5S_SELECT_SET_F, &
                               g_start, s_size, ierr,       &
                               stride, block)

    ! Create a data chunking property
    c_size = 2
    call h5pcreate_f(H5P_DATASET_CREATE_F, c_id, ierr)
    call h5pset_chunk_f(c_id, ndims, c_size, ierr)

    ! Create the dataset id
    call h5dcreate_f(f_id, "/data", H5T_IEEE_F64LE, filespace, d_id, &
                     ierr, dcpl_id=c_id)

    ! Create a data transfer property (collective MPI-IO)
    call h5pcreate_f(H5P_DATASET_XFER_F, x_id, ierr)
    call h5pset_dxpl_mpio_f(x_id, H5FD_MPIO_COLLECTIVE_F, ierr)

    ! Write the data
    call h5dwrite_f(d_id, H5T_IEEE_F64LE, ld, s_size, ierr,         &
                    file_space_id=filespace, mem_space_id=memspace, &
                    xfer_prp=x_id)

    if (allocated(ld)) then
        deallocate(ld)
    endif

    ! Close everything and exit
    call h5dclose_f(d_id, ierr)
    call h5sclose_f(filespace, ierr)
    call h5sclose_f(memspace, ierr)
    call h5pclose_f(c_id, ierr)
    call h5pclose_f(x_id, ierr)
    call h5pclose_f(p_id, ierr)
    call h5fclose_f(f_id, ierr)
    call h5close_f(ierr)
    call mpi_finalize(ierr)

end program hdf_pwrite

For completeness, here is the definition of the kinds module:
module kinds

    use, intrinsic :: iso_fortran_env

    implicit none
    private

    public :: i_sp, i_dp, &
              r_sp, r_dp, r_qp

    integer, parameter :: i_sp = int32
    integer, parameter :: i_dp = int64

    integer, parameter :: r_sp = real32
    integer, parameter :: r_dp = real64
    integer, parameter :: r_qp = real128

end module kinds

Then compiling, running, and looking at the output file:
$ make
rm -f kinds.o kinds.mod
h5pfc -c -O3 -o kinds.o kinds.f90
rm -f hdf_pwrite.o hdf_pwrite.mod
h5pfc -c -O3 -o hdf_pwrite.o hdf_pwrite.f90
h5pfc -O3 -o hdf_pwrite kinds.o hdf_pwrite.o

$ mpiexec -np 4 ./hdf_pwrite test.h5

$ h5dump test.h5
HDF5 "test.h5" {
GROUP "/" {
   DATASET "data" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 20, 20 ) / ( 20, 20 ) }
      DATA {
      (0,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (1,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (2,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (3,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (4,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (5,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (6,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (7,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (8,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (9,0):  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      (10,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (11,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (12,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (13,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (14,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (15,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (16,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (17,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (18,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      (19,0): 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
      }
   }
}
}

I hope this helps.
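If you also want to confirm that the dataset really is stored with 2x2 chunks, h5dump can print the dataset header and creation properties (chunk dimensions included) without dumping the data:

    $ h5dump -p -H test.h5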
Edit: Of course, you should really use a better algorithm for the domain decomposition, e.g. MPE_DECOMP1D.
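As a rough illustration of what such a decomposition looks like, here is a minimal sketch in the spirit of MPE_DECOMP1D (the helper from Gropp et al.'s "Using MPI"); the name and interface here are illustrative, not the library routine itself. It spreads the remainder over the lower ranks when the global size does not divide evenly:

    ! Sketch of a 1-D block decomposition: split n points over numprocs
    ! ranks, returning this rank's start s and end e (1-based, inclusive).
    subroutine decomp1d(n, numprocs, myid, s, e)
        implicit none
        integer, intent(in)  :: n, numprocs, myid
        integer, intent(out) :: s, e
        integer :: nlocal, deficit

        nlocal  = n / numprocs
        s       = myid * nlocal + 1
        deficit = mod(n, numprocs)
        s       = s + min(myid, deficit)        ! shift start past the larger blocks
        if (myid .lt. deficit) nlocal = nlocal + 1
        e       = s + nlocal - 1
        if (e .gt. n .or. myid .eq. numprocs - 1) e = n
    end subroutine decomp1d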