I'm trying to use the Intel MKL Inspector/Executor Sparse BLAS library and I've been struggling with faulty memory use in the `mkl_sparse_convert_csr` subroutine. The simple program below can reproduce my problem:
!> Minimal reproducer for the MKL Inspector/Executor Sparse BLAS:
!> builds an n-by-n diagonal matrix of ones in COO format, converts it to CSR,
!> then destroys both matrix handles.
program debug
  use mkl_spblas, only: sparse_matrix_t, mkl_sparse_d_create_coo, &
                        mkl_sparse_convert_csr, mkl_sparse_destroy, &
                        SPARSE_INDEX_BASE_ONE, SPARSE_OPERATION_NON_TRANSPOSE, &
                        SPARSE_STATUS_SUCCESS
  use omp_lib, only: omp_set_num_threads
  use, intrinsic :: iso_c_binding, only: c_int, c_double
  implicit none

  ! Matrix order; also the number of stored entries (one per diagonal element).
  integer, parameter :: n = 10000

  integer :: stat, i
  integer(kind = c_int), dimension(n) :: irn, jcn
  real(kind = c_double), dimension(n) :: val
  type(sparse_matrix_t) :: mat1, mat2

  ! One-based COO triplets (i, i, 1.0) for i = 1..n.
  irn = [(i, i = 1, n)]
  jcn = irn
  val = 1.0_c_double

  ! Limit OpenMP to a single thread before any MKL call is made.
  call omp_set_num_threads(1)

  ! The arrays irn, jcn and val are handed to MKL here and stay in scope
  ! until both handles have been destroyed below.
  stat = mkl_sparse_d_create_coo(A = mat1, indexing = SPARSE_INDEX_BASE_ONE, &
                                 rows = n, cols = n, nnz = n, &
                                 row_indx = irn, col_indx = jcn, values = val)
  ! error stop (rather than stop) so a failure yields a non-zero exit status.
  if (stat /= SPARSE_STATUS_SUCCESS) error stop 'Error in mkl_sparse_d_create_coo'

  stat = mkl_sparse_convert_csr(source = mat1, &
                                operation = SPARSE_OPERATION_NON_TRANSPOSE, &
                                dest = mat2)
  if (stat /= SPARSE_STATUS_SUCCESS) error stop 'Error in mkl_sparse_convert_csr'

  stat = mkl_sparse_destroy(A = mat1)
  if (stat /= SPARSE_STATUS_SUCCESS) error stop 'Error in mkl_sparse_destroy (mat1)'

  stat = mkl_sparse_destroy(A = mat2)
  if (stat /= SPARSE_STATUS_SUCCESS) error stop 'Error in mkl_sparse_destroy (mat2)'

  ! Called last, after both handles are destroyed, to ask MKL to release
  ! whatever memory its allocator is still holding.
  call mkl_free_buffers
end program debug
When I check with Valgrind I get the following report of memory leaks:
==27267== Memcheck, a memory error detector ==27267== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. ==27267== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info ==27267== Command: ../bin/LINKS_debug ==27267== ==27267== ==27267== HEAP SUMMARY: ==27267== in use at exit: 495 bytes in 6 blocks ==27267== total heap usage: 47 allocs, 41 frees, 463,031 bytes allocated ==27267== ==27267== 8 bytes in 1 blocks are still reachable in loss record 1 of 6 ==27267== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==27267== by 0x504CA98: gomp_malloc (alloc.c:37) ==27267== by 0x505BA56: gomp_init_num_threads (proc.c:91) ==27267== by 0x504B06A: initialize_env (env.c:1244) ==27267== by 0x4010732: call_init (dl-init.c:72) ==27267== by 0x4010732: _dl_init (dl-init.c:119) ==27267== by 0x40010C9: ??? (in /lib/x86_64-linux-gnu/ld-2.27.so) ==27267== ==27267== 8 bytes in 1 blocks are still reachable in loss record 2 of 6 ==27267== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==27267== by 0x152F22: mkl_serv_malloc (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x1261B4: mkl_sparse_d_create_coo_i4_avx2 (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x112AF8: MAIN__ (main.f90:49) ==27267== by 0x112C07: main (main.f90:31) ==27267== ==27267== 32 bytes in 1 blocks are still reachable in loss record 3 of 6 ==27267== at 0x4C31B25: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==27267== by 0x590C7E4: _dlerror_run (dlerror.c:140) ==27267== by 0x590C050: dlopen@@GLIBC_2.2.5 (dlopen.c:87) ==27267== by 0x150F32: mkl_serv_inspector_suppress (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x150E8C: mkl_serv_lock (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x14EFA1: mkl_serv_cpu_detect (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x112EC4: mkl_sparse_d_create_coo_i4 (in /home/rcarvalho/repos/debug/bin/LINKS_debug) 
==27267== by 0x112AF8: MAIN__ (main.f90:49) ==27267== by 0x112C07: main (main.f90:31) ==27267== ==27267== 47 bytes in 1 blocks are still reachable in loss record 4 of 6 ==27267== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==27267== by 0x4017880: _dl_exception_create (dl-exception.c:77) ==27267== by 0x6996250: _dl_signal_error (dl-error-skeleton.c:117) ==27267== by 0x4009812: _dl_map_object (dl-load.c:2384) ==27267== by 0x4014EE3: dl_open_worker (dl-open.c:235) ==27267== by 0x69962DE: _dl_catch_exception (dl-error-skeleton.c:196) ==27267== by 0x40147C9: _dl_open (dl-open.c:605) ==27267== by 0x590BF95: dlopen_doit (dlopen.c:66) ==27267== by 0x69962DE: _dl_catch_exception (dl-error-skeleton.c:196) ==27267== by 0x699636E: _dl_catch_error (dl-error-skeleton.c:215) ==27267== by 0x590C734: _dlerror_run (dlerror.c:162) ==27267== by 0x590C050: dlopen@@GLIBC_2.2.5 (dlopen.c:87) ==27267== ==27267== 192 bytes in 1 blocks are still reachable in loss record 5 of 6 ==27267== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==27267== by 0x504CA98: gomp_malloc (alloc.c:37) ==27267== by 0x5059B65: gomp_get_thread_pool (pool.h:42) ==27267== by 0x5059B65: get_last_team (team.c:146) ==27267== by 0x5059B65: gomp_new_team (team.c:165) ==27267== by 0x5050DDB: GOMP_parallel_start (parallel.c:126) ==27267== by 0x17D0A4: mkl_sparse_d_coo_csr_new_omp_i4 (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x17D4A7: mkl_sparse_d_convert_coo_to_csr_i4 (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x17D554: mkl_sparse_d_export_csr_data_i4 (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x126E68: mkl_sparse_d_convert_csr_i4_avx2 (in /home/rcarvalho/repos/debug/bin/LINKS_debug) ==27267== by 0x112B38: MAIN__ (main.f90:52) ==27267== by 0x112C07: main (main.f90:31) ==27267== ==27267== 208 bytes in 1 blocks are still reachable in loss record 6 of 6 ==27267== at 0x4C2FB0F: malloc (in 
/usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==27267== by 0x504CA98: gomp_malloc (alloc.c:37) ==27267== by 0x505AFFA: gomp_new_icv (team.c:968) ==27267== by 0x504CF24: omp_set_num_threads (libgomp.h:681) ==27267== by 0x112AB3: MAIN__ (main.f90:47) ==27267== by 0x112C07: main (main.f90:31) ==27267== ==27267== LEAK SUMMARY: ==27267== definitely lost: 0 bytes in 0 blocks ==27267== indirectly lost: 0 bytes in 0 blocks ==27267== possibly lost: 0 bytes in 0 blocks ==27267== still reachable: 495 bytes in 6 blocks ==27267== suppressed: 0 bytes in 0 blocks ==27267== ==27267== For counts of detected and suppressed errors, rerun with: -v ==27267== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
It seems that this kind of problem has also been reported before and, as suggested in https://stackoverflow.com/questions/37395541/mkl-sparse-blas-segfault-wh..., I'm already setting the number of threads to 1 and calling the `mkl_free_buffers` subroutine. However, the problem is still there and, in a bigger project I have, this memory leak leads to a program crash due to invalid writes. Any idea on how to solve this?