After updating the installed MKL version when attempting to avoid another bug we discovered, previously working code has started throwing division by zero errors after sparse matrices exceed a certain nonzero count. (Again using parallel 64bit MKL)
Unhandled exception at 0x000007FEDEC0DB3D (mkl_avx.dll) in TestSparseMultiply.exe: 0xC0000094: Integer division by zero.
Callstack:
mkl_avx.dll!000007fedec0db3d() Unknown
mkl_intel_thread.dll!000007fee0549de3() Unknownmkl_intel_thread.dll!000007fee02d5037() Unknown
Repro case:
#include <mkl.h> int _tmain(int argc, _TCHAR* argv[]) { const size_t sparseMatrixWidth = 6045696; const size_t sparseMatrixHeight = 200; const size_t sparseMatrixUsage = 1000000 / 2; // <= this should crash // const size_t sparseMatrixUsage = 1000000 / 3; // <= This still works double *sparseMatrixData = new double[sparseMatrixUsage * sparseMatrixHeight]; ::memset(sparseMatrixData, 0, sizeof(double) * sparseMatrixUsage * sparseMatrixHeight); MKL_INT *colIndices = new MKL_INT[sparseMatrixUsage * sparseMatrixHeight]; ::memset(colIndices, 0, sizeof(MKL_INT) * sparseMatrixUsage * sparseMatrixHeight); MKL_INT *rowOffsets = new MKL_INT[sparseMatrixHeight + 1]; ::memset(colIndices, 0, sizeof(MKL_INT) * (sparseMatrixHeight + 1)); ::srand(42); rowOffsets[0] = 0 + 1; // 1 based for (unsigned long y=0;y<sparseMatrixHeight;++y) { MKL_INT lastIndex = 0; for (unsigned long x=0;x<sparseMatrixUsage;++x) { // Jump forward a random amount, ensuring even if we hit the maximum jump everytime we do not exceed the matrix limits int jump = rand() % (sparseMatrixWidth / sparseMatrixUsage - 1); lastIndex = lastIndex + 1 + jump; // Ensure we jump forward at least 1 column sparseMatrixData[x + y*sparseMatrixUsage] = 1.0; colIndices[x + y*sparseMatrixUsage] = lastIndex + 1; // 1 based } rowOffsets[y + 1] = sparseMatrixUsage * (y + 1) + 1; // 1 based } // Doublecheck: Verify sparse matrix properties for (unsigned long y=0;y<sparseMatrixHeight;++y) { // Since we are using one based matrices, nothing is allowed to be zero if (rowOffsets[y] == 0) return -1; // Row data must be ordered in memory if (rowOffsets[y + 1]< rowOffsets[y]) return -1; // If rows are not empty ... if (rowOffsets[y] != rowOffsets[y + 1]) { // ... make sure column indices are one based and do not exceed the matrix size for (unsigned long i = rowOffsets[y];i < rowOffsets[y + 1];++i) { if (colIndices[i - 1] <= 0) return -1; if (colIndices[i - 1] >= sparseMatrixWidth) return -1; } // ... make sure column indices are in ascending order for (unsigned long i = rowOffsets[y];i < rowOffsets[y + 1] - 1;++i) { if (colIndices[i - 1 + 1] <= colIndices[i - 1]) return -1; } } } // Calculate matrix average by multiplying with a vector containing ones and scaling by the inverse vector length const size_t rightVectorLength = sparseMatrixWidth; double *vectorData = new double[rightVectorLength]; for (unsigned long x=0;x<rightVectorLength;++x) { vectorData[x] = 1.0; } // Store the result in this vector const size_t resultVectorLength = sparseMatrixHeight; double *resultData = new double[resultVectorLength]; char matdescra[6] = {'g', 'l', 'n', 'f', 'x', 'x'}; /* https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6 G: General. D: Diagonal L/U Lower/Upper triangular (ignored with G) N: non-unit diagonal (ignored with G) C: zero-based indexing. / F: one-based indexing */ MKL_INT strideA = static_cast<MKL_INT>(rightVectorLength); char transposeAtxt = 'N'; MKL_INT numRowsA = static_cast<MKL_INT>(sparseMatrixHeight), numDestCols = static_cast<MKL_INT>(1), numColsA = static_cast<MKL_INT>(sparseMatrixWidth); double tempAlpha = 1.0 / sparseMatrixWidth; double tempBeta = 0.0; MKL_INT resultStride = static_cast<MKL_INT>(resultVectorLength); ::mkl_dcsrmm( &transposeAtxt,&numRowsA,&numDestCols,&numColsA,&tempAlpha, matdescra, sparseMatrixData, colIndices, rowOffsets, rowOffsets + 1, vectorData, &strideA,&tempBeta, resultData, &resultStride ); return 0; }