#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <time.h>
#include <mpi.h>
#include <mkl_blacs.h>
#include <mkl_scalapack.h>
#include <mkl_lapacke.h>
#include <mkl_cblas.h>
#include <errno.h>



int main(int    argc,
         char **argv)
{
  int i,j;
  // test parameters (default)
  int m      = 4000;
  int n      = 4000;
  int mb     = 8;
 int nb     = 8;
  int nprows = 8;
 int npcols = 8; // temp values

 // parameter value change (optional)
  if(argc >=5){
    m=atoi(argv[1]);
    n=atoi(argv[2]);
    nprows=atoi(argv[3]);
    npcols=atoi(argv[4]);
  
  }

  // time and validity
  double startTime;
  double endTime;
  double gap;
  double flops;

 //QR
 double * A;
 double * tau;
 double * work;

  int mpirank, mpisize;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  MPI_Comm_size(MPI_COMM_WORLD, &mpisize);

  int myid, numproc, ctxt, myrow, mycol;
  MKL_INT descA[9];
  MKL_INT zero = 0;
 MKL_INT one = 1;
 MKL_INT info = 0;

  Cblacs_pinfo(&myid, &numproc);
  if(numproc > 1 && myid != 0){
    Cblacs_setup(&myid, &numproc);
  }
  Cblacs_get(-1, 0, &ctxt);
  Cblacs_gridinit(&ctxt, "R", nprows, npcols);
  blacs_gridinfo_(&ctxt, &nprows, &npcols, &myrow, &mycol);

  if(myrow == -1){
    return 0;
  }

 /*
  * temp value for fortran
  */
 char aform = 'N';
 char diag = 'N';
 MKL_INT lda = m;
 MKL_INT iarow = 0;
 MKL_INT iacol = 0;
 MKL_INT iseed = 10;
 MKL_INT iroff = 0;
 MKL_INT irnum = numroc_(&m, &mb, &myrow, &zero, &nprows);
 MKL_INT icoff = 0;
 MKL_INT icnum = numroc_(&n, &nb, &mycol, &zero, &npcols);
 MKL_INT lwork = -1;

 //descinit(desc,  m,  n,  mb,  nb, irsrc, icsrc, ictxt, LLD, info)
  descinit_(descA, &m, &n, &mb, &nb, &zero, &zero, &ctxt, &icnum, &info);
 A = (double*)malloc(sizeof(double)*irnum*icnum);
 tau = (double*)malloc(sizeof(double)*(m*n/2)); //array size should >= LOCc(ja+min(m,n)-1)
 work = (double*)malloc(sizeof(double)*m*n);

 //pdmatgen_(&ctxt, &aform, &diag, &m, &n, &mb, &nb, A, &m, &iarow, &iacol, &iseed, &iroff, &irnum, &icoff, &icnum, &myrow, &mycol, &nprows, &npcols);


 /*
  *  generate matrix (by column major)
  *
  * matrix      : A
  * size        : irnum * icnum
  * seed        : 10
  */
  for(j = 0; j < irnum; ++j)
  {
    for(i = 0; i < icnum; ++i)
    {
      A[i*irnum+j] = rand()%10;
    }
  }

  printf("QR valid test (MPI)\n");

 
 // pdgeqrf routine
 MPI_Barrier(MPI_COMM_WORLD);
  startTime = MPI_Wtime();
 pdgeqrf_(&m,&n,A,&one,&one,descA,tau,work,&lwork,&info);
  //info = dgeqrf(LAPACK_COL_MAJOR, m, n, A, m, tau, descA, mpirank);
 MPI_Barrier(MPI_COMM_WORLD);
  endTime = MPI_Wtime();

  // flops
  gap = (double)( endTime - startTime );
  flops = (2.0 * (double)n * (double)n * (double)(m-n/3) ) * 1.0e-9 / gap;
  printf("info\t%d, dgemm time (sec)\t%f, Gflops\t%f \n", info, gap, flops);
 
  Cblacs_gridexit(ctxt);
  Cblacs_exit(&zero);
  MPI_Finalize();
   
  free(A);
  free(tau);
 free(work);
  return 0;
}

i have compiled my code like this :

 mpiicc scalapack_test2.c -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -mkl

I have confirmed that the returned info value is 0, but some things look odd, especially the timings.

In addition, I could not check whether this result is correct, and I am not sure that I am initializing the data set correctly.

this is result texts what i confirmed.

//  form : ./a.out m n nprow npcol

[@localhost src]$ mpirun -n 4 ./a.out 6400 6400 2 2
QR valid test (MPI)
QR valid test (MPI)
QR valid test (MPI)
QR valid test (MPI)
info    0, dgemm time (sec)     0.001011, Gflops        345703.851960
info    0, dgemm time (sec)     0.001003, Gflops        348580.607742
info    0, dgemm time (sec)     0.001141, Gflops        306337.241154
info    0, dgemm time (sec)     0.001143, Gflops        305826.040084

[@localhost src]$ mpirun -n 1 ./a.out 6400 6400 1 1
QR valid test (MPI)
info    0, dgemm time (sec)     0.000237, Gflops        1474979.915656

Could you give me some advice on what I have misunderstood or misused? If you could give me a good example of this, I would be very grateful.

Attachment	Size
Download figure 1.jpg	158.32 KB

Thread Topic:

Help Me

↧

MKL crashing when creating too many OpenMP threads

May 4, 2017, 11:01 am

Latest and popular articles on Intel Technologies

≫ Next: OpenMP with MKL - Get Thread Num

≪ Previous: Data initialize task on scalapack with C file

Hi,
I have 64 threads running on an Intel Xeon Phi 7230. Each thread can run the following MKL routine:

@constraint (ComputingUnits="${ComputingUnits}")
@task(returns=list)
def createBlock(BSIZE, MKLProc, diag):
    """Build a random symmetric BSIZE x BSIZE block.

    Before generating data, sets the KMP_AFFINITY and MKL_NUM_THREADS
    environment variables of the current process (the latter to MKLProc).

    The block is a uniformly random square matrix added to its own transpose.
    When ``diag`` is truthy, 2*BSIZE times the identity is added on top.

    Returns the resulting ``np.matrix`` of doubles.
    """
    import os
    os.environ.update(KMP_AFFINITY="verbose", MKL_NUM_THREADS=str(MKLProc))

    shape = (BSIZE, BSIZE)
    samples = np.array(np.random.random(shape), dtype=np.double, copy=False)
    sym = np.matrix(samples, dtype=np.double, copy=False)
    sym = sym + np.transpose(sym)
    if not diag:
        return sym
    return sym + 2*BSIZE*np.eye(BSIZE)

MKL_NUM_THREADS is set to 64 in order to take advantage of all the cores. When executing the routine number 32, I obtain the following error:

OMP: Error #34: System unable to allocate necessary resources for OMP thread:
OMP: System error #11: Resource temporarily unavailable
OMP: Hint: Try decreasing the value of OMP_NUM_THREADS.

I've found here https://software.intel.com/en-us/forums/intel-open-source-openmp-runtime... that threads are not destroyed, so I may be reaching the thread limit of the machine. The thing is that, at any given time, only one thread is running, so only 64 OpenMP threads are awake. My problem is that I'm running this code on a shared cluster, so I would rather not recompile the library with custom settings if possible. Is there a way to avoid this problem without decreasing the number of threads running on the machine? I think that using fewer threads would avoid this problem, but this is part of a bigger program and I am really interested in keeping the 64 threads.

Regards,

Ramon

Thread Topic:

Question

↧

OpenMP with MKL - Get Thread Num

May 4, 2017, 1:47 pm

Latest and popular articles on Intel Technologies

≫ Next: Compute schur complement with Pardiso in out-of-core mode

≪ Previous: MKL crashing when creating too many OpenMP threads

When using OpenMP I can get the thread num by using:

omp_get_thread_num()

However, when using it with MKL, the command doesn't work. Is there an MKL command which replaces this?

Thank you,

Raphael

Thread Topic:

Question

↧

Compute schur complement with Pardiso in out-of-core mode

May 4, 2017, 11:34 pm

Latest and popular articles on Intel Technologies

≫ Next: FGMRES preconditioner applied to?

≪ Previous: OpenMP with MKL - Get Thread Num

Hi,

I'm using pardiso_64 with intel mkl 11.3.3.1.

I am computing the Schur complement of SPD matrices (mtype = 2). In in-core mode everything is OK. In out-of-core mode (iparm(60)=2), I get no error code, but just an all-zero solution matrix.

My iparm array is as follows:
iparm(1) = 1;
iparm(10)=8;
iparm(21)=1;
iparm(35)=1;
iparm(60)=2;

(otherwise zero values).

I've set the phase parameter to 12.

Am I doing something wrong, or is only in-core mode possible for Schur complement calculation?

Best,
Jens

Thread Topic:

Help Me

↧

FGMRES preconditioner applied to?

May 7, 2017, 11:59 pm

Latest and popular articles on Intel Technologies

≫ Next: INTEL MKL ERROR : PARAMETER 5 WAS INCORRECT ON ENTRY TO MKL_DDIASM

≪ Previous: Compute schur complement with Pardiso in out-of-core mode

I am using the MKL's preconditioned FGMRES solver and I am trying to understand what exactly is the vector that FGMRES is asking to apply the preconditioner to. From the reference for the solver, Saad's Iterative methods for sparse linear systems, the left-preconditioned GMRES iteration (I'm assuming that FGMRES does left-preconditioning, please correct me if I'm wrong) involves computing at each step M^-1 A v_j. That is, FGMRES first asks to compute the matrix vector product A v_j, and then I would assume that FGMRES would ask to apply the preconditioner on that result, i.e. compute M^-1 A v_j. Only, when I compute the squared 2 norm of the vectors involved, I get that the vector on which FGMRES asks to apply the preconditioner (which I would assume to be A v_j) always has a unit norm, regardless of the norm of A v_j. What is this unit norm vector that FGMRES is asking the user to apply the preconditioner to?

Thread Topic:

Question

↧

INTEL MKL ERROR : PARAMETER 5 WAS INCORRECT ON ENTRY TO MKL_DDIASM

May 8, 2017, 11:32 am

Latest and popular articles on Intel Technologies

≫ Next: gemv for sparse, rectangular matrices?

≪ Previous: FGMRES preconditioner applied to?

Hello to all of my new friends.

I am currently writing a code for a "Turbulent Negatively Buoyant Jet" for my BSc thesis.

I would like to use MKL_DDIASM for part of my code, but when I run the code the following message appears and all of my results (argument C) are zero. The message is: INTEL MKL ERROR : PARAMETER 5 WAS INCORRECT ON ENTRY TO MKL_DDIASM

Then I tried to write a simple code to understand exactly what DDIASM does. I wrote two simple codes, which I include here. The first code works well but the second code doesn't. Note that the second code has a general sparse matrix "A".

When I define MATDESCRA (1) = "G", this message appears: INTEL MKL ERROR : PARAMETER 5 WAS INCORRECT ON ENTRY TO MKL_DDIASM

My 1st question: What is the problem with "G" for MATDESCRA (1)?

My 2nd question: What should I do to solve the system of equations in my 2nd simple code and obtain correct answers with DDIASM? (How exactly must I define MATDESCRA?)

PROGRAM simple_code_2nd
    ! Solves the dense 3x3 system A * X = B for two right-hand sides and
    ! prints X row by row.
    !
    ! The program is renamed because a Fortran name must start with a letter.
    !
    ! The original version called MKL_DDIASM with MATDESCRA(1) = 'G'.
    ! MKL_DDIASM is a triangular solver, so it rejects a "general" descriptor
    ! (reported as "parameter 5 was incorrect"). The matrix below has entries
    ! both below (A(2,1)) and above (A(1,3)) the main diagonal, so it is not
    ! triangular and cannot be handled by that routine at all.
    ! A general solver is used instead: LAPACK DGESV (LU with pivoting),
    ! which is also part of MKL.
    !
    ! Expected output for the data below:
    !     1.0  4.0
    !     2.0  5.0
    !     3.0  6.0
    IMPLICIT NONE
    INTEGER , PARAMETER :: M = 3      ! order of A
    INTEGER , PARAMETER :: NRHS = 2   ! number of right-hand sides

    REAL (8) , DIMENSION ( M , M ) :: A
    REAL (8) , DIMENSION ( M , NRHS ) :: B , C
    INTEGER , DIMENSION ( M ) :: IPIV
    INTEGER :: I , J , INFO

    EXTERNAL DGESV

    A = 0
    B = 0

    A ( 1 , 1 ) = 1.0D0
    A ( 2 , 2 ) = 1.0D0
    A ( 3 , 3 ) = 1.0D0

    A ( 2 , 1 ) = 2.0D0
    A ( 1 , 3 ) = 5.0D0

    B ( 1 , 1 ) = 16.0D0
    B ( 1 , 2 ) = 34.0D0

    B ( 2 , 1 ) = 4.0D0
    B ( 2 , 2 ) = 13.0D0

    B ( 3 , 1 ) = 3.0D0
    B ( 3 , 2 ) = 6.0D0

    ! DGESV overwrites its right-hand side with the solution, so work on a
    ! copy and keep B intact. A is overwritten with its LU factors.
    C = B
    CALL DGESV ( M , NRHS , A , M , IPIV , C , M , INFO )

    IF ( INFO /= 0 ) THEN
        WRITE ( * , * ) 'DGESV failed, INFO = ' , INFO
    ELSE
        DO I = 1 , M
            WRITE ( * , * ) (C (I ,J), J = 1 , NRHS)
        END DO
    END IF

    ! Keep the console window open until the user presses Enter.
    READ(*,*)

END PROGRAM simple_code_2nd

*****************************************************************************************************

PROGRAM simple_code_1st
    ! Calls MKL_DDIASM on a 3x3 matrix stored in diagonal format with a single
    ! stored diagonal (the main one, all ones) and two right-hand sides, then
    ! prints the result C row by row.
    !
    ! The program is renamed because a Fortran name must start with a letter.
    ! The dense array A of the original version was never passed to MKL and
    ! has been removed.
    IMPLICIT NONE
    CHARACTER(1) , DIMENSION ( 4 ) :: MATDESCRA
    REAL (8) , DIMENSION ( 3 , 2 ) :: B , C
    REAL (8) , DIMENSION ( 3 , 1 ) :: VAL
    INTEGER , DIMENSION ( 1 ) :: IDIAG
    CHARACTER(1) :: TRANSA
    INTEGER :: M , N , LVAL , NDIAG , LDB , LDC
    INTEGER :: I , J
    REAL (8) :: ALPHA

    B = 0
    C = 0

    ! Right-hand sides, one per column.
    B ( 1 , 1 ) = 1.0D0
    B ( 1 , 2 ) = 4.0D0

    B ( 2 , 1 ) = 2.0D0
    B ( 2 , 2 ) = 5.0D0

    B ( 3 , 1 ) = 3.0D0
    B ( 3 , 2 ) = 6.0D0

    ! Only stored diagonal: offset 0 (main diagonal), all ones.
    VAL ( 1 , 1 ) = 1.0D0
    VAL ( 2 , 1 ) = 1.0D0
    VAL ( 3 , 1 ) = 1.0D0
    IDIAG (1) = 0

    M = 3          ! number of rows of the matrix
    N = 2          ! number of columns of B and C
    LVAL = 3       ! leading dimension of VAL
    NDIAG = 1      ! number of stored diagonals
    LDB = 3
    LDC = 3
    ALPHA = 1.0D0
    TRANSA = "N"

    ! Matrix descriptor as in the original post; see the MKL Sparse BLAS
    ! documentation of matdescra for the meaning of each character.
    MATDESCRA (1) = 'D'
    MATDESCRA (2) = 'U'
    MATDESCRA (3) = 'U'
    MATDESCRA (4) = 'F'

    CALL MKL_DDIASM ( TRANSA , M , N , ALPHA , MATDESCRA , VAL , LVAL , IDIAG , NDIAG , B , LDB , C , LDC )

    DO I = 1 , 3
        WRITE ( * , * ) (C (I ,J), J = 1 , 2)
    END DO

    ! Keep the console window open until the user presses Enter.
    READ(*,*)

END PROGRAM simple_code_1st

**************************************************************

Thanks a lot dear friends.

Attachment	Size
Download 2nd SIMPLE CODE - DDIASM (GENERAL MATRIX A).txt	1.34 KB
Download 1st SIMPLE CODE - DDIASM DIAGONAL MATRIX A).txt	1.17 KB

Zone:

Thread Topic:

How-To

↧

gemv for sparse, rectangular matrices?

May 8, 2017, 9:13 pm

Latest and popular articles on Intel Technologies

≫ Next: Mkl produce "segmentation fault" in ubuntu

≪ Previous: INTEL MKL ERROR : PARAMETER 5 WAS INCORRECT ON ENTRY TO MKL_DDIASM

Is there a recommended routine in the MKL that can do a GEMV with the transpose of a rectangular sparse matrix? I know I can pad it to square but I'd prefer not to have to do that. These sparse matrices could be large, 10 million+ nonzeroes and a million rows or columns.

Damien

↧

Mkl produce "segmentation fault" in ubuntu

May 8, 2017, 9:57 pm

Latest and popular articles on Intel Technologies

≫ Next: Intel MKL FFT for forward and backward Fourier transform of 2D data

≪ Previous: gemv for sparse, rectangular matrices?

Hi,

code:

#include "iostream"
#include "stdio.h"
#include <vector>

#include "mkl_service.h"
#include "mkl_pardiso.h"
#include "mkl_types.h"
#include "mkl_dss.h"
#include "mkl_types.h"
#include "mkl_spblas.h"

using namespace std;
using std::vector;

//-----------------------------------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------------------------------
void solveSparse_MKL()
{
_DOUBLE_PRECISION_t rhs[9] = { 0,0.333333,0.666667,0.111111,0.444444,0.777778,0.222222,0.555556,0.888889 };

    MKL_INT nnz = 23;
    MKL_INT nRows = 9;
    MKL_INT nCols = 9;
    MKL_INT nRhs = 1;
    MKL_INT rhs_len = 9;

    double acoo[] = { -0.0537308, -0.0512116, 1.10494, -4.17055, -1.73111, 6.95287, -7.78207, 0, 10.5132, -1.73111, -0.865586, 3.65043, -5.3765, -2.14414, 13.5568, -8.98329, 0, 19.9095, -1.30956, 4.04067, -2.5529, 10.239, 12.5362 };
    MKL_INT rowind[] = { 1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,8,8,9 };
    MKL_INT colind[] = { 4, 2, 1, 5, 3, 2, 6, 4, 3, 7, 5, 4, 8, 6, 5, 9, 7, 6, 8, 7, 9, 8, 9 };

    MKL_INT info;
    MKL_INT job[8] = { 2, // COO to CSR
        1, // 1 based indexing in CSR rows
        1, // 1 based indexing in CSR cols
        0, //
        nnz, // number of the non-zero elements
        0, // job indicator
        0,
        0
    };

    MKL_INT* i_csr = new MKL_INT[nCols + 1]; // m+1
    MKL_INT* j_csr = new MKL_INT[nnz];
    double* a_csr = new double[nnz];

mkl_dcsrcoo(job, &nCols, a_csr, j_csr, i_csr, &nnz, acoo, rowind, colind, &info);

_DOUBLE_PRECISION_t* solValues = new _DOUBLE_PRECISION_t[rhs_len];

    // Allocate storage for the solver handle and the right-hand side.
    _MKL_DSS_HANDLE_t handle = 0;
    _INTEGER_t error;
    MKL_INT opt = MKL_DSS_DEFAULTS;
    MKL_INT sym = MKL_DSS_SYMMETRIC;
    MKL_INT type = MKL_DSS_POSITIVE_DEFINITE;
    // ---------------------
    // Initialize the solver
    // ---------------------
    error = dss_create(handle, opt);
    if (error != MKL_DSS_SUCCESS)
        printf("Solver returned error code %d\n", error);
    // -------------------------------------------
    // Define the non-zero structure of the matrix
    // -------------------------------------------
    error = dss_define_structure(handle, sym, i_csr, nRows, nCols, j_csr, nnz);
    if (error != MKL_DSS_SUCCESS)
        printf("Solver returned error code %d\n", error);
    // ------------------
    // Reorder the matrix
    // ------------------
    error = dss_reorder(handle, opt, 0);
    if (error != MKL_DSS_SUCCESS)
        printf("Solver returned error code %d\n", error);
    // ------------------
    // Factor the matrix
    // ------------------
    error = dss_factor_real(handle, type, a_csr);
    if (error != MKL_DSS_SUCCESS)
        printf("Solver returned error code %d\n", error);
    // ------------------------
    // Get the solution vector
    // ------------------------
    error = dss_solve_real(handle, opt, rhs, nRhs, solValues);
    if (error != MKL_DSS_SUCCESS)
        printf("Solver returned error code %d\n", error);

    cout << "------------------------------"<< endl;
    cout << "solution "<< endl;
    cout << "------------------------------"<< endl;
    for (int j = 0; j < rhs_len; ++j)
    {
        cout << solValues[j] << endl;
    }
    // --------------------------
    // Deallocate solver storage
    // --------------------------
    error = dss_delete(handle, opt);
    if (error != MKL_DSS_SUCCESS)
        printf("Solver returned error code %d\n", error);

    delete[] a_csr;
    delete[] i_csr;
    delete[] j_csr;
    delete[] solValues;
}

//-----------------------------------------------------------------------------------------------------
// https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor
//-----------------------------------------------------------------------------------------------------
int main(void)
{

MKLVersion Version;

mkl_get_version(&Version);

    solveSparse_MKL();
    getchar();
    return 0;
}
// c:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\examples\examples_core_c.zip\

Results while run:

Major version: 2017
Minor version: 0
Update version: 2
Product status: Product
Build: 20170126
Platform: Intel(R) 64 architecture
Processor optimization: Intel(R) Streaming SIMD Extensions 2 (Intel(R) SSE2) enabled processors
================================================================

Segmentation fault

Os Details:

I have also posted this question before in

https://software.intel.com/en-us/forums/intel-math-kernel-library/topic/...

Regards

CIBIN

↧

Intel MKL FFT for forward and backward Fourier transform of 2D data

May 8, 2017, 11:03 pm

Latest and popular articles on Intel Technologies

≫ Next: Pardiso of-of-core memory estimates

≪ Previous: Mkl produce "segmentation fault" in ubuntu

Hello,

I am trying to do forward and backward Fourier transform using the FFT routines available in Intel MKL. I have taken help of manual and also website examples to do this but i am getting a segmentation fault. Please help me to resolve it.

I am taking real valued as input to FFT forward and want output in Complex form.

Following is my code which i have implemented so far

#include <iostream>

#include "mkl_dfti.h"

// Single-precision complex number stored as two consecutive floats.
typedef struct {
    float re;   // real part
    float im;   // imaginary part
} mkl_float_complex;

using namespace std;

// Sketch of a 2D real-to-complex forward FFT with the MKL DFTI interface:
// create a single-precision real 2D descriptor, select CCE conjugate-even
// storage and out-of-place operation, run the forward transform from xr_in
// into xc_out, then write the real part of every xc_out element to a file.
//
// NOTE(review): this fragment is not complete code. n1, n2, i1, i2, xr_in and
// fp are never declared here, the file open/read/close steps are only
// comments, and the "n2=2001 n1=1911" line is not a valid statement.
// NOTE(review): malloc is used but only <iostream> and "mkl_dfti.h" are
// included by the snippet - confirm that <cstdlib> is pulled in.
int main(int argc, char **argv)
{

//Read binary file in array xr_in dimension 2001 X 1911

// Transform size: n2 rows by n1 columns (pseudo-code, see note above).
n2=2001 n1=1911

mkl_float_complex **xc_out;
// NOTE(review): desc_handle_back is declared but never used in this fragment.
DFTI_DESCRIPTOR_HANDLE desc_handle_for, desc_handle_back;
// Every DFTI call below stores its result in status, but status is never checked.
MKL_LONG status;
MKL_LONG lengths[2];

lengths[0] = n2; lengths[1] = n1;
// xc_out is a table of n2 row pointers; each row is a separate allocation of
// n1 complex values, so the rows are not one contiguous buffer.
xc_out = (mkl_float_complex**) malloc(sizeof(mkl_float_complex*)*n2);
for(i2 = 0; i2 < n2; i2++)
xc_out[i2] = (mkl_float_complex*) malloc(sizeof(mkl_float_complex)*n1);

// Forward descriptor: single precision, real domain, 2 dimensions.
status = DftiCreateDescriptor(&desc_handle_for, DFTI_SINGLE, DFTI_REAL, 2, lengths);

status = DftiSetValue(desc_handle_for, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);

status = DftiSetValue(desc_handle_for, DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT);

status = DftiSetValue(desc_handle_for, DFTI_PLACEMENT, DFTI_NOT_INPLACE);

// NOTE(review): no DFTI_INPUT_STRIDES / DFTI_OUTPUT_STRIDES are set before the
// commit; check the DFTI documentation for the strides a 2D real transform
// with CCE storage requires.
status = DftiCommitDescriptor(desc_handle_for);

// NOTE(review): xc_out (the pointer table itself, not a data buffer) is passed
// as the output argument. DFTI presumably expects a single contiguous array
// addressed through strides, which would make this call the likely source of
// the reported segmentation fault - confirm against the DFTI documentation.
status = DftiComputeForward(desc_handle_for, xr_in, xc_out);

status = DftiFreeDescriptor(&desc_handle_for);

//Writing real part only to file

//Open file

// One float is written per element, row by row.
// NOTE(review): the loop reads n1 values per row; with conjugate-even storage
// the transform may fill fewer columns than n1 - confirm the output layout.
for(i2 = 0; i2 < n2; i2++)
for(i1 = 0; i1 < n1; i1++)
fwrite(&(xc_out[i2][i1].re), sizeof(float), 1, fp);

//Close file

// NOTE(review): neither the rows of xc_out nor the pointer table are freed,
// and main has no return statement.
}//End of main

Thread Topic:

How-To

↧

Pardiso out-of-core memory estimates

May 9, 2017, 6:50 am

Latest and popular articles on Intel Technologies

≫ Next: ./install.sh: line 639: /tmp/nix-build-intel-mkl-2017.2-174.drv-0/l_mkl_2017.2.174/./pset/32e/install: No such file or directory

≪ Previous: Intel MKL FFT for forward and backward Fourier transform of 2D data

Hi,

When I run Pardiso (phase=11) in In-core mode (iparm(60)=0), I can get the In-core memory requirement from max(iparm(15), iparm(16) + iparm(17)).

Can I, after running in in-core mode, use the value max(iparm(15), iparm(16)+iparm(63)) as the estimate for the Out-of-core memory requirement, or does this require that I first run Pardiso (phase=11) in out-of-core mode?

Jens

Thread Topic:

Help Me

↧

./install.sh: line 639: /tmp/nix-build-intel-mkl-2017.2-174.drv-0/l_mkl_2017.2.174/./pset/32e/install: No such file or directory

May 9, 2017, 8:42 am

Latest and popular articles on Intel Technologies

≫ Next: MKL Pardiso full iterative solver not combined, and question about RCI ISS

≪ Previous: Pardiso of-of-core memory estimates

Installation of MKL 2017.2-174, invoked with

```

$ ./install.sh --silent

```

fails with

```

./install.sh: line 639: /tmp/nix-build-intel-mkl-2017.2-174.drv-0/l_mkl_2017.2.174/./pset/32e/install: No such file or directory

```

I'm writing a Nix expression for this package. Which `install` file does it refer to, a file that is supposed to be at that location, or the `install.sh` that I invoked?

Thread Topic:

Bug Report

↧

MKL Pardiso full iterative solver not combined, and question about RCI ISS

May 10, 2017, 9:13 am

Latest and popular articles on Intel Technologies

≫ Next: MKL DFT module not using AVX2 backend when the processor does support it

≪ Previous: ./install.sh: line 639: /tmp/nix-build-intel-mkl-2017.2-174.drv-0/l_mkl_2017.2.174/./pset/32e/install: No such file or directory

Hi,

We want to use Pardiso as a fully iterative solver, not as a combined direct-iterative one. Is there some way to do this?

If not, what is the best solver type for implementing parallel iterative solving? Do the Iterative Sparse Solvers based on the Reverse Communication Interface (RCI ISS) use a parallel multicore approach when solving? Can you give me a complete example of how to use RCI ISS with symmetric matrices?

Thank you very much.

Zone:

Windows*

Thread Topic:

Help Me

↧

MKL DFT module not using AVX2 backend when the processor does support it

May 10, 2017, 11:43 am

Latest and popular articles on Intel Technologies

≫ Next: Problem in MKL example for CG with SSOR

≪ Previous: MKL Pardiso full iterative solver not combined, and question about RCI ISS

I have an application that uses MKL to perform DFTs. I'm using the latest release of MKL on CentOS 7. For a version reference, the MKL shared libraries are installed to /opt/intel/compilers_and_libraries_2017.2.174/linux/mkl/lib/intel64/.

I noticed while profiling my application that MKL is dispatching to the libmkl_avx.so backend library instead of libmkl_avx2.so as I would expect. This results in slower performance than I would expect. The host processor (a Haswell Xeon) does support AVX2 and FMA, however. Here is a snippet from /proc/cpuinfo:

processor    : 0
vendor_id    : GenuineIntel
cpu family    : 6
model        : 63
model name    : Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz
stepping    : 2
microcode    : 0x36
cpu MHz        : 2599.968
cache size    : 20480 KB
physical id    : 0
siblings    : 16
core id        : 0
cpu cores    : 8
apicid        : 0
initial apicid    : 0
fpu        : yes
fpu_exception    : yes
cpuid level    : 15
wp        : yes
flags        : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat
pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp
lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf
eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr
pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx
f16c rdrand lahf_lm abm ida arat epb pln pts dtherm tpr_shadow vnmi flexpriority
ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm xsaveopt cqm_
llc cqm_occup_llc
bogomips    : 4800.23
clflush size    : 64
cache_alignment    : 64
address sizes    : 46 bits physical, 48 bits virtual
power management:

The runtime CPU feature detection doesn't seem to be working properly for this processor. I tried to fool it by renaming libmkl_avx.so and symlinking it to libmkl_avx2.so instead (so even if MKL detected that it should load the AVX library, it would instead get the AVX2 library via the symlink). After doing that, I received the following error message:

Intel MKL WARNING: Library libmkl_avx.so (MKL type 5) is not suitable for this processor
(MKL type 4).

This again suggests that MKL believes that the host processor can't use the AVX2 library for some reason. Is this a known issue, and if so, is there a workaround?

↧

Problem in MKL example for CG with SSOR

May 10, 2017, 2:04 pm

Latest and popular articles on Intel Technologies

≫ Next: VDEXP undefined on windows for mkl 2017 update 2

≪ Previous: MKL DFT module not using AVX2 backend when the processor does support it

I think there is something wrong with the sample "cg_ssor_precon_c.c" in the for loop that applies the SSOR iteration. The purpose of the loop is to apply SSOR repeatedly on the array &tmp[3*n]. but inside the loop every time it starts using tmp[2*n] and overwrites the previous value in tmp[3*n]. So effectively the loop is equivalent to a single SSOR run regardless of the number of iterations desired.

Can somebody help me confirm this?

↧

VDEXP undefined on windows for mkl 2017 update 2

May 10, 2017, 4:33 pm

Latest and popular articles on Intel Technologies

≫ Next: Pardiso out of memory in phase 11

≪ Previous: Problem in MKL example for CG with SSOR

I get the following compile error using MKL 2017 update 2 on Windows (it only happens on Windows; it is fine on Linux):

.error: identifier "VDEXP" is undefined
VDEXP(&size, &x[0], &expx[0]);

↧

Pardiso out of memory in phase 11

May 11, 2017, 7:16 am

Latest and popular articles on Intel Technologies

≫ Next: FGMRES preconditioner applied to?

≪ Previous: VDEXP undefined on windows for mkl 2017 update 2

Hi,

I have a problem with Pardiso running out of memory in the reordering/numerical factorization phase (11).

It is not clear to me how the out-of-core option (iparm(60)) and the environment variables MKL_PARDISO_OOC_MAX_CORE_SIZE and MKL_PARDISO_OOC_MAX_SWAP_SIZE affect phase 11; from my own testing it seems as if these parameters have no effect in phase 11. Is this correct?

For example, for an SPD matrix with 17 million degrees of freedom and 489 million non-zeros. The matrix is a stiffness matrix originating from a finite-element discretization. Although this is a large problem I would suspect it to work with Pardiso in out-of-core mode. I would like this to work not just for this matrix, but for "any" SPD matrix in principle.

What is the best general strategy to get past phase 11 (so that out-of-core mode will take effect)? Is there any point playing with the parameters mentioned above or do they not have an effect in phase 11? Which reordering algorithm should I use?

Best,
Jens

Thread Topic:

Help Me

↧