// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE

package org.bytedeco.cuda.global;

import org.bytedeco.cuda.cublas.*;

import java.nio.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.javacpp.annotation.*;

import static org.bytedeco.javacpp.presets.javacpp.*;
import org.bytedeco.cuda.cudart.*;
import static org.bytedeco.cuda.global.cudart.*;

public class cublas extends org.bytedeco.cuda.presets.cublas {
    static {
        // Runs once when this class is initialized. Presumably this makes the
        // native cuBLAS library available before any native method declared
        // below is called -- confirm against the JavaCPP Loader documentation.
        Loader.load();
    }

// Parsed from <cublas_api.h>

/*
 * Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */
 
/*
 * This is the public header file for the CUBLAS library, defining the API
 *
 * CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines) 
 * on top of the CUDA runtime. 
 */

// #if !defined(CUBLAS_API_H_)
// #define CUBLAS_API_H_

// #ifndef CUBLASWINAPI
// #ifdef _WIN32
// #define CUBLASWINAPI __stdcall
// #else
// #define CUBLASWINAPI 
// #endif
// #endif

// #ifndef CUBLASAPI
// #error "This file should not be included without defining CUBLASAPI"
// #endif

// #include "driver_types.h"
// #include "cuComplex.h"   /* import complex data type */

// #include <cuda_fp16.h>
// #include <cuda_bf16.h>

// #include "library_types.h"


// #if defined(__cplusplus)
// #endif /* __cplusplus */

/** Major component of the cuBLAS version given by the parsed header. */
public static final int CUBLAS_VER_MAJOR = 11;
/** Minor component of the cuBLAS version. */
public static final int CUBLAS_VER_MINOR = 5;
/** Patch component of the cuBLAS version. */
public static final int CUBLAS_VER_PATCH = 2;
/** Build number; it does not take part in {@link #CUBLAS_VERSION}. */
public static final int CUBLAS_VER_BUILD = 43;
/** Single-number version: {@code major * 1000 + minor * 100 + patch}. */
public static final int CUBLAS_VERSION =
        CUBLAS_VER_PATCH + 100 * CUBLAS_VER_MINOR + 1000 * CUBLAS_VER_MAJOR;

/* CUBLAS status type returns */
/*
 * enum cublasStatus_t -- the status code type returned by the cuBLAS entry
 * points declared in this class. Note that the numbering is not contiguous.
 */
public static final int CUBLAS_STATUS_SUCCESS = 0;
public static final int CUBLAS_STATUS_NOT_INITIALIZED = 1;
public static final int CUBLAS_STATUS_ALLOC_FAILED = 3;
public static final int CUBLAS_STATUS_INVALID_VALUE = 7;
public static final int CUBLAS_STATUS_ARCH_MISMATCH = 8;
public static final int CUBLAS_STATUS_MAPPING_ERROR = 11;
public static final int CUBLAS_STATUS_EXECUTION_FAILED = 13;
public static final int CUBLAS_STATUS_INTERNAL_ERROR = 14;
public static final int CUBLAS_STATUS_NOT_SUPPORTED = 15;
public static final int CUBLAS_STATUS_LICENSE_ERROR = 16;


/* enum cublasFillMode_t -- one declaration per enumerator. */
public static final int CUBLAS_FILL_MODE_LOWER = 0;
public static final int CUBLAS_FILL_MODE_UPPER = 1;
public static final int CUBLAS_FILL_MODE_FULL = 2;

/* enum cublasDiagType_t -- one declaration per enumerator. */
public static final int CUBLAS_DIAG_NON_UNIT = 0;
public static final int CUBLAS_DIAG_UNIT = 1;

/* enum cublasSideMode_t -- one declaration per enumerator. */
public static final int CUBLAS_SIDE_LEFT = 0;
public static final int CUBLAS_SIDE_RIGHT = 1;


/* enum cublasOperation_t -- one declaration per enumerator. */
public static final int CUBLAS_OP_N = 0;
public static final int CUBLAS_OP_T = 1;
public static final int CUBLAS_OP_C = 2;
// Synonym of CUBLAS_OP_C (same value).
public static final int CUBLAS_OP_HERMITAN = 2;
// Conjugate; placeholder - not supported in the current release.
public static final int CUBLAS_OP_CONJG = 3;


/* enum cublasPointerMode_t -- one declaration per enumerator. */
public static final int CUBLAS_POINTER_MODE_HOST = 0;
public static final int CUBLAS_POINTER_MODE_DEVICE = 1;

/* enum cublasAtomicsMode_t -- one declaration per enumerator. */
public static final int CUBLAS_ATOMICS_NOT_ALLOWED = 0;
public static final int CUBLAS_ATOMICS_ALLOWED = 1;

/*For different GEMM algorithm */
/*
 * enum cublasGemmAlgo_t -- one declaration per enumerator.
 * The DFALT/DEFAULT spellings are aliases: each pair shares a single value.
 */

// Default selection (two spellings, same value).
public static final int CUBLAS_GEMM_DFALT = -1;
public static final int CUBLAS_GEMM_DEFAULT = -1;

// Explicitly numbered algorithms.
public static final int CUBLAS_GEMM_ALGO0 = 0;
public static final int CUBLAS_GEMM_ALGO1 = 1;
public static final int CUBLAS_GEMM_ALGO2 = 2;
public static final int CUBLAS_GEMM_ALGO3 = 3;
public static final int CUBLAS_GEMM_ALGO4 = 4;
public static final int CUBLAS_GEMM_ALGO5 = 5;
public static final int CUBLAS_GEMM_ALGO6 = 6;
public static final int CUBLAS_GEMM_ALGO7 = 7;
public static final int CUBLAS_GEMM_ALGO8 = 8;
public static final int CUBLAS_GEMM_ALGO9 = 9;
public static final int CUBLAS_GEMM_ALGO10 = 10;
public static final int CUBLAS_GEMM_ALGO11 = 11;
public static final int CUBLAS_GEMM_ALGO12 = 12;
public static final int CUBLAS_GEMM_ALGO13 = 13;
public static final int CUBLAS_GEMM_ALGO14 = 14;
public static final int CUBLAS_GEMM_ALGO15 = 15;
public static final int CUBLAS_GEMM_ALGO16 = 16;
public static final int CUBLAS_GEMM_ALGO17 = 17;
public static final int CUBLAS_GEMM_ALGO18 = 18; // sliced 32x32
public static final int CUBLAS_GEMM_ALGO19 = 19; // sliced 64x32
public static final int CUBLAS_GEMM_ALGO20 = 20; // sliced 128x32
public static final int CUBLAS_GEMM_ALGO21 = 21; // sliced 32x32  -splitK
public static final int CUBLAS_GEMM_ALGO22 = 22; // sliced 64x32  -splitK
public static final int CUBLAS_GEMM_ALGO23 = 23; // sliced 128x32 -splitK

// Tensor-op default selection (two spellings, same value).
public static final int CUBLAS_GEMM_DEFAULT_TENSOR_OP = 99;
public static final int CUBLAS_GEMM_DFALT_TENSOR_OP = 99;

// Explicitly numbered tensor-op algorithms.
public static final int CUBLAS_GEMM_ALGO0_TENSOR_OP = 100;
public static final int CUBLAS_GEMM_ALGO1_TENSOR_OP = 101;
public static final int CUBLAS_GEMM_ALGO2_TENSOR_OP = 102;
public static final int CUBLAS_GEMM_ALGO3_TENSOR_OP = 103;
public static final int CUBLAS_GEMM_ALGO4_TENSOR_OP = 104;
public static final int CUBLAS_GEMM_ALGO5_TENSOR_OP = 105;
public static final int CUBLAS_GEMM_ALGO6_TENSOR_OP = 106;
public static final int CUBLAS_GEMM_ALGO7_TENSOR_OP = 107;
public static final int CUBLAS_GEMM_ALGO8_TENSOR_OP = 108;
public static final int CUBLAS_GEMM_ALGO9_TENSOR_OP = 109;
public static final int CUBLAS_GEMM_ALGO10_TENSOR_OP = 110;
public static final int CUBLAS_GEMM_ALGO11_TENSOR_OP = 111;
public static final int CUBLAS_GEMM_ALGO12_TENSOR_OP = 112;
public static final int CUBLAS_GEMM_ALGO13_TENSOR_OP = 113;
public static final int CUBLAS_GEMM_ALGO14_TENSOR_OP = 114;
public static final int CUBLAS_GEMM_ALGO15_TENSOR_OP = 115;

/*Enum for default math mode/tensor operation*/
/* enum cublasMath_t -- one declaration per enumerator. */

public static final int CUBLAS_DEFAULT_MATH = 0;

/** Deprecated, same effect as using CUBLAS_COMPUTE_32F_FAST_16F; will be removed in a future release. */
public static final int CUBLAS_TENSOR_OP_MATH = 1;

/**
 * Same as using the matching _PEDANTIC compute type when using cublas&lt;T&gt;routine calls
 * or cublasEx() calls with cudaDataType as compute type.
 */
public static final int CUBLAS_PEDANTIC_MATH = 2;

/** Allows accelerating single precision routines using TF32 tensor cores. */
public static final int CUBLAS_TF32_TENSOR_OP_MATH = 3;

/**
 * Flag to force any reductions to use the accumulator type and not the output type
 * in case of mixed precision routines with a lower size output type.
 */
public static final int CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION = 16;

/* For backward compatibility purposes */

/* Enum for compute type
 *
 * - default types provide best available performance using all available hardware features
 *   and guarantee internal storage precision with at least the same precision and range;
 * - _PEDANTIC types ensure standard arithmetic and exact specified internal storage format;
 * - _FAST types allow for some loss of precision to enable higher throughput arithmetic.
 */
/* enum cublasComputeType_t -- one declaration per enumerator, in header order. */

// half
public static final int CUBLAS_COMPUTE_16F = 64;            // default
public static final int CUBLAS_COMPUTE_16F_PEDANTIC = 65;   // pedantic

// float
public static final int CUBLAS_COMPUTE_32F = 68;            // default
public static final int CUBLAS_COMPUTE_32F_PEDANTIC = 69;   // pedantic
public static final int CUBLAS_COMPUTE_32F_FAST_16F = 74;   // fast, allows down-converting inputs to half or TF32
public static final int CUBLAS_COMPUTE_32F_FAST_16BF = 75;  // fast, allows down-converting inputs to bfloat16 or TF32
public static final int CUBLAS_COMPUTE_32F_FAST_TF32 = 77;  // fast, allows down-converting inputs to TF32

// double
public static final int CUBLAS_COMPUTE_64F = 70;            // default
public static final int CUBLAS_COMPUTE_64F_PEDANTIC = 71;   // pedantic

// signed 32-bit int
public static final int CUBLAS_COMPUTE_32I = 72;            // default
public static final int CUBLAS_COMPUTE_32I_PEDANTIC = 73;   // pedantic
// Targeting ..\cublas\cublasContext.java



/**
 * Binding for the C function {@code cublasCreate_v2}.
 *
 * @param handle cuBLAS context, handed to native code as a pointer to the handle
 *               ({@code @ByPtrPtr}); presumably receives the newly created context
 *               -- confirm against the cuBLAS documentation
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasCreate_v2(@ByPtrPtr cublasContext handle);
/**
 * Binding for the C function {@code cublasDestroy_v2}.
 *
 * @param handle cuBLAS context to pass to the native call; presumably it must not be
 *               used afterwards -- confirm against the cuBLAS documentation
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasDestroy_v2(cublasContext handle);

/**
 * Binding for the C function {@code cublasGetVersion_v2}. The three overloads differ
 * only in how the {@code int*} output is supplied (IntPointer, IntBuffer or int[]).
 *
 * @param handle  cuBLAS context
 * @param version output location for the version number
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasGetVersion_v2(cublasContext handle, IntPointer version);
public static native @Cast("cublasStatus_t") int cublasGetVersion_v2(cublasContext handle, IntBuffer version);
public static native @Cast("cublasStatus_t") int cublasGetVersion_v2(cublasContext handle, int[] version);
/**
 * Binding for the C function {@code cublasGetProperty}. The three overloads differ
 * only in how the {@code int*} output is supplied.
 *
 * @param type  a {@code libraryPropertyType} value, passed as an int
 * @param value output location for the requested property
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasGetProperty(@Cast("libraryPropertyType") int type, IntPointer value);
public static native @Cast("cublasStatus_t") int cublasGetProperty(@Cast("libraryPropertyType") int type, IntBuffer value);
public static native @Cast("cublasStatus_t") int cublasGetProperty(@Cast("libraryPropertyType") int type, int[] value);
/**
 * Binding for the C function {@code cublasGetCudartVersion}.
 *
 * @return the native {@code size_t} result as a long (not a status code); presumably
 *         the CUDA runtime version -- confirm against the cuBLAS documentation
 */
public static native @Cast("size_t") long cublasGetCudartVersion();

/**
 * Binding for the C function {@code cublasSetWorkspace_v2}.
 *
 * @param handle               cuBLAS context
 * @param workspace            pointer to the workspace buffer; NOTE(review): presumably
 *                             device memory -- confirm against the cuBLAS documentation
 * @param workspaceSizeInBytes size of {@code workspace} in bytes ({@code size_t} on the native side)
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSetWorkspace_v2(cublasContext handle, Pointer workspace, @Cast("size_t") long workspaceSizeInBytes);

/**
 * Binding for the C function {@code cublasSetStream_v2}.
 *
 * @param handle   cuBLAS context
 * @param streamId CUDA stream to associate with {@code handle}
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSetStream_v2(cublasContext handle, CUstream_st streamId); 
/**
 * Binding for the C function {@code cublasGetStream_v2}.
 *
 * @param handle   cuBLAS context
 * @param streamId output, handed to native code as a pointer to the stream handle
 *                 ({@code @ByPtrPtr})
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasGetStream_v2(cublasContext handle, @ByPtrPtr CUstream_st streamId); 

/**
 * Binding for the C function {@code cublasGetPointerMode_v2}. The three overloads
 * differ only in how the {@code cublasPointerMode_t*} output is supplied.
 *
 * @param handle cuBLAS context
 * @param mode   output location for a {@code cublasPointerMode_t} value
 *               (see the {@code CUBLAS_POINTER_MODE_*} constants)
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasGetPointerMode_v2(cublasContext handle, @Cast("cublasPointerMode_t*") IntPointer mode);
public static native @Cast("cublasStatus_t") int cublasGetPointerMode_v2(cublasContext handle, @Cast("cublasPointerMode_t*") IntBuffer mode);
public static native @Cast("cublasStatus_t") int cublasGetPointerMode_v2(cublasContext handle, @Cast("cublasPointerMode_t*") int[] mode);
/**
 * Binding for the C function {@code cublasSetPointerMode_v2}.
 *
 * @param handle cuBLAS context
 * @param mode   a {@code cublasPointerMode_t} value (see the {@code CUBLAS_POINTER_MODE_*} constants)
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSetPointerMode_v2(cublasContext handle, @Cast("cublasPointerMode_t") int mode);         

/**
 * Binding for the C function {@code cublasGetAtomicsMode}. The three overloads
 * differ only in how the {@code cublasAtomicsMode_t*} output is supplied.
 *
 * @param handle cuBLAS context
 * @param mode   output location for a {@code cublasAtomicsMode_t} value
 *               (see the {@code CUBLAS_ATOMICS_*} constants)
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasGetAtomicsMode(cublasContext handle, @Cast("cublasAtomicsMode_t*") IntPointer mode);
public static native @Cast("cublasStatus_t") int cublasGetAtomicsMode(cublasContext handle, @Cast("cublasAtomicsMode_t*") IntBuffer mode);
public static native @Cast("cublasStatus_t") int cublasGetAtomicsMode(cublasContext handle, @Cast("cublasAtomicsMode_t*") int[] mode);
/**
 * Binding for the C function {@code cublasSetAtomicsMode}.
 *
 * @param handle cuBLAS context
 * @param mode   a {@code cublasAtomicsMode_t} value (see the {@code CUBLAS_ATOMICS_*} constants)
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSetAtomicsMode(cublasContext handle, @Cast("cublasAtomicsMode_t") int mode);         

/**
 * Binding for the C function {@code cublasGetMathMode}. The three overloads
 * differ only in how the {@code cublasMath_t*} output is supplied.
 *
 * @param handle cuBLAS context
 * @param mode   output location for a {@code cublasMath_t} value
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasGetMathMode(cublasContext handle, @Cast("cublasMath_t*") IntPointer mode);
public static native @Cast("cublasStatus_t") int cublasGetMathMode(cublasContext handle, @Cast("cublasMath_t*") IntBuffer mode);
public static native @Cast("cublasStatus_t") int cublasGetMathMode(cublasContext handle, @Cast("cublasMath_t*") int[] mode);
/**
 * Binding for the C function {@code cublasSetMathMode}.
 *
 * @param handle cuBLAS context
 * @param mode   a {@code cublasMath_t} value
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSetMathMode(cublasContext handle, @Cast("cublasMath_t") int mode);         

/**
 * Binding for the C function {@code cublasGetSmCountTarget}. The three overloads
 * differ only in how the {@code int*} output is supplied.
 *
 * @param handle        cuBLAS context
 * @param smCountTarget output location for the current SM count target
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasGetSmCountTarget(cublasContext handle, IntPointer smCountTarget);
public static native @Cast("cublasStatus_t") int cublasGetSmCountTarget(cublasContext handle, IntBuffer smCountTarget);
public static native @Cast("cublasStatus_t") int cublasGetSmCountTarget(cublasContext handle, int[] smCountTarget);
/**
 * Binding for the C function {@code cublasSetSmCountTarget}.
 *
 * @param handle        cuBLAS context
 * @param smCountTarget new SM count target; NOTE(review): valid range and the meaning
 *                      of 0 are not visible here -- confirm against the cuBLAS documentation
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSetSmCountTarget(cublasContext handle, int smCountTarget);
// Targeting ..\cublas\cublasLogCallback.java



/**
 * Binding for the C function {@code cublasLoggerConfigure}. The two overloads differ
 * only in how the {@code const char*} file name is supplied (BytePointer or String).
 *
 * @param logIsOn     int flag; presumably non-zero enables logging -- confirm
 * @param logToStdOut int flag; presumably non-zero logs to standard output -- confirm
 * @param logToStdErr int flag; presumably non-zero logs to standard error -- confirm
 * @param logFileName log file name; NOTE(review): whether null is accepted is not
 *                    visible here -- confirm against the cuBLAS documentation
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasLoggerConfigure(int logIsOn, int logToStdOut, int logToStdErr, @Cast("const char*") BytePointer logFileName);
public static native @Cast("cublasStatus_t") int cublasLoggerConfigure(int logIsOn, int logToStdOut, int logToStdErr, String logFileName);
/**
 * Binding for the C function {@code cublasSetLoggerCallback}.
 *
 * @param userCallback callback object to install
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSetLoggerCallback(cublasLogCallback userCallback);
/**
 * Binding for the C function {@code cublasGetLoggerCallback}.
 *
 * @param userCallback output, handed to native code as a pointer to the callback
 *                     ({@code @ByPtrPtr})
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasGetLoggerCallback(@ByPtrPtr cublasLogCallback userCallback);

/* 
 * cublasStatus_t 
 * cublasSetVector (int n, int elemSize, const void *x, int incx, 
 *                  void *y, int incy) 
 *
 * copies n elements from a vector x in CPU memory space to a vector y 
 * in GPU memory space. Elements in both vectors are assumed to have a 
 * size of elemSize bytes. Storage spacing between consecutive elements
 * is incx for the source vector x and incy for the destination vector
 * y. In general, y points to an object, or part of an object, allocated
 * via cublasAlloc(). Column major format for two-dimensional matrices
 * is assumed throughout CUBLAS. Therefore, if the increment for a vector
 * is equal to 1, this accesses a column vector, while using an increment
 * equal to the leading dimension of the respective matrix accesses a
 * row vector.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if an error occurred accessing GPU memory   
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
/**
 * Binding for the C function {@code cublasSetVector}: copies {@code n} elements of
 * {@code elemSize} bytes each from a vector in CPU memory to a vector in GPU memory.
 *
 * @param n         number of elements to copy
 * @param elemSize  size of one element in bytes
 * @param x         source vector in CPU memory
 * @param incx      storage spacing between consecutive elements of {@code x}
 * @param devicePtr destination vector in GPU memory (named {@code y} in the C header comment)
 * @param incy      storage spacing between consecutive elements of the destination
 * @return a {@code cublasStatus_t} value: {@code CUBLAS_STATUS_SUCCESS},
 *         {@code CUBLAS_STATUS_NOT_INITIALIZED}, {@code CUBLAS_STATUS_INVALID_VALUE}
 *         (incx, incy or elemSize &lt;= 0) or {@code CUBLAS_STATUS_MAPPING_ERROR}
 */
public static native @Cast("cublasStatus_t") int cublasSetVector(int n, int elemSize, @Const Pointer x, 
                                             int incx, Pointer devicePtr, int incy);

/* 
 * cublasStatus_t 
 * cublasGetVector (int n, int elemSize, const void *x, int incx, 
 *                  void *y, int incy)
 * 
 * copies n elements from a vector x in GPU memory space to a vector y 
 * in CPU memory space. Elements in both vectors are assumed to have a 
 * size of elemSize bytes. Storage spacing between consecutive elements
 * is incx for the source vector x and incy for the destination vector
 * y. In general, x points to an object, or part of an object, allocated
 * via cublasAlloc(). Column major format for two-dimensional matrices
 * is assumed throughout CUBLAS. Therefore, if the increment for a vector
 * is equal to 1, this accesses a column vector, while using an increment
 * equal to the leading dimension of the respective matrix accesses a
 * row vector.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if an error occurred accessing GPU memory   
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
/**
 * Binding for the C function {@code cublasGetVector}: copies {@code n} elements of
 * {@code elemSize} bytes each from a vector in GPU memory to a vector in CPU memory.
 *
 * @param n        number of elements to copy
 * @param elemSize size of one element in bytes
 * @param x        source vector in GPU memory
 * @param incx     storage spacing between consecutive elements of {@code x}
 * @param y        destination vector in CPU memory
 * @param incy     storage spacing between consecutive elements of {@code y}
 * @return a {@code cublasStatus_t} value: {@code CUBLAS_STATUS_SUCCESS},
 *         {@code CUBLAS_STATUS_NOT_INITIALIZED}, {@code CUBLAS_STATUS_INVALID_VALUE}
 *         (incx, incy or elemSize &lt;= 0) or {@code CUBLAS_STATUS_MAPPING_ERROR}
 */
public static native @Cast("cublasStatus_t") int cublasGetVector(int n, int elemSize, @Const Pointer x, 
                                             int incx, Pointer y, int incy);

/*
 * cublasStatus_t 
 * cublasSetMatrix (int rows, int cols, int elemSize, const void *A, 
 *                  int lda, void *B, int ldb)
 *
 * copies a tile of rows x cols elements from a matrix A in CPU memory
 * space to a matrix B in GPU memory space. Each element requires storage
 * of elemSize bytes. Both matrices are assumed to be stored in column 
 * major format, with the leading dimension (i.e. number of rows) of 
 * source matrix A provided in lda, and the leading dimension of matrix B
 * provided in ldb. In general, B points to an object, or part of an 
 * object, that was allocated via cublasAlloc().
 *
 * Return Values 
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if rows or cols < 0, or elemSize, lda, or 
 *                                ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
/**
 * Binding for the C function {@code cublasSetMatrix}: copies a tile of
 * {@code rows x cols} elements from matrix {@code A} in CPU memory to matrix
 * {@code B} in GPU memory. Both matrices are assumed to be in column-major format.
 *
 * @param rows     number of rows of the tile
 * @param cols     number of columns of the tile
 * @param elemSize size of one element in bytes
 * @param A        source matrix in CPU memory
 * @param lda      leading dimension (number of rows) of {@code A}
 * @param B        destination matrix in GPU memory
 * @param ldb      leading dimension of {@code B}
 * @return a {@code cublasStatus_t} value: {@code CUBLAS_STATUS_SUCCESS},
 *         {@code CUBLAS_STATUS_NOT_INITIALIZED}, {@code CUBLAS_STATUS_INVALID_VALUE}
 *         (rows or cols &lt; 0, or elemSize, lda or ldb &lt;= 0) or
 *         {@code CUBLAS_STATUS_MAPPING_ERROR}
 */
public static native @Cast("cublasStatus_t") int cublasSetMatrix(int rows, int cols, int elemSize, 
                                             @Const Pointer A, int lda, Pointer B, 
                                             int ldb);

/*
 * cublasStatus_t 
 * cublasGetMatrix (int rows, int cols, int elemSize, const void *A, 
 *                  int lda, void *B, int ldb)
 *
 * copies a tile of rows x cols elements from a matrix A in GPU memory
 * space to a matrix B in CPU memory space. Each element requires storage
 * of elemSize bytes. Both matrices are assumed to be stored in column 
 * major format, with the leading dimension (i.e. number of rows) of 
 * source matrix A provided in lda, and the leading dimension of matrix B
 * provided in ldb. In general, A points to an object, or part of an 
 * object, that was allocated via cublasAlloc().
 *
 * Return Values 
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if rows, cols, elemSize, lda, or ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
/**
 * Binding for the C function {@code cublasGetMatrix}: copies a tile of
 * {@code rows x cols} elements from matrix {@code A} in GPU memory to matrix
 * {@code B} in CPU memory. Both matrices are assumed to be in column-major format.
 *
 * @param rows     number of rows of the tile
 * @param cols     number of columns of the tile
 * @param elemSize size of one element in bytes
 * @param A        source matrix in GPU memory
 * @param lda      leading dimension (number of rows) of {@code A}
 * @param B        destination matrix in CPU memory
 * @param ldb      leading dimension of {@code B}
 * @return a {@code cublasStatus_t} value: {@code CUBLAS_STATUS_SUCCESS},
 *         {@code CUBLAS_STATUS_NOT_INITIALIZED}, {@code CUBLAS_STATUS_INVALID_VALUE}
 *         or {@code CUBLAS_STATUS_MAPPING_ERROR}
 */
public static native @Cast("cublasStatus_t") int cublasGetMatrix(int rows, int cols, int elemSize, 
                                             @Const Pointer A, int lda, Pointer B,
                                             int ldb);

/* 
 * cublasStatus_t
 * cublasSetVectorAsync ( int n, int elemSize, const void *x, int incx, 
 *                       void *y, int incy, cudaStream_t stream );
 *
 * cublasSetVectorAsync has the same functionality as cublasSetVector,
 * but the transfer is done asynchronously within the CUDA stream passed
 * as a parameter.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if an error occurred accessing GPU memory   
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
/**
 * Binding for the C function {@code cublasSetVectorAsync}: same operation as
 * {@code cublasSetVector}, but the transfer is done asynchronously within the
 * given CUDA stream.
 *
 * @param n         number of elements to copy
 * @param elemSize  size of one element in bytes
 * @param hostPtr   source vector (named {@code x} in the C header comment)
 * @param incx      storage spacing between consecutive source elements
 * @param devicePtr destination vector (named {@code y} in the C header comment)
 * @param incy      storage spacing between consecutive destination elements
 * @param stream    CUDA stream in which the transfer is performed
 * @return a {@code cublasStatus_t} value: {@code CUBLAS_STATUS_SUCCESS},
 *         {@code CUBLAS_STATUS_NOT_INITIALIZED}, {@code CUBLAS_STATUS_INVALID_VALUE}
 *         (incx, incy or elemSize &lt;= 0) or {@code CUBLAS_STATUS_MAPPING_ERROR}
 */
public static native @Cast("cublasStatus_t") int cublasSetVectorAsync(int n, int elemSize, 
                                                  @Const Pointer hostPtr, int incx, 
                                                  Pointer devicePtr, int incy,
                                                  CUstream_st stream);
/* 
 * cublasStatus_t
 * cublasGetVectorAsync( int n, int elemSize, const void *x, int incx, 
 *                       void *y, int incy, cudaStream_t stream)
 * 
 * cublasGetVectorAsync has the same functionality as cublasGetVector,
 * but the transfer is done asynchronously within the CUDA stream passed
 * as a parameter.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if an error occurred accessing GPU memory   
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
/**
 * Binding for the C function {@code cublasGetVectorAsync}: same operation as
 * {@code cublasGetVector}, but the transfer is done asynchronously within the
 * given CUDA stream.
 *
 * @param n         number of elements to copy
 * @param elemSize  size of one element in bytes
 * @param devicePtr source vector (named {@code x} in the C header comment)
 * @param incx      storage spacing between consecutive source elements
 * @param hostPtr   destination vector (named {@code y} in the C header comment)
 * @param incy      storage spacing between consecutive destination elements
 * @param stream    CUDA stream in which the transfer is performed
 * @return a {@code cublasStatus_t} value: {@code CUBLAS_STATUS_SUCCESS},
 *         {@code CUBLAS_STATUS_NOT_INITIALIZED}, {@code CUBLAS_STATUS_INVALID_VALUE}
 *         (incx, incy or elemSize &lt;= 0) or {@code CUBLAS_STATUS_MAPPING_ERROR}
 */
public static native @Cast("cublasStatus_t") int cublasGetVectorAsync(int n, int elemSize,
                                                  @Const Pointer devicePtr, int incx,
                                                  Pointer hostPtr, int incy,
                                                  CUstream_st stream);

/*
 * cublasStatus_t 
 * cublasSetMatrixAsync (int rows, int cols, int elemSize, const void *A, 
 *                       int lda, void *B, int ldb, cudaStream_t stream)
 *
 * cublasSetMatrixAsync has the same functionality as cublasSetMatrix,
 * but the transfer is done asynchronously within the CUDA stream passed
 * as a parameter.
 *
 * Return Values 
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if rows or cols < 0, or elemSize, lda, or 
 *                                ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
/**
 * Binding for the C function {@code cublasSetMatrixAsync}: same operation as
 * {@code cublasSetMatrix}, but the transfer is done asynchronously within the
 * given CUDA stream.
 *
 * @param rows     number of rows of the tile
 * @param cols     number of columns of the tile
 * @param elemSize size of one element in bytes
 * @param A        source matrix
 * @param lda      leading dimension of {@code A}
 * @param B        destination matrix
 * @param ldb      leading dimension of {@code B}
 * @param stream   CUDA stream in which the transfer is performed
 * @return a {@code cublasStatus_t} value: {@code CUBLAS_STATUS_SUCCESS},
 *         {@code CUBLAS_STATUS_NOT_INITIALIZED}, {@code CUBLAS_STATUS_INVALID_VALUE}
 *         (rows or cols &lt; 0, or elemSize, lda or ldb &lt;= 0) or
 *         {@code CUBLAS_STATUS_MAPPING_ERROR}
 */
public static native @Cast("cublasStatus_t") int cublasSetMatrixAsync(int rows, int cols, int elemSize,
                                                  @Const Pointer A, int lda, Pointer B,
                                                  int ldb, CUstream_st stream);

/*
 * cublasStatus_t 
 * cublasGetMatrixAsync (int rows, int cols, int elemSize, const void *A, 
 *                       int lda, void *B, int ldb, cudaStream_t stream)
 *
 * cublasGetMatrixAsync has the same functionality as cublasGetMatrix,
 * but the transfer is done asynchronously within the CUDA stream passed
 * as a parameter.
 *
 * Return Values 
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if rows, cols, elemSize, lda, or ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
/**
 * Binding for the C function {@code cublasGetMatrixAsync}: same operation as
 * {@code cublasGetMatrix}, but the transfer is done asynchronously within the
 * given CUDA stream.
 *
 * @param rows     number of rows of the tile
 * @param cols     number of columns of the tile
 * @param elemSize size of one element in bytes
 * @param A        source matrix
 * @param lda      leading dimension of {@code A}
 * @param B        destination matrix
 * @param ldb      leading dimension of {@code B}
 * @param stream   CUDA stream in which the transfer is performed
 * @return a {@code cublasStatus_t} value: {@code CUBLAS_STATUS_SUCCESS},
 *         {@code CUBLAS_STATUS_NOT_INITIALIZED}, {@code CUBLAS_STATUS_INVALID_VALUE}
 *         or {@code CUBLAS_STATUS_MAPPING_ERROR}
 */
public static native @Cast("cublasStatus_t") int cublasGetMatrixAsync(int rows, int cols, int elemSize,
                                                  @Const Pointer A, int lda, Pointer B,
                                                  int ldb, CUstream_st stream);


/**
 * Binding for the C function {@code cublasXerbla}; returns nothing. The two overloads
 * differ only in how the {@code const char*} name is supplied (BytePointer or String).
 * NOTE(review): in reference BLAS, xerbla is the routine that reports an invalid
 * argument -- confirm that this is the behavior here.
 *
 * @param srName routine name string passed to the native call
 * @param info   integer passed to the native call; presumably the position of the
 *               offending argument -- confirm against the cuBLAS documentation
 */
public static native void cublasXerbla(@Cast("const char*") BytePointer srName, int info);
public static native void cublasXerbla(String srName, int info);
/* ---------------- CUBLAS BLAS1 functions ---------------- */
/**
 * Binding for the C function {@code cublasNrm2Ex}, the type-generic form of the
 * nrm2 routines (data types are passed as {@code cudaDataType} values).
 * NOTE(review): in BLAS naming nrm2 is the Euclidean norm of a vector -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle        cuBLAS context
 * @param n             number of elements in {@code x}
 * @param x             input vector
 * @param xType         {@code cudaDataType} of {@code x}
 * @param incx          stride between consecutive elements of {@code x}
 * @param result        output location; the header comment marks a "host or device
 *                      pointer" on this declaration, presumably referring to this argument
 * @param resultType    {@code cudaDataType} of {@code result}
 * @param executionType {@code cudaDataType} used for the computation
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasNrm2Ex(cublasContext handle, 
                                                     int n, 
                                                     @Const Pointer x, 
                                                     @Cast("cudaDataType") int xType,
                                                     int incx, 
                                                     Pointer result,
                                                     @Cast("cudaDataType") int resultType,
                                                     @Cast("cudaDataType") int executionType); /* host or device pointer */

/**
 * Binding for the C function {@code cublasSnrm2_v2} (float data). The three overloads
 * differ only in how the float arrays are supplied (FloatPointer, FloatBuffer or float[]).
 * NOTE(review): in BLAS naming nrm2 is the Euclidean norm of a vector -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context
 * @param n      number of elements in {@code x}
 * @param x      input vector
 * @param incx   stride between consecutive elements of {@code x}
 * @param result output location; marked "host or device pointer" in the header.
 *               NOTE(review): the Buffer and array overloads can presumably only
 *               refer to host memory -- confirm
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSnrm2_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const FloatPointer x, 
                                                     int incx, 
                                                     FloatPointer result);
public static native @Cast("cublasStatus_t") int cublasSnrm2_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const FloatBuffer x, 
                                                     int incx, 
                                                     FloatBuffer result);
public static native @Cast("cublasStatus_t") int cublasSnrm2_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const float[] x, 
                                                     int incx, 
                                                     float[] result); /* host or device pointer */

/**
 * Binding for the C function {@code cublasDnrm2_v2} (double data). The three overloads
 * differ only in how the double arrays are supplied (DoublePointer, DoubleBuffer or double[]).
 * NOTE(review): in BLAS naming nrm2 is the Euclidean norm of a vector -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context
 * @param n      number of elements in {@code x}
 * @param x      input vector
 * @param incx   stride between consecutive elements of {@code x}
 * @param result output location; marked "host or device pointer" in the header.
 *               NOTE(review): the Buffer and array overloads can presumably only
 *               refer to host memory -- confirm
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasDnrm2_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const DoublePointer x, 
                                                     int incx, 
                                                     DoublePointer result);
public static native @Cast("cublasStatus_t") int cublasDnrm2_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const DoubleBuffer x, 
                                                     int incx, 
                                                     DoubleBuffer result);
public static native @Cast("cublasStatus_t") int cublasDnrm2_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const double[] x, 
                                                     int incx, 
                                                     double[] result);  /* host or device pointer */

/**
 * Binding for the C function {@code cublasScnrm2_v2}: the input is {@code cuComplex}
 * data (mapped to {@code float2}) and the result is a float. The three overloads
 * differ only in how the float result is supplied.
 * NOTE(review): in BLAS naming nrm2 is the Euclidean norm of a vector -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context
 * @param n      number of elements in {@code x}
 * @param x      input vector of {@code cuComplex} values
 * @param incx   stride between consecutive elements of {@code x}
 * @param result output location; marked "host or device pointer" in the header.
 *               NOTE(review): the Buffer and array overloads can presumably only
 *               refer to host memory -- confirm
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasScnrm2_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      FloatPointer result);
public static native @Cast("cublasStatus_t") int cublasScnrm2_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      FloatBuffer result);
public static native @Cast("cublasStatus_t") int cublasScnrm2_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      float[] result);  /* host or device pointer */

/**
 * Binding for the C function {@code cublasDznrm2_v2}: the input is
 * {@code cuDoubleComplex} data (mapped to {@code double2}) and the result is a double.
 * The three overloads differ only in how the double result is supplied.
 * NOTE(review): in BLAS naming nrm2 is the Euclidean norm of a vector -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context
 * @param n      number of elements in {@code x}
 * @param x      input vector of {@code cuDoubleComplex} values
 * @param incx   stride between consecutive elements of {@code x}
 * @param result output location; marked "host or device pointer" in the header.
 *               NOTE(review): the Buffer and array overloads can presumably only
 *               refer to host memory -- confirm
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasDznrm2_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      DoublePointer result);
public static native @Cast("cublasStatus_t") int cublasDznrm2_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      DoubleBuffer result);
public static native @Cast("cublasStatus_t") int cublasDznrm2_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      double[] result);  /* host or device pointer */

/**
 * Binding for the C function {@code cublasDotEx}, the type-generic form of the dot
 * routines (data types are passed as {@code cudaDataType} values).
 * NOTE(review): in BLAS naming dot is the dot product of two vectors -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle        cuBLAS context
 * @param n             number of elements in each vector
 * @param x             first input vector
 * @param xType         {@code cudaDataType} of {@code x}
 * @param incx          stride between consecutive elements of {@code x}
 * @param y             second input vector
 * @param yType         {@code cudaDataType} of {@code y}
 * @param incy          stride between consecutive elements of {@code y}
 * @param result        output location
 * @param resultType    {@code cudaDataType} of {@code result}
 * @param executionType {@code cudaDataType} used for the computation
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasDotEx(cublasContext handle,
                                                     int n, 
                                                     @Const Pointer x,
                                                     @Cast("cudaDataType") int xType, 
                                                     int incx, 
                                                     @Const Pointer y, 
                                                     @Cast("cudaDataType") int yType,
                                                     int incy,
                                                     Pointer result,
                                                     @Cast("cudaDataType") int resultType,
                                                     @Cast("cudaDataType") int executionType);

/**
 * Binding for the C function {@code cublasDotcEx}; same parameter list as
 * {@code cublasDotEx}.
 * NOTE(review): in BLAS naming dotc is the dot product with the first vector
 * conjugated -- confirm against the cuBLAS documentation.
 *
 * @param handle        cuBLAS context
 * @param n             number of elements in each vector
 * @param x             first input vector
 * @param xType         {@code cudaDataType} of {@code x}
 * @param incx          stride between consecutive elements of {@code x}
 * @param y             second input vector
 * @param yType         {@code cudaDataType} of {@code y}
 * @param incy          stride between consecutive elements of {@code y}
 * @param result        output location
 * @param resultType    {@code cudaDataType} of {@code result}
 * @param executionType {@code cudaDataType} used for the computation
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasDotcEx(cublasContext handle,
                                                     int n, 
                                                     @Const Pointer x,
                                                     @Cast("cudaDataType") int xType, 
                                                     int incx, 
                                                     @Const Pointer y, 
                                                     @Cast("cudaDataType") int yType,
                                                     int incy,
                                                     Pointer result,
                                                     @Cast("cudaDataType") int resultType,
                                                     @Cast("cudaDataType") int executionType);

/**
 * Binding for the C function {@code cublasSdot_v2} (float data). The three overloads
 * differ only in how the float arrays are supplied (FloatPointer, FloatBuffer or float[]).
 * NOTE(review): in BLAS naming dot is the dot product of two vectors -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context
 * @param n      number of elements in each vector
 * @param x      first input vector
 * @param incx   stride between consecutive elements of {@code x}
 * @param y      second input vector
 * @param incy   stride between consecutive elements of {@code y}
 * @param result output location; marked "host or device pointer" in the header.
 *               NOTE(review): the Buffer and array overloads can presumably only
 *               refer to host memory -- confirm
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasSdot_v2(cublasContext handle,
                                                     int n, 
                                                     @Const FloatPointer x, 
                                                     int incx, 
                                                     @Const FloatPointer y, 
                                                     int incy,
                                                     FloatPointer result);
public static native @Cast("cublasStatus_t") int cublasSdot_v2(cublasContext handle,
                                                     int n, 
                                                     @Const FloatBuffer x, 
                                                     int incx, 
                                                     @Const FloatBuffer y, 
                                                     int incy,
                                                     FloatBuffer result);
public static native @Cast("cublasStatus_t") int cublasSdot_v2(cublasContext handle,
                                                     int n, 
                                                     @Const float[] x, 
                                                     int incx, 
                                                     @Const float[] y, 
                                                     int incy,
                                                     float[] result);  /* host or device pointer */

/**
 * Binding for the C function {@code cublasDdot_v2} (double data). The three overloads
 * differ only in how the double arrays are supplied (DoublePointer, DoubleBuffer or double[]).
 * NOTE(review): in BLAS naming dot is the dot product of two vectors -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context
 * @param n      number of elements in each vector
 * @param x      first input vector
 * @param incx   stride between consecutive elements of {@code x}
 * @param y      second input vector
 * @param incy   stride between consecutive elements of {@code y}
 * @param result output location; marked "host or device pointer" in the header.
 *               NOTE(review): the Buffer and array overloads can presumably only
 *               refer to host memory -- confirm
 * @return a {@code cublasStatus_t} value (see the {@code CUBLAS_STATUS_*} constants)
 */
public static native @Cast("cublasStatus_t") int cublasDdot_v2(cublasContext handle,
                                                     int n, 
                                                     @Const DoublePointer x, 
                                                     int incx, 
                                                     @Const DoublePointer y,
                                                     int incy,
                                                     DoublePointer result);
public static native @Cast("cublasStatus_t") int cublasDdot_v2(cublasContext handle,
                                                     int n, 
                                                     @Const DoubleBuffer x, 
                                                     int incx, 
                                                     @Const DoubleBuffer y,
                                                     int incy,
                                                     DoubleBuffer result);
public static native @Cast("cublasStatus_t") int cublasDdot_v2(cublasContext handle,
                                                     int n, 
                                                     @Const double[] x, 
                                                     int incx, 
                                                     @Const double[] y,
                                                     int incy,
                                                     double[] result);  /* host or device pointer */
    
/**
 * JavaCPP native binding for {@code cublasCdotu_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this is presumably the unconjugated dot product of two
 * single-precision complex vectors — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first input; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second input; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @param result output; a {@code float2} passed as {@code cuComplex*}, commented as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCdotu_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      @Cast("const cuComplex*") float2 y, 
                                                      int incy,
                                                      @Cast("cuComplex*") float2 result);  /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasCdotc_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this is presumably the conjugated dot product of two
 * single-precision complex vectors — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first input; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second input; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @param result output; a {@code float2} passed as {@code cuComplex*}, commented as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCdotc_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      @Cast("const cuComplex*") float2 y, 
                                                      int incy,
                                                      @Cast("cuComplex*") float2 result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasZdotu_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this is presumably the unconjugated dot product of two
 * double-precision complex vectors — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first input; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second input; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @param result output; a {@code double2} passed as {@code cuDoubleComplex*}, commented as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZdotu_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      @Cast("const cuDoubleComplex*") double2 y, 
                                                      int incy,
                                                      @Cast("cuDoubleComplex*") double2 result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasZdotc_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this is presumably the conjugated dot product of two
 * double-precision complex vectors — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first input; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second input; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @param result output; a {@code double2} passed as {@code cuDoubleComplex*}, commented as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZdotc_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 y, 
                                                      int incy,
                                                      @Cast("cuDoubleComplex*") double2 result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasScalEx} (parsed from {@code cublas_api.h}).
 * Untyped variant: data is passed as raw {@code Pointer}s and each one is accompanied by an
 * {@code int} that is cast to the native {@code cudaDataType} enum.
 * NOTE(review): presumably scales vector {@code x} in place by {@code alpha} — confirm against
 * the cuBLAS documentation.
 *
 * @param handle        cuBLAS context forwarded to the native call
 * @param n             presumably the number of elements processed (TODO confirm)
 * @param alpha         scalar argument, declared {@code const} on the native side
 * @param alphaType     {@code cudaDataType} value describing {@code alpha}
 * @param x             non-const data pointer
 * @param xType         {@code cudaDataType} value describing {@code x}
 * @param incx          presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param executionType {@code cudaDataType} value; presumably the compute precision (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasScalEx(cublasContext handle, 
                                                     int n, 
                                                     @Const Pointer alpha,
                                                     @Cast("cudaDataType") int alphaType,
                                                     Pointer x, 
                                                     @Cast("cudaDataType") int xType,
                                                     int incx,
                                                     @Cast("cudaDataType") int executionType);

/**
 * JavaCPP native binding for {@code cublasSscal_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably scales the single-precision vector {@code x} in place by
 * {@code alpha} — confirm against the cuBLAS documentation.
 * <p>
 * The three overloads are identical except for the carrier of the float data:
 * {@code FloatPointer}, {@code FloatBuffer} or {@code float[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  scalar argument, passed by reference and declared {@code const} on the native side
 * @param x      non-const vector argument
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSscal_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const FloatPointer alpha,
                                                     FloatPointer x, 
                                                     int incx);
public static native @Cast("cublasStatus_t") int cublasSscal_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const FloatBuffer alpha,
                                                     FloatBuffer x, 
                                                     int incx);
public static native @Cast("cublasStatus_t") int cublasSscal_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const float[] alpha,
                                                     float[] x, 
                                                     int incx);
    
/**
 * JavaCPP native binding for {@code cublasDscal_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably scales the double-precision vector {@code x} in place by
 * {@code alpha} — confirm against the cuBLAS documentation.
 * <p>
 * The three overloads are identical except for the carrier of the double data:
 * {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  scalar argument, passed by reference and declared {@code const} on the native side
 * @param x      non-const vector argument
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDscal_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const DoublePointer alpha,
                                                     DoublePointer x, 
                                                     int incx);
public static native @Cast("cublasStatus_t") int cublasDscal_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const DoubleBuffer alpha,
                                                     DoubleBuffer x, 
                                                     int incx);
public static native @Cast("cublasStatus_t") int cublasDscal_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const double[] alpha,
                                                     double[] x, 
                                                     int incx);
    
/**
 * JavaCPP native binding for {@code cublasCscal_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably scales the single-precision complex vector {@code x} in place by
 * the complex scalar {@code alpha} — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  scalar; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param x      vector; a {@code float2} passed to native code as non-const {@code cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCscal_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("const cuComplex*") float2 alpha,
                                                     @Cast("cuComplex*") float2 x, 
                                                     int incx);

/**
 * JavaCPP native binding for {@code cublasCsscal_v2} (parsed from {@code cublas_api.h}).
 * Mixed-type signature: {@code alpha} is real float data while {@code x} is complex
 * ({@code cuComplex*}).
 * NOTE(review): presumably scales the complex vector {@code x} in place by the real scalar
 * {@code alpha} — confirm against the cuBLAS documentation.
 * <p>
 * The three overloads differ only in how {@code alpha} is carried: {@code FloatPointer},
 * {@code FloatBuffer} or {@code float[]}; {@code x} is a {@code float2} in all of them.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  real scalar, passed by reference and declared {@code const} on the native side
 * @param x      vector; a {@code float2} passed to native code as non-const {@code cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCsscal_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const FloatPointer alpha,
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);
public static native @Cast("cublasStatus_t") int cublasCsscal_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const FloatBuffer alpha,
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);
public static native @Cast("cublasStatus_t") int cublasCsscal_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const float[] alpha,
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);

/**
 * JavaCPP native binding for {@code cublasZscal_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably scales the double-precision complex vector {@code x} in place by
 * the complex scalar {@code alpha} — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  scalar; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param x      vector; a {@code double2} passed to native code as non-const {@code cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZscal_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("const cuDoubleComplex*") double2 alpha,
                                                     @Cast("cuDoubleComplex*") double2 x, 
                                                     int incx);

/**
 * JavaCPP native binding for {@code cublasZdscal_v2} (parsed from {@code cublas_api.h}).
 * Mixed-type signature: {@code alpha} is real double data while {@code x} is complex
 * ({@code cuDoubleComplex*}).
 * NOTE(review): presumably scales the complex vector {@code x} in place by the real scalar
 * {@code alpha} — confirm against the cuBLAS documentation.
 * <p>
 * The three overloads differ only in how {@code alpha} is carried: {@code DoublePointer},
 * {@code DoubleBuffer} or {@code double[]}; {@code x} is a {@code double2} in all of them.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  real scalar, passed by reference and declared {@code const} on the native side
 * @param x      vector; a {@code double2} passed to native code as non-const {@code cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZdscal_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const DoublePointer alpha,
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);
public static native @Cast("cublasStatus_t") int cublasZdscal_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const DoubleBuffer alpha,
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);
public static native @Cast("cublasStatus_t") int cublasZdscal_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const double[] alpha,
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);

/**
 * JavaCPP native binding for {@code cublasAxpyEx} (parsed from {@code cublas_api.h}).
 * Untyped variant: data is passed as raw {@code Pointer}s and each one is accompanied by an
 * {@code int} that is cast to the native {@code cudaDataType} enum.
 * NOTE(review): presumably computes {@code y = alpha * x + y} — confirm against the cuBLAS
 * documentation.
 *
 * @param handle        cuBLAS context forwarded to the native call
 * @param n             presumably the number of elements processed (TODO confirm)
 * @param alpha         scalar argument, declared {@code const} on the native side
 * @param alphaType     {@code cudaDataType} value describing {@code alpha}
 * @param x             input data pointer, declared {@code const} on the native side
 * @param xType         {@code cudaDataType} value describing {@code x}
 * @param incx          presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y             non-const data pointer
 * @param yType         {@code cudaDataType} value describing {@code y}
 * @param incy          presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @param executiontype {@code cudaDataType} value; presumably the compute precision (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasAxpyEx(cublasContext handle,
                                                      int n,
                                                      @Const Pointer alpha,
                                                      @Cast("cudaDataType") int alphaType,
                                                      @Const Pointer x,
                                                      @Cast("cudaDataType") int xType,
                                                      int incx,
                                                      Pointer y,
                                                      @Cast("cudaDataType") int yType,
                                                      int incy,
                                                      @Cast("cudaDataType") int executiontype);

/**
 * JavaCPP native binding for {@code cublasSaxpy_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably the single-precision {@code y = alpha * x + y} — confirm against
 * the cuBLAS documentation.
 * <p>
 * The three overloads are identical except for the carrier of the float data:
 * {@code FloatPointer}, {@code FloatBuffer} or {@code float[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  scalar argument, passed by reference and declared {@code const} on the native side
 * @param x      input vector, declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      non-const vector argument
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSaxpy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const FloatPointer alpha,
                                                      @Const FloatPointer x, 
                                                      int incx, 
                                                      FloatPointer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasSaxpy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const FloatBuffer alpha,
                                                      @Const FloatBuffer x, 
                                                      int incx, 
                                                      FloatBuffer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasSaxpy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const float[] alpha,
                                                      @Const float[] x, 
                                                      int incx, 
                                                      float[] y, 
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasDaxpy_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably the double-precision {@code y = alpha * x + y} — confirm against
 * the cuBLAS documentation.
 * <p>
 * The three overloads are identical except for the carrier of the double data:
 * {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  scalar argument, passed by reference and declared {@code const} on the native side
 * @param x      input vector, declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      non-const vector argument
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDaxpy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const DoublePointer alpha,
                                                      @Const DoublePointer x, 
                                                      int incx, 
                                                      DoublePointer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasDaxpy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const DoubleBuffer alpha,
                                                      @Const DoubleBuffer x, 
                                                      int incx, 
                                                      DoubleBuffer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasDaxpy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const double[] alpha,
                                                      @Const double[] x, 
                                                      int incx, 
                                                      double[] y, 
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasCaxpy_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably the single-precision complex {@code y = alpha * x + y} — confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  scalar; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param x      input vector; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      vector; a {@code float2} passed to native code as non-const {@code cuComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCaxpy_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 alpha,
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      @Cast("cuComplex*") float2 y, 
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasZaxpy_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably the double-precision complex {@code y = alpha * x + y} — confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param alpha  scalar; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param x      input vector; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      vector; a {@code double2} passed to native code as non-const {@code cuDoubleComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZaxpy_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 alpha,
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      @Cast("cuDoubleComplex*") double2 y, 
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasCopyEx} (parsed from {@code cublas_api.h}).
 * Untyped variant: data is passed as raw {@code Pointer}s and each one is accompanied by an
 * {@code int} that is cast to the native {@code cudaDataType} enum.
 * NOTE(review): presumably copies vector {@code x} into {@code y} — confirm against the
 * cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      source data pointer, declared {@code const} on the native side
 * @param xType  {@code cudaDataType} value describing {@code x}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      non-const data pointer
 * @param yType  {@code cudaDataType} value describing {@code y}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCopyEx(cublasContext handle,
                                                      int n, 
                                                      @Const Pointer x,
                                                      @Cast("cudaDataType") int xType,
                                                      int incx, 
                                                      Pointer y,
                                                      @Cast("cudaDataType") int yType,
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasScopy_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably copies the single-precision vector {@code x} into {@code y} —
 * confirm against the cuBLAS documentation.
 * <p>
 * The three overloads are identical except for the carrier of the float data:
 * {@code FloatPointer}, {@code FloatBuffer} or {@code float[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      source vector, declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      non-const vector argument
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasScopy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const FloatPointer x, 
                                                      int incx, 
                                                      FloatPointer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasScopy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const FloatBuffer x, 
                                                      int incx, 
                                                      FloatBuffer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasScopy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const float[] x, 
                                                      int incx, 
                                                      float[] y, 
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasDcopy_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably copies the double-precision vector {@code x} into {@code y} —
 * confirm against the cuBLAS documentation.
 * <p>
 * The three overloads are identical except for the carrier of the double data:
 * {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      source vector, declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      non-const vector argument
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDcopy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const DoublePointer x, 
                                                      int incx, 
                                                      DoublePointer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasDcopy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const DoubleBuffer x, 
                                                      int incx, 
                                                      DoubleBuffer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasDcopy_v2(cublasContext handle,
                                                      int n, 
                                                      @Const double[] x, 
                                                      int incx, 
                                                      double[] y, 
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasCcopy_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably copies the single-precision complex vector {@code x} into
 * {@code y} — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      source; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      a {@code float2} passed to native code as non-const {@code cuComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCcopy_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      @Cast("cuComplex*") float2 y,
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasZcopy_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): presumably copies the double-precision complex vector {@code x} into
 * {@code y} — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      source; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      a {@code double2} passed to native code as non-const {@code cuDoubleComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZcopy_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      @Cast("cuDoubleComplex*") double2 y,
                                                      int incy);
    
/**
 * JavaCPP native binding for {@code cublasSswap_v2} (parsed from {@code cublas_api.h}).
 * Neither vector argument is declared {@code const}.
 * NOTE(review): presumably exchanges the contents of the single-precision vectors {@code x}
 * and {@code y} — confirm against the cuBLAS documentation.
 * <p>
 * The three overloads are identical except for the carrier of the float data:
 * {@code FloatPointer}, {@code FloatBuffer} or {@code float[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first non-const vector argument
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second non-const vector argument
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSswap_v2(cublasContext handle,
                                                      int n, 
                                                      FloatPointer x, 
                                                      int incx, 
                                                      FloatPointer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasSswap_v2(cublasContext handle,
                                                      int n, 
                                                      FloatBuffer x, 
                                                      int incx, 
                                                      FloatBuffer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasSswap_v2(cublasContext handle,
                                                      int n, 
                                                      float[] x, 
                                                      int incx, 
                                                      float[] y, 
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasDswap_v2} (parsed from {@code cublas_api.h}).
 * Neither vector argument is declared {@code const}.
 * NOTE(review): presumably exchanges the contents of the double-precision vectors {@code x}
 * and {@code y} — confirm against the cuBLAS documentation.
 * <p>
 * The three overloads are identical except for the carrier of the double data:
 * {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first non-const vector argument
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second non-const vector argument
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDswap_v2(cublasContext handle,
                                                      int n, 
                                                      DoublePointer x, 
                                                      int incx, 
                                                      DoublePointer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasDswap_v2(cublasContext handle,
                                                      int n, 
                                                      DoubleBuffer x, 
                                                      int incx, 
                                                      DoubleBuffer y, 
                                                      int incy);
public static native @Cast("cublasStatus_t") int cublasDswap_v2(cublasContext handle,
                                                      int n, 
                                                      double[] x, 
                                                      int incx, 
                                                      double[] y, 
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasCswap_v2} (parsed from {@code cublas_api.h}).
 * Both vector arguments are passed as non-const {@code cuComplex*}.
 * NOTE(review): presumably exchanges the contents of the single-precision complex vectors
 * {@code x} and {@code y} — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first vector; a {@code float2} passed to native code as {@code cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second vector; a {@code float2} passed to native code as {@code cuComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCswap_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx, 
                                                      @Cast("cuComplex*") float2 y,
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasZswap_v2} (parsed from {@code cublas_api.h}).
 * Both vector arguments are passed as non-const {@code cuDoubleComplex*}.
 * NOTE(review): presumably exchanges the contents of the double-precision complex vectors
 * {@code x} and {@code y} — confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first vector; a {@code double2} passed to native code as {@code cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second vector; a {@code double2} passed to native code as {@code cuDoubleComplex*}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZswap_v2(cublasContext handle,
                                                      int n, 
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      @Cast("cuDoubleComplex*") double2 y,
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasSwapEx} (parsed from {@code cublas_api.h}).
 * Untyped variant: both vectors are raw, non-const {@code Pointer}s, each accompanied by an
 * {@code int} that is cast to the native {@code cudaDataType} enum.
 * NOTE(review): presumably exchanges the contents of {@code x} and {@code y} — confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      first non-const data pointer
 * @param xType  {@code cudaDataType} value describing {@code x}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param y      second non-const data pointer
 * @param yType  {@code cudaDataType} value describing {@code y}
 * @param incy   presumably the stride between consecutive elements of {@code y} (TODO confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSwapEx(cublasContext handle,
                                                      int n,
                                                      Pointer x,
                                                      @Cast("cudaDataType") int xType,
                                                      int incx,
                                                      Pointer y,
                                                      @Cast("cudaDataType") int yType,
                                                      int incy);

/**
 * JavaCPP native binding for {@code cublasIsamax_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this presumably writes to {@code result} the index of the
 * element of the single-precision vector {@code x} with the largest magnitude; the index base
 * (cuBLAS is commonly documented as 1-based) should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads pair the input/output carriers as {@code FloatPointer}/{@code IntPointer},
 * {@code FloatBuffer}/{@code IntBuffer} and {@code float[]}/{@code int[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input vector, declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output; commented in the declaration as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIsamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const FloatPointer x, 
                                                      int incx, 
                                                      IntPointer result);
public static native @Cast("cublasStatus_t") int cublasIsamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const FloatBuffer x, 
                                                      int incx, 
                                                      IntBuffer result);
public static native @Cast("cublasStatus_t") int cublasIsamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const float[] x, 
                                                      int incx, 
                                                      int[] result); /* host or device pointer */
    
/**
 * JavaCPP native binding for {@code cublasIdamax_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this presumably writes to {@code result} the index of the
 * element of the double-precision vector {@code x} with the largest magnitude; the index base
 * (cuBLAS is commonly documented as 1-based) should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads pair the input/output carriers as {@code DoublePointer}/{@code IntPointer},
 * {@code DoubleBuffer}/{@code IntBuffer} and {@code double[]}/{@code int[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input vector, declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output; commented in the declaration as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIdamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const DoublePointer x, 
                                                      int incx, 
                                                      IntPointer result);
public static native @Cast("cublasStatus_t") int cublasIdamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const DoubleBuffer x, 
                                                      int incx, 
                                                      IntBuffer result);
public static native @Cast("cublasStatus_t") int cublasIdamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const double[] x, 
                                                      int incx, 
                                                      int[] result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasIcamax_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this presumably writes to {@code result} the index of the
 * "largest" element of the single-precision complex vector {@code x}; the exact magnitude
 * measure and the index base should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads differ only in the carrier of {@code result}: {@code IntPointer},
 * {@code IntBuffer} or {@code int[]}; {@code x} is a {@code float2} in all of them.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output; commented in the declaration as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIcamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      IntPointer result);
public static native @Cast("cublasStatus_t") int cublasIcamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      IntBuffer result);
public static native @Cast("cublasStatus_t") int cublasIcamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      int[] result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasIzamax_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this presumably writes to {@code result} the index of the
 * "largest" element of the double-precision complex vector {@code x}; the exact magnitude
 * measure and the index base should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads differ only in the carrier of {@code result}: {@code IntPointer},
 * {@code IntBuffer} or {@code int[]}; {@code x} is a {@code double2} in all of them.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output; commented in the declaration as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIzamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      IntPointer result);
public static native @Cast("cublasStatus_t") int cublasIzamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      IntBuffer result);
public static native @Cast("cublasStatus_t") int cublasIzamax_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      int[] result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasIamaxEx} (parsed from {@code cublas_api.h}).
 * Untyped variant: {@code x} is a raw {@code Pointer} whose element type is given by
 * {@code xType}, an {@code int} cast to the native {@code cudaDataType} enum.
 * NOTE(review): presumably writes to {@code result} the index of the largest-magnitude element
 * of {@code x}; semantics and index base should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads differ only in the carrier of {@code result}: {@code IntPointer},
 * {@code IntBuffer} or {@code int[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input data pointer, declared {@code const} on the native side
 * @param xType  {@code cudaDataType} value describing {@code x}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output argument
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIamaxEx(cublasContext handle,
                                                      int n,
                                                      @Const Pointer x, @Cast("cudaDataType") int xType,
                                                      int incx,
                                                      IntPointer result
                                                    );
public static native @Cast("cublasStatus_t") int cublasIamaxEx(cublasContext handle,
                                                      int n,
                                                      @Const Pointer x, @Cast("cudaDataType") int xType,
                                                      int incx,
                                                      IntBuffer result
                                                    );
public static native @Cast("cublasStatus_t") int cublasIamaxEx(cublasContext handle,
                                                      int n,
                                                      @Const Pointer x, @Cast("cudaDataType") int xType,
                                                      int incx,
                                                      int[] result
                                                    );


/**
 * JavaCPP native binding for {@code cublasIsamin_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this presumably writes to {@code result} the index of the
 * element of the single-precision vector {@code x} with the smallest magnitude; the index base
 * (cuBLAS is commonly documented as 1-based) should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads pair the input/output carriers as {@code FloatPointer}/{@code IntPointer},
 * {@code FloatBuffer}/{@code IntBuffer} and {@code float[]}/{@code int[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input vector, declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output; commented in the declaration as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIsamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const FloatPointer x, 
                                                      int incx, 
                                                      IntPointer result);
public static native @Cast("cublasStatus_t") int cublasIsamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const FloatBuffer x, 
                                                      int incx, 
                                                      IntBuffer result);
public static native @Cast("cublasStatus_t") int cublasIsamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const float[] x, 
                                                      int incx, 
                                                      int[] result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasIdamin_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this presumably writes to {@code result} the index of the
 * element of the double-precision vector {@code x} with the smallest magnitude; the index base
 * (cuBLAS is commonly documented as 1-based) should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads pair the input/output carriers as {@code DoublePointer}/{@code IntPointer},
 * {@code DoubleBuffer}/{@code IntBuffer} and {@code double[]}/{@code int[]}.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input vector, declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output; commented in the declaration as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIdamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const DoublePointer x, 
                                                      int incx, 
                                                      IntPointer result);
public static native @Cast("cublasStatus_t") int cublasIdamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const DoubleBuffer x, 
                                                      int incx, 
                                                      IntBuffer result);
public static native @Cast("cublasStatus_t") int cublasIdamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Const double[] x, 
                                                      int incx, 
                                                      int[] result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasIcamin_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this presumably writes to {@code result} the index of the
 * "smallest" element of the single-precision complex vector {@code x}; the exact magnitude
 * measure and the index base should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads differ only in the carrier of {@code result}: {@code IntPointer},
 * {@code IntBuffer} or {@code int[]}; {@code x} is a {@code float2} in all of them.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input; a {@code float2} passed to native code as {@code const cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output; commented in the declaration as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIcamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      IntPointer result);
public static native @Cast("cublasStatus_t") int cublasIcamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      IntBuffer result);
public static native @Cast("cublasStatus_t") int cublasIcamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      int[] result); /* host or device pointer */

/**
 * JavaCPP native binding for {@code cublasIzamin_v2} (parsed from {@code cublas_api.h}).
 * NOTE(review): by cuBLAS naming this presumably writes to {@code result} the index of the
 * "smallest" element of the double-precision complex vector {@code x}; the exact magnitude
 * measure and the index base should be confirmed in the cuBLAS documentation.
 * <p>
 * The three overloads differ only in the carrier of {@code result}: {@code IntPointer},
 * {@code IntBuffer} or {@code int[]}; {@code x} is a {@code double2} in all of them.
 *
 * @param handle cuBLAS context forwarded to the native call
 * @param n      presumably the number of elements processed (TODO confirm)
 * @param x      input; a {@code double2} passed to native code as {@code const cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} (TODO confirm)
 * @param result non-const int output; commented in the declaration as a host or device pointer
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIzamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      IntPointer result);
public static native @Cast("cublasStatus_t") int cublasIzamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      IntBuffer result);
public static native @Cast("cublasStatus_t") int cublasIzamin_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      int[] result); /* host or device pointer */
 
/**
 * Native bindings for {@code cublasIaminEx}, which takes the input vector as an untyped
 * {@code Pointer} together with a {@code cudaDataType} tag. The three overloads differ only in
 * how {@code result} is passed: {@code IntPointer}, {@code IntBuffer} or {@code int[]}.
 * NOTE(review): presumably the type-generic form of the {@code cublasI?amin} routines —
 * confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements of {@code x} to examine — TODO confirm
 * @param x      input vector; declared {@code const} on the native side
 * @param xType  {@code cudaDataType} value passed as an {@code int}; presumably the element
 *               type of {@code x} — TODO confirm
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param result output slot for the integer result
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasIaminEx(cublasContext handle,
                                                      int n,
                                                      @Const Pointer x, @Cast("cudaDataType") int xType,
                                                      int incx,
                                                      IntPointer result
                                                    );
/** {@code cublasIaminEx} overload taking {@code result} as an {@code IntBuffer}. */
public static native @Cast("cublasStatus_t") int cublasIaminEx(cublasContext handle,
                                                      int n,
                                                      @Const Pointer x, @Cast("cudaDataType") int xType,
                                                      int incx,
                                                      IntBuffer result
                                                    );
/** {@code cublasIaminEx} overload taking {@code result} as an {@code int[]}. */
public static native @Cast("cublasStatus_t") int cublasIaminEx(cublasContext handle,
                                                      int n,
                                                      @Const Pointer x, @Cast("cudaDataType") int xType,
                                                      int incx,
                                                      int[] result
                                                    );

/**
 * Native binding for {@code cublasAsumEx}, which takes its vector and result as untyped
 * {@code Pointer}s accompanied by {@code cudaDataType} tags.
 * NOTE(review): presumably the type-generic form of the {@code cublas?asum}
 * (sum of absolute values) routines — confirm against the cuBLAS reference.
 *
 * @param handle        cuBLAS context the call is issued on
 * @param n             presumably the number of elements of {@code x} to process — TODO confirm
 * @param x             input vector; declared {@code const} on the native side
 * @param xType         {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      element type of {@code x} — TODO confirm
 * @param incx          presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param result        output slot; not declared {@code const}
 * @param resultType    {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type stored at {@code result} — TODO confirm
 * @param executiontype {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type the computation is carried out in — TODO confirm
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasAsumEx(cublasContext handle,
                                                     int n, 
                                                     @Const Pointer x,
                                                     @Cast("cudaDataType") int xType,
                                                     int incx, 
                                                     Pointer result,
                                                     @Cast("cudaDataType") int resultType,
                                                     @Cast("cudaDataType") int executiontype
                                                  );

/**
 * Native bindings for {@code cublasSasum_v2}. The three overloads pass {@code x} and
 * {@code result} as {@code FloatPointer}, {@code FloatBuffer} or {@code float[]} respectively.
 * NOTE(review): by BLAS naming this is presumably the single-precision sum of absolute
 * values — confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements of {@code x} to process — TODO confirm
 * @param x      input vector; declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param result output slot; the header comment after the last overload
 *               ("host or device pointer") appears to refer to this argument
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSasum_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const FloatPointer x, 
                                                     int incx, 
                                                     FloatPointer result);
/** {@code cublasSasum_v2} overload using {@code FloatBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasSasum_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const FloatBuffer x, 
                                                     int incx, 
                                                     FloatBuffer result);
/** {@code cublasSasum_v2} overload using {@code float[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasSasum_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const float[] x, 
                                                     int incx, 
                                                     float[] result); /* host or device pointer */

/**
 * Native bindings for {@code cublasDasum_v2}. The three overloads pass {@code x} and
 * {@code result} as {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]} respectively.
 * NOTE(review): by BLAS naming this is presumably the double-precision sum of absolute
 * values — confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements of {@code x} to process — TODO confirm
 * @param x      input vector; declared {@code const} on the native side
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param result output slot; the header comment after the last overload
 *               ("host or device pointer") appears to refer to this argument
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDasum_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const DoublePointer x, 
                                                     int incx, 
                                                     DoublePointer result);
/** {@code cublasDasum_v2} overload using {@code DoubleBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasDasum_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const DoubleBuffer x, 
                                                     int incx, 
                                                     DoubleBuffer result);
/** {@code cublasDasum_v2} overload using {@code double[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasDasum_v2(cublasContext handle, 
                                                     int n, 
                                                     @Const double[] x, 
                                                     int incx, 
                                                     double[] result); /* host or device pointer */

/**
 * Native bindings for {@code cublasScasum_v2}. The input is complex ({@code float2}, passed
 * natively as {@code const cuComplex*}) while the result is a real {@code float}; the three
 * overloads differ only in how {@code result} is passed ({@code FloatPointer},
 * {@code FloatBuffer} or {@code float[]}).
 * NOTE(review): by BLAS naming this is presumably the sum of absolute values of a
 * single-precision complex vector — confirm the exact definition in the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements of {@code x} to process — TODO confirm
 * @param x      input vector; handed to native code as {@code const cuComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param result output slot; the header comment after the last overload
 *               ("host or device pointer") appears to refer to this argument
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasScasum_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      FloatPointer result);
/** {@code cublasScasum_v2} overload taking {@code result} as a {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasScasum_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      FloatBuffer result);
/** {@code cublasScasum_v2} overload taking {@code result} as a {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasScasum_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx, 
                                                      float[] result); /* host or device pointer */

/**
 * Native bindings for {@code cublasDzasum_v2}. The input is complex ({@code double2}, passed
 * natively as {@code const cuDoubleComplex*}) while the result is a real {@code double}; the
 * three overloads differ only in how {@code result} is passed ({@code DoublePointer},
 * {@code DoubleBuffer} or {@code double[]}).
 * NOTE(review): by BLAS naming this is presumably the sum of absolute values of a
 * double-precision complex vector — confirm the exact definition in the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements of {@code x} to process — TODO confirm
 * @param x      input vector; handed to native code as {@code const cuDoubleComplex*}
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param result output slot; the header comment after the last overload
 *               ("host or device pointer") appears to refer to this argument
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDzasum_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      DoublePointer result);
/** {@code cublasDzasum_v2} overload taking {@code result} as a {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDzasum_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      DoubleBuffer result);
/** {@code cublasDzasum_v2} overload taking {@code result} as a {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDzasum_v2(cublasContext handle, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx, 
                                                      double[] result); /* host or device pointer */

/**
 * Native bindings for {@code cublasSrot_v2}. The three overloads pass every float argument as
 * {@code FloatPointer}, {@code FloatBuffer} or {@code float[]} respectively.
 * NOTE(review): by BLAS naming this presumably applies a plane rotation with cosine
 * {@code c} and sine {@code s} to the vector pair ({@code x}, {@code y}) — confirm against
 * the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements processed in each vector — TODO confirm
 * @param x      first vector; not declared {@code const}, so native code may write to it
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y      second vector; not declared {@code const}, so native code may write to it
 * @param incy   presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param c      rotation parameter; declared {@code const} on the native side
 * @param s      rotation parameter; declared {@code const} on the native side. The header
 *               comment after the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSrot_v2(cublasContext handle, 
                                                     int n, 
                                                     FloatPointer x, 
                                                     int incx, 
                                                     FloatPointer y, 
                                                     int incy, 
                                                     @Const FloatPointer c,
                                                     @Const FloatPointer s);
/** {@code cublasSrot_v2} overload using {@code FloatBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasSrot_v2(cublasContext handle, 
                                                     int n, 
                                                     FloatBuffer x, 
                                                     int incx, 
                                                     FloatBuffer y, 
                                                     int incy, 
                                                     @Const FloatBuffer c,
                                                     @Const FloatBuffer s);
/** {@code cublasSrot_v2} overload using {@code float[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasSrot_v2(cublasContext handle, 
                                                     int n, 
                                                     float[] x, 
                                                     int incx, 
                                                     float[] y, 
                                                     int incy, 
                                                     @Const float[] c,
                                                     @Const float[] s); /* host or device pointer */

/**
 * Native bindings for {@code cublasDrot_v2}. The three overloads pass every double argument as
 * {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]} respectively.
 * NOTE(review): by BLAS naming this presumably applies a plane rotation with cosine
 * {@code c} and sine {@code s} to the vector pair ({@code x}, {@code y}) — confirm against
 * the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements processed in each vector — TODO confirm
 * @param x      first vector; not declared {@code const}, so native code may write to it
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y      second vector; not declared {@code const}, so native code may write to it
 * @param incy   presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param c      rotation parameter; declared {@code const} on the native side
 * @param s      rotation parameter; declared {@code const} on the native side. The header
 *               comment after the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDrot_v2(cublasContext handle, 
                                                     int n, 
                                                     DoublePointer x, 
                                                     int incx, 
                                                     DoublePointer y, 
                                                     int incy, 
                                                     @Const DoublePointer c,
                                                     @Const DoublePointer s);
/** {@code cublasDrot_v2} overload using {@code DoubleBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasDrot_v2(cublasContext handle, 
                                                     int n, 
                                                     DoubleBuffer x, 
                                                     int incx, 
                                                     DoubleBuffer y, 
                                                     int incy, 
                                                     @Const DoubleBuffer c,
                                                     @Const DoubleBuffer s);
/** {@code cublasDrot_v2} overload using {@code double[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasDrot_v2(cublasContext handle, 
                                                     int n, 
                                                     double[] x, 
                                                     int incx, 
                                                     double[] y, 
                                                     int incy, 
                                                     @Const double[] c,
                                                     @Const double[] s); /* host or device pointer */

/**
 * Native bindings for {@code cublasCrot_v2}. Vectors {@code x}/{@code y} and the parameter
 * {@code s} are complex ({@code float2}, cast to {@code cuComplex*}), while {@code c} is a real
 * float; the three overloads differ only in how {@code c} is passed ({@code FloatPointer},
 * {@code FloatBuffer} or {@code float[]}).
 * NOTE(review): by BLAS naming this presumably applies a plane rotation to the vector pair
 * ({@code x}, {@code y}) — confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements processed in each vector — TODO confirm
 * @param x      first vector; passed as non-const {@code cuComplex*}, so native code may write to it
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y      second vector; passed as non-const {@code cuComplex*}, so native code may write to it
 * @param incy   presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param c      real rotation parameter; declared {@code const} on the native side
 * @param s      complex rotation parameter; passed as {@code const cuComplex*}. The header
 *               comment after the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuComplex*") float2 x, 
                                                     int incx, 
                                                     @Cast("cuComplex*") float2 y, 
                                                     int incy, 
                                                     @Const FloatPointer c,
                                                     @Cast("const cuComplex*") float2 s);
/** {@code cublasCrot_v2} overload taking {@code c} as a {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasCrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuComplex*") float2 x, 
                                                     int incx, 
                                                     @Cast("cuComplex*") float2 y, 
                                                     int incy, 
                                                     @Const FloatBuffer c,
                                                     @Cast("const cuComplex*") float2 s);
/** {@code cublasCrot_v2} overload taking {@code c} as a {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasCrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuComplex*") float2 x, 
                                                     int incx, 
                                                     @Cast("cuComplex*") float2 y, 
                                                     int incy, 
                                                     @Const float[] c,
                                                     @Cast("const cuComplex*") float2 s); /* host or device pointer */

/**
 * Native bindings for {@code cublasCsrot_v2}. Vectors {@code x}/{@code y} are complex
 * ({@code float2}, cast to {@code cuComplex*}) while both {@code c} and {@code s} are real
 * floats; the three overloads pass {@code c}/{@code s} as {@code FloatPointer},
 * {@code FloatBuffer} or {@code float[]} respectively.
 * NOTE(review): by BLAS naming this presumably applies a real plane rotation to a complex
 * vector pair — confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements processed in each vector — TODO confirm
 * @param x      first vector; passed as non-const {@code cuComplex*}, so native code may write to it
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y      second vector; passed as non-const {@code cuComplex*}, so native code may write to it
 * @param incy   presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param c      real rotation parameter; declared {@code const} on the native side
 * @param s      real rotation parameter; declared {@code const} on the native side. The header
 *               comment after the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCsrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuComplex*") float2 x, 
                                                     int incx, 
                                                     @Cast("cuComplex*") float2 y, 
                                                     int incy, 
                                                     @Const FloatPointer c,
                                                     @Const FloatPointer s);
/** {@code cublasCsrot_v2} overload taking {@code c}/{@code s} as {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasCsrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuComplex*") float2 x, 
                                                     int incx, 
                                                     @Cast("cuComplex*") float2 y, 
                                                     int incy, 
                                                     @Const FloatBuffer c,
                                                     @Const FloatBuffer s);
/** {@code cublasCsrot_v2} overload taking {@code c}/{@code s} as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasCsrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuComplex*") float2 x, 
                                                     int incx, 
                                                     @Cast("cuComplex*") float2 y, 
                                                     int incy, 
                                                     @Const float[] c,
                                                     @Const float[] s); /* host or device pointer */

/**
 * Native bindings for {@code cublasZrot_v2}. Vectors {@code x}/{@code y} and the parameter
 * {@code s} are complex ({@code double2}, cast to {@code cuDoubleComplex*}), while {@code c} is
 * a real double; the three overloads differ only in how {@code c} is passed
 * ({@code DoublePointer}, {@code DoubleBuffer} or {@code double[]}).
 * NOTE(review): by BLAS naming this presumably applies a plane rotation to the vector pair
 * ({@code x}, {@code y}) — confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements processed in each vector — TODO confirm
 * @param x      first vector; passed as non-const {@code cuDoubleComplex*}, so native code may write to it
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y      second vector; passed as non-const {@code cuDoubleComplex*}, so native code may write to it
 * @param incy   presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param c      real rotation parameter; declared {@code const} on the native side
 * @param s      complex rotation parameter; passed as {@code const cuDoubleComplex*}. The header
 *               comment after the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuDoubleComplex*") double2 x, 
                                                     int incx, 
                                                     @Cast("cuDoubleComplex*") double2 y, 
                                                     int incy, 
                                                     @Const DoublePointer c,
                                                     @Cast("const cuDoubleComplex*") double2 s);
/** {@code cublasZrot_v2} overload taking {@code c} as a {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasZrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuDoubleComplex*") double2 x, 
                                                     int incx, 
                                                     @Cast("cuDoubleComplex*") double2 y, 
                                                     int incy, 
                                                     @Const DoubleBuffer c,
                                                     @Cast("const cuDoubleComplex*") double2 s);
/** {@code cublasZrot_v2} overload taking {@code c} as a {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasZrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuDoubleComplex*") double2 x, 
                                                     int incx, 
                                                     @Cast("cuDoubleComplex*") double2 y, 
                                                     int incy, 
                                                     @Const double[] c,
                                                     @Cast("const cuDoubleComplex*") double2 s);  /* host or device pointer */

/**
 * Native bindings for {@code cublasZdrot_v2}. Vectors {@code x}/{@code y} are complex
 * ({@code double2}, cast to {@code cuDoubleComplex*}) while both {@code c} and {@code s} are
 * real doubles; the three overloads pass {@code c}/{@code s} as {@code DoublePointer},
 * {@code DoubleBuffer} or {@code double[]} respectively.
 * NOTE(review): by BLAS naming this presumably applies a real plane rotation to a complex
 * vector pair — confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements processed in each vector — TODO confirm
 * @param x      first vector; passed as non-const {@code cuDoubleComplex*}, so native code may write to it
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y      second vector; passed as non-const {@code cuDoubleComplex*}, so native code may write to it
 * @param incy   presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param c      real rotation parameter; declared {@code const} on the native side
 * @param s      real rotation parameter; declared {@code const} on the native side. The header
 *               comment after the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZdrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuDoubleComplex*") double2 x, 
                                                     int incx, 
                                                     @Cast("cuDoubleComplex*") double2 y, 
                                                     int incy, 
                                                     @Const DoublePointer c,
                                                     @Const DoublePointer s);
/** {@code cublasZdrot_v2} overload taking {@code c}/{@code s} as {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasZdrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuDoubleComplex*") double2 x, 
                                                     int incx, 
                                                     @Cast("cuDoubleComplex*") double2 y, 
                                                     int incy, 
                                                     @Const DoubleBuffer c,
                                                     @Const DoubleBuffer s);
/** {@code cublasZdrot_v2} overload taking {@code c}/{@code s} as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasZdrot_v2(cublasContext handle, 
                                                     int n, 
                                                     @Cast("cuDoubleComplex*") double2 x, 
                                                     int incx, 
                                                     @Cast("cuDoubleComplex*") double2 y, 
                                                     int incy, 
                                                     @Const double[] c,
                                                     @Const double[] s); /* host or device pointer */

/**
 * Native binding for {@code cublasRotEx}, which takes its vectors and rotation parameters as
 * untyped {@code Pointer}s accompanied by {@code cudaDataType} tags.
 * NOTE(review): presumably the type-generic form of the {@code cublas?rot} routines —
 * confirm against the cuBLAS reference.
 *
 * @param handle        cuBLAS context the call is issued on
 * @param n             presumably the number of elements processed in each vector — TODO confirm
 * @param x             first vector; not declared {@code const}, so native code may write to it
 * @param xType         {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      element type of {@code x} — TODO confirm
 * @param incx          presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y             second vector; not declared {@code const}, so native code may write to it
 * @param yType         {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      element type of {@code y} — TODO confirm
 * @param incy          presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param c             rotation parameter; declared {@code const} on the native side
 * @param s             rotation parameter; declared {@code const} on the native side
 * @param csType        {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type of {@code c} and {@code s} — TODO confirm
 * @param executiontype {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type the computation is carried out in — TODO confirm
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasRotEx(cublasContext handle,
                                                     int n,
                                                     Pointer x,
                                                     @Cast("cudaDataType") int xType,
                                                     int incx,
                                                     Pointer y,
                                                     @Cast("cudaDataType") int yType,
                                                     int incy,
                                                     @Const Pointer c,
                                                     @Const Pointer s,
                                                     @Cast("cudaDataType") int csType,
                                                     @Cast("cudaDataType") int executiontype);


/**
 * Native bindings for {@code cublasSrotg_v2}. The three overloads pass all four float
 * arguments as {@code FloatPointer}, {@code FloatBuffer} or {@code float[]} respectively.
 * None of the arguments is declared {@code const}, so native code may write to any of them.
 * NOTE(review): by BLAS naming this presumably constructs a plane (Givens) rotation from
 * {@code a} and {@code b} — confirm inputs/outputs against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param a      single-precision value, mutable from native code
 * @param b      single-precision value, mutable from native code
 * @param c      single-precision value, mutable from native code
 * @param s      single-precision value, mutable from native code. The header comment after
 *               the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSrotg_v2(cublasContext handle, 
                                                     FloatPointer a,
                                                     FloatPointer b,
                                                     FloatPointer c,
                                                     FloatPointer s);
/** {@code cublasSrotg_v2} overload using {@code FloatBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasSrotg_v2(cublasContext handle, 
                                                     FloatBuffer a,
                                                     FloatBuffer b,
                                                     FloatBuffer c,
                                                     FloatBuffer s);
/** {@code cublasSrotg_v2} overload using {@code float[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasSrotg_v2(cublasContext handle, 
                                                     float[] a,
                                                     float[] b,
                                                     float[] c,
                                                     float[] s);  /* host or device pointer */
    
/**
 * Native bindings for {@code cublasDrotg_v2}. The three overloads pass all four double
 * arguments as {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]} respectively.
 * None of the arguments is declared {@code const}, so native code may write to any of them.
 * NOTE(review): by BLAS naming this presumably constructs a plane (Givens) rotation from
 * {@code a} and {@code b} — confirm inputs/outputs against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param a      double-precision value, mutable from native code
 * @param b      double-precision value, mutable from native code
 * @param c      double-precision value, mutable from native code
 * @param s      double-precision value, mutable from native code. The header comment after
 *               the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDrotg_v2(cublasContext handle, 
                                                     DoublePointer a,
                                                     DoublePointer b,
                                                     DoublePointer c,
                                                     DoublePointer s);
/** {@code cublasDrotg_v2} overload using {@code DoubleBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasDrotg_v2(cublasContext handle, 
                                                     DoubleBuffer a,
                                                     DoubleBuffer b,
                                                     DoubleBuffer c,
                                                     DoubleBuffer s);
/** {@code cublasDrotg_v2} overload using {@code double[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasDrotg_v2(cublasContext handle, 
                                                     double[] a,
                                                     double[] b,
                                                     double[] c,
                                                     double[] s); /* host or device pointer */
    
/**
 * Native bindings for {@code cublasCrotg_v2}. Arguments {@code a}, {@code b} and {@code s} are
 * complex ({@code float2}, cast to non-const {@code cuComplex*}) and {@code c} is a real float;
 * the three overloads differ only in how {@code c} is passed ({@code FloatPointer},
 * {@code FloatBuffer} or {@code float[]}). None of the arguments is declared {@code const}.
 * NOTE(review): by BLAS naming this presumably constructs a complex plane rotation from
 * {@code a} and {@code b} — confirm inputs/outputs against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param a      complex value, mutable from native code
 * @param b      complex value, mutable from native code
 * @param c      real value, mutable from native code
 * @param s      complex value, mutable from native code. The header comment after the last
 *               overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCrotg_v2(cublasContext handle, 
                                                     @Cast("cuComplex*") float2 a,
                                                     @Cast("cuComplex*") float2 b,
                                                     FloatPointer c,
                                                     @Cast("cuComplex*") float2 s);
/** {@code cublasCrotg_v2} overload taking {@code c} as a {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasCrotg_v2(cublasContext handle, 
                                                     @Cast("cuComplex*") float2 a,
                                                     @Cast("cuComplex*") float2 b,
                                                     FloatBuffer c,
                                                     @Cast("cuComplex*") float2 s);
/** {@code cublasCrotg_v2} overload taking {@code c} as a {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasCrotg_v2(cublasContext handle, 
                                                     @Cast("cuComplex*") float2 a,
                                                     @Cast("cuComplex*") float2 b,
                                                     float[] c,
                                                     @Cast("cuComplex*") float2 s); /* host or device pointer */

/**
 * Native bindings for {@code cublasZrotg_v2}. Arguments {@code a}, {@code b} and {@code s} are
 * complex ({@code double2}, cast to non-const {@code cuDoubleComplex*}) and {@code c} is a real
 * double; the three overloads differ only in how {@code c} is passed ({@code DoublePointer},
 * {@code DoubleBuffer} or {@code double[]}). None of the arguments is declared {@code const}.
 * NOTE(review): by BLAS naming this presumably constructs a complex plane rotation from
 * {@code a} and {@code b} — confirm inputs/outputs against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param a      complex value, mutable from native code
 * @param b      complex value, mutable from native code
 * @param c      real value, mutable from native code
 * @param s      complex value, mutable from native code. The header comment after the last
 *               overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZrotg_v2(cublasContext handle, 
                                                     @Cast("cuDoubleComplex*") double2 a,
                                                     @Cast("cuDoubleComplex*") double2 b,
                                                     DoublePointer c,
                                                     @Cast("cuDoubleComplex*") double2 s);
/** {@code cublasZrotg_v2} overload taking {@code c} as a {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasZrotg_v2(cublasContext handle, 
                                                     @Cast("cuDoubleComplex*") double2 a,
                                                     @Cast("cuDoubleComplex*") double2 b,
                                                     DoubleBuffer c,
                                                     @Cast("cuDoubleComplex*") double2 s);
/** {@code cublasZrotg_v2} overload taking {@code c} as a {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasZrotg_v2(cublasContext handle, 
                                                     @Cast("cuDoubleComplex*") double2 a,
                                                     @Cast("cuDoubleComplex*") double2 b,
                                                     double[] c,
                                                     @Cast("cuDoubleComplex*") double2 s); /* host or device pointer */

/**
 * Native binding for {@code cublasRotgEx}, which takes all four values as untyped, non-const
 * {@code Pointer}s accompanied by {@code cudaDataType} tags.
 * NOTE(review): presumably the type-generic form of the {@code cublas?rotg} routines —
 * confirm against the cuBLAS reference.
 *
 * @param handle        cuBLAS context the call is issued on
 * @param a             value pointer; not declared {@code const}, so native code may write to it
 * @param b             value pointer; not declared {@code const}, so native code may write to it
 * @param abType        {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type of {@code a} and {@code b} — TODO confirm
 * @param c             value pointer; not declared {@code const}, so native code may write to it
 * @param s             value pointer; not declared {@code const}, so native code may write to it
 * @param csType        {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type of {@code c} and {@code s} — TODO confirm
 * @param executiontype {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type the computation is carried out in — TODO confirm
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasRotgEx(cublasContext handle,
                                                     Pointer a,
                                                     Pointer b,
                                                     @Cast("cudaDataType") int abType,
                                                     Pointer c,
                                                     Pointer s,
                                                     @Cast("cudaDataType") int csType,
                                                     @Cast("cudaDataType") int executiontype);

/**
 * Native bindings for {@code cublasSrotm_v2}. The three overloads pass every float argument as
 * {@code FloatPointer}, {@code FloatBuffer} or {@code float[]} respectively.
 * NOTE(review): by BLAS naming this presumably applies a modified Givens rotation described
 * by {@code param} to the vector pair ({@code x}, {@code y}) — confirm the layout of
 * {@code param} against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements processed in each vector — TODO confirm
 * @param x      first vector; not declared {@code const}, so native code may write to it
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y      second vector; not declared {@code const}, so native code may write to it
 * @param incy   presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param param  rotation parameters; declared {@code const} on the native side. The header
 *               comment after the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSrotm_v2(cublasContext handle, 
                                                     int n, 
                                                     FloatPointer x, 
                                                     int incx, 
                                                     FloatPointer y, 
                                                     int incy, 
                                                     @Const FloatPointer param);
/** {@code cublasSrotm_v2} overload using {@code FloatBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasSrotm_v2(cublasContext handle, 
                                                     int n, 
                                                     FloatBuffer x, 
                                                     int incx, 
                                                     FloatBuffer y, 
                                                     int incy, 
                                                     @Const FloatBuffer param);
/** {@code cublasSrotm_v2} overload using {@code float[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasSrotm_v2(cublasContext handle, 
                                                     int n, 
                                                     float[] x, 
                                                     int incx, 
                                                     float[] y, 
                                                     int incy, 
                                                     @Const float[] param);  /* host or device pointer */

/**
 * Native bindings for {@code cublasDrotm_v2}. The three overloads pass every double argument
 * as {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]} respectively.
 * NOTE(review): by BLAS naming this presumably applies a modified Givens rotation described
 * by {@code param} to the vector pair ({@code x}, {@code y}) — confirm the layout of
 * {@code param} against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param n      presumably the number of elements processed in each vector — TODO confirm
 * @param x      first vector; not declared {@code const}, so native code may write to it
 * @param incx   presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y      second vector; not declared {@code const}, so native code may write to it
 * @param incy   presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param param  rotation parameters; declared {@code const} on the native side. The header
 *               comment after the last overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDrotm_v2(cublasContext handle, 
                                                     int n, 
                                                     DoublePointer x, 
                                                     int incx, 
                                                     DoublePointer y, 
                                                     int incy, 
                                                     @Const DoublePointer param);
/** {@code cublasDrotm_v2} overload using {@code DoubleBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasDrotm_v2(cublasContext handle, 
                                                     int n, 
                                                     DoubleBuffer x, 
                                                     int incx, 
                                                     DoubleBuffer y, 
                                                     int incy, 
                                                     @Const DoubleBuffer param);
/** {@code cublasDrotm_v2} overload using {@code double[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasDrotm_v2(cublasContext handle, 
                                                     int n, 
                                                     double[] x, 
                                                     int incx, 
                                                     double[] y, 
                                                     int incy, 
                                                     @Const double[] param);  /* host or device pointer */

/**
 * Native binding for {@code cublasRotmEx}, which takes its vectors and parameter block as
 * untyped {@code Pointer}s accompanied by {@code cudaDataType} tags.
 * NOTE(review): presumably the type-generic form of the {@code cublas?rotm} routines —
 * confirm against the cuBLAS reference.
 *
 * @param handle        cuBLAS context the call is issued on
 * @param n             presumably the number of elements processed in each vector — TODO confirm
 * @param x             first vector; not declared {@code const}, so native code may write to it
 * @param xType         {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      element type of {@code x} — TODO confirm
 * @param incx          presumably the stride between consecutive elements of {@code x} — TODO confirm
 * @param y             second vector; not declared {@code const}, so native code may write to it
 * @param yType         {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      element type of {@code y} — TODO confirm
 * @param incy          presumably the stride between consecutive elements of {@code y} — TODO confirm
 * @param param         rotation parameters; declared {@code const} on the native side
 * @param paramType     {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      element type of {@code param} — TODO confirm
 * @param executiontype {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type the computation is carried out in — TODO confirm
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasRotmEx(cublasContext handle,
                                                     int n,
                                                     Pointer x,
                                                     @Cast("cudaDataType") int xType,
                                                     int incx,
                                                     Pointer y,
                                                     @Cast("cudaDataType") int yType,
                                                     int incy,
                                                     @Const Pointer param,
                                                     @Cast("cudaDataType") int paramType,
                                                     @Cast("cudaDataType") int executiontype);

/**
 * Native bindings for {@code cublasSrotmg_v2}. The three overloads pass every float argument
 * as {@code FloatPointer}, {@code FloatBuffer} or {@code float[]} respectively. Only
 * {@code y1} is declared {@code const}; the other arguments may be written by native code.
 * NOTE(review): by BLAS naming this presumably constructs a modified Givens rotation and
 * stores it in {@code param} — confirm inputs/outputs against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param d1     single-precision value, mutable from native code
 * @param d2     single-precision value, mutable from native code
 * @param x1     single-precision value, mutable from native code
 * @param y1     single-precision value; declared {@code const} on the native side
 * @param param  parameter block, mutable from native code. The header comment after the last
 *               overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSrotmg_v2(cublasContext handle, 
                                                      FloatPointer d1,
                                                      FloatPointer d2,
                                                      FloatPointer x1,
                                                      @Const FloatPointer y1,
                                                      FloatPointer param);
/** {@code cublasSrotmg_v2} overload using {@code FloatBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasSrotmg_v2(cublasContext handle, 
                                                      FloatBuffer d1,
                                                      FloatBuffer d2,
                                                      FloatBuffer x1,
                                                      @Const FloatBuffer y1,
                                                      FloatBuffer param);
/** {@code cublasSrotmg_v2} overload using {@code float[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasSrotmg_v2(cublasContext handle, 
                                                      float[] d1,
                                                      float[] d2,
                                                      float[] x1,
                                                      @Const float[] y1,
                                                      float[] param);    /* host or device pointer */
                                         
/**
 * Native bindings for {@code cublasDrotmg_v2}. The three overloads pass every double argument
 * as {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]} respectively. Only
 * {@code y1} is declared {@code const}; the other arguments may be written by native code.
 * NOTE(review): by BLAS naming this presumably constructs a modified Givens rotation and
 * stores it in {@code param} — confirm inputs/outputs against the cuBLAS reference.
 *
 * @param handle cuBLAS context the call is issued on
 * @param d1     double-precision value, mutable from native code
 * @param d2     double-precision value, mutable from native code
 * @param x1     double-precision value, mutable from native code
 * @param y1     double-precision value; declared {@code const} on the native side
 * @param param  parameter block, mutable from native code. The header comment after the last
 *               overload says "host or device pointer"
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDrotmg_v2(cublasContext handle, 
                                                      DoublePointer d1,  
                                                      DoublePointer d2,  
                                                      DoublePointer x1,  
                                                      @Const DoublePointer y1,  
                                                      DoublePointer param);
/** {@code cublasDrotmg_v2} overload using {@code DoubleBuffer} arguments. */
public static native @Cast("cublasStatus_t") int cublasDrotmg_v2(cublasContext handle, 
                                                      DoubleBuffer d1,  
                                                      DoubleBuffer d2,  
                                                      DoubleBuffer x1,  
                                                      @Const DoubleBuffer y1,  
                                                      DoubleBuffer param);
/** {@code cublasDrotmg_v2} overload using {@code double[]} arguments. */
public static native @Cast("cublasStatus_t") int cublasDrotmg_v2(cublasContext handle, 
                                                      double[] d1,  
                                                      double[] d2,  
                                                      double[] x1,  
                                                      @Const double[] y1,  
                                                      double[] param);    /* host or device pointer */  

/**
 * Native binding for {@code cublasRotmgEx}, which takes every value as an untyped
 * {@code Pointer} paired with its own {@code cudaDataType} tag. Only {@code y1} is declared
 * {@code const}; the other pointers may be written by native code.
 * NOTE(review): presumably the type-generic form of the {@code cublas?rotmg} routines —
 * confirm against the cuBLAS reference.
 *
 * @param handle        cuBLAS context the call is issued on
 * @param d1            value pointer, mutable from native code
 * @param d1Type        {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type of {@code d1} — TODO confirm
 * @param d2            value pointer, mutable from native code
 * @param d2Type        {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type of {@code d2} — TODO confirm
 * @param x1            value pointer, mutable from native code
 * @param x1Type        {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type of {@code x1} — TODO confirm
 * @param y1            value pointer; declared {@code const} on the native side
 * @param y1Type        {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type of {@code y1} — TODO confirm
 * @param param         parameter block, mutable from native code
 * @param paramType     {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      element type of {@code param} — TODO confirm
 * @param executiontype {@code cudaDataType} value passed as an {@code int}; presumably the
 *                      type the computation is carried out in — TODO confirm
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasRotmgEx(cublasContext handle,
                                                      Pointer d1,
                                                      @Cast("cudaDataType") int d1Type,
                                                      Pointer d2,
                                                      @Cast("cudaDataType") int d2Type,
                                                      Pointer x1,
                                                      @Cast("cudaDataType") int x1Type,
                                                      @Const Pointer y1,
                                                      @Cast("cudaDataType") int y1Type,
                                                      Pointer param,
                                                      @Cast("cudaDataType") int paramType,
                                                      @Cast("cudaDataType") int executiontype
                                                      );
/* --------------- CUBLAS BLAS2 functions  ---------------- */

/* GEMV */
/**
 * Native binding for {@code cublasSgemv_v2} (GEMV group of the BLAS2 functions), {@code float} data
 * passed as {@code FloatPointer}.
 * {@code trans} is an {@code int} cast to the native {@code cublasOperation_t} enum. {@code alpha},
 * {@code A}, {@code x} and {@code beta} are annotated {@code @Const}; {@code y} is not.
 * NOTE(review): argument names follow the BLAS convention (presumably m/n = matrix dimensions,
 * lda = leading dimension of A, incx/incy = vector strides) -- confirm against the cuBLAS
 * documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m, 
                                                      int n, 
                                                      @Const FloatPointer alpha,
                                                      @Const FloatPointer A, 
                                                      int lda, 
                                                      @Const FloatPointer x, 
                                                      int incx, 
                                                      @Const FloatPointer beta,
                                                      FloatPointer y, 
                                                      int incy);
/** Overload of {@code cublasSgemv_v2} with the pointer arguments passed as {@code java.nio.FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasSgemv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m, 
                                                      int n, 
                                                      @Const FloatBuffer alpha,
                                                      @Const FloatBuffer A, 
                                                      int lda, 
                                                      @Const FloatBuffer x, 
                                                      int incx, 
                                                      @Const FloatBuffer beta,
                                                      FloatBuffer y, 
                                                      int incy);
/** Overload of {@code cublasSgemv_v2} with the pointer arguments passed as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasSgemv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m, 
                                                      int n, 
                                                      @Const float[] alpha,
                                                      @Const float[] A, 
                                                      int lda, 
                                                      @Const float[] x, 
                                                      int incx, 
                                                      @Const float[] beta,
                                                      float[] y, 
                                                      int incy);  
 
/**
 * Native binding for {@code cublasDgemv_v2} (GEMV group of the BLAS2 functions), {@code double} data
 * passed as {@code DoublePointer}.
 * {@code trans} is an {@code int} cast to the native {@code cublasOperation_t} enum. {@code alpha},
 * {@code A}, {@code x} and {@code beta} are annotated {@code @Const}; {@code y} is not.
 * NOTE(review): presumably m/n = matrix dimensions, lda = leading dimension of A, incx/incy =
 * vector strides -- confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      @Const DoublePointer alpha, 
                                                      @Const DoublePointer A,
                                                      int lda,
                                                      @Const DoublePointer x,
                                                      int incx,
                                                      @Const DoublePointer beta,
                                                      DoublePointer y, 
                                                      int incy);
/** Overload of {@code cublasDgemv_v2} with the pointer arguments passed as {@code java.nio.DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDgemv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      @Const DoubleBuffer alpha, 
                                                      @Const DoubleBuffer A,
                                                      int lda,
                                                      @Const DoubleBuffer x,
                                                      int incx,
                                                      @Const DoubleBuffer beta,
                                                      DoubleBuffer y, 
                                                      int incy);
/** Overload of {@code cublasDgemv_v2} with the pointer arguments passed as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDgemv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      @Const double[] alpha, 
                                                      @Const double[] A,
                                                      int lda,
                                                      @Const double[] x,
                                                      int incx,
                                                      @Const double[] beta,
                                                      double[] y, 
                                                      int incy);

/**
 * Native binding for {@code cublasCgemv_v2} (GEMV group of the BLAS2 functions).
 * Pointer arguments are {@code float2} objects cast to the native {@code cuComplex*} type;
 * {@code alpha}, {@code A}, {@code x} and {@code beta} are cast to {@code const cuComplex*}, while
 * {@code y} is cast to the non-const form. {@code trans} is cast to {@code cublasOperation_t}.
 * NOTE(review): presumably m/n = matrix dimensions, lda = leading dimension of A, incx/incy =
 * vector strides -- confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemv_v2(cublasContext handle,
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      @Cast("const cuComplex*") float2 alpha, 
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx,
                                                      @Cast("const cuComplex*") float2 beta, 
                                                      @Cast("cuComplex*") float2 y,
                                                      int incy);

/**
 * Native binding for {@code cublasZgemv_v2} (GEMV group of the BLAS2 functions).
 * Pointer arguments are {@code double2} objects cast to the native {@code cuDoubleComplex*} type;
 * {@code alpha}, {@code A}, {@code x} and {@code beta} are cast to {@code const cuDoubleComplex*},
 * while {@code y} is cast to the non-const form. {@code trans} is cast to {@code cublasOperation_t}.
 * NOTE(review): presumably m/n = matrix dimensions, lda = leading dimension of A, incx/incy =
 * vector strides -- confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgemv_v2(cublasContext handle,
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      @Cast("const cuDoubleComplex*") double2 alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda, 
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 beta,  
                                                      @Cast("cuDoubleComplex*") double2 y,
                                                      int incy);
/* GBMV */                                
/**
 * Native binding for {@code cublasSgbmv_v2} (GBMV group), {@code float} data passed as
 * {@code FloatPointer}.
 * Compared with the GEMV bindings it takes two extra {@code int} arguments, {@code kl} and
 * {@code ku}. {@code trans} is cast to {@code cublasOperation_t}; {@code alpha}, {@code A},
 * {@code x} and {@code beta} are annotated {@code @Const}; {@code y} is not.
 * NOTE(review): presumably kl/ku are the sub- and super-diagonal counts of a banded matrix and
 * lda/incx/incy are the usual BLAS leading dimension and strides -- confirm against the cuBLAS
 * documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgbmv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      int kl,
                                                      int ku, 
                                                      @Const FloatPointer alpha,  
                                                      @Const FloatPointer A, 
                                                      int lda, 
                                                      @Const FloatPointer x,
                                                      int incx,
                                                      @Const FloatPointer beta,  
                                                      FloatPointer y,
                                                      int incy);
/** Overload of {@code cublasSgbmv_v2} with the pointer arguments passed as {@code java.nio.FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasSgbmv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      int kl,
                                                      int ku, 
                                                      @Const FloatBuffer alpha,  
                                                      @Const FloatBuffer A, 
                                                      int lda, 
                                                      @Const FloatBuffer x,
                                                      int incx,
                                                      @Const FloatBuffer beta,  
                                                      FloatBuffer y,
                                                      int incy);
/** Overload of {@code cublasSgbmv_v2} with the pointer arguments passed as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasSgbmv_v2(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      int kl,
                                                      int ku, 
                                                      @Const float[] alpha,  
                                                      @Const float[] A, 
                                                      int lda, 
                                                      @Const float[] x,
                                                      int incx,
                                                      @Const float[] beta,  
                                                      float[] y,
                                                      int incy);                                
                                
/**
 * Native binding for {@code cublasDgbmv_v2} (GBMV group), {@code double} data passed as
 * {@code DoublePointer}.
 * {@code trans} is cast to {@code cublasOperation_t}; {@code alpha}, {@code A}, {@code x} and
 * {@code beta} are annotated {@code @Const}; {@code y} is not.
 * NOTE(review): presumably kl/ku are the sub- and super-diagonal counts of a banded matrix and
 * lda/incx/incy are the usual BLAS leading dimension and strides -- confirm against the cuBLAS
 * documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgbmv_v2(cublasContext handle,
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      int kl,
                                                      int ku, 
                                                      @Const DoublePointer alpha, 
                                                      @Const DoublePointer A,
                                                      int lda, 
                                                      @Const DoublePointer x,
                                                      int incx,
                                                      @Const DoublePointer beta, 
                                                      DoublePointer y,
                                                      int incy);
/** Overload of {@code cublasDgbmv_v2} with the pointer arguments passed as {@code java.nio.DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDgbmv_v2(cublasContext handle,
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      int kl,
                                                      int ku, 
                                                      @Const DoubleBuffer alpha, 
                                                      @Const DoubleBuffer A,
                                                      int lda, 
                                                      @Const DoubleBuffer x,
                                                      int incx,
                                                      @Const DoubleBuffer beta, 
                                                      DoubleBuffer y,
                                                      int incy);
/** Overload of {@code cublasDgbmv_v2} with the pointer arguments passed as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDgbmv_v2(cublasContext handle,
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      int kl,
                                                      int ku, 
                                                      @Const double[] alpha, 
                                                      @Const double[] A,
                                                      int lda, 
                                                      @Const double[] x,
                                                      int incx,
                                                      @Const double[] beta, 
                                                      double[] y,
                                                      int incy);
                                         
/**
 * Native binding for {@code cublasCgbmv_v2} (GBMV group).
 * Pointer arguments are {@code float2} objects cast to {@code cuComplex*}; {@code alpha}, {@code A},
 * {@code x} and {@code beta} use the {@code const} cast, {@code y} the non-const one.
 * {@code trans} is cast to {@code cublasOperation_t}.
 * NOTE(review): presumably kl/ku are the sub- and super-diagonal counts of a banded matrix --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgbmv_v2(cublasContext handle,
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      int kl,
                                                      int ku, 
                                                      @Cast("const cuComplex*") float2 alpha, 
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda, 
                                                      @Cast("const cuComplex*") float2 x,
                                                      int incx,
                                                      @Cast("const cuComplex*") float2 beta, 
                                                      @Cast("cuComplex*") float2 y,
                                                      int incy);                                             
                                         
/**
 * Native binding for {@code cublasZgbmv_v2} (GBMV group).
 * Pointer arguments are {@code double2} objects cast to {@code cuDoubleComplex*}; {@code alpha},
 * {@code A}, {@code x} and {@code beta} use the {@code const} cast, {@code y} the non-const one.
 * {@code trans} is cast to {@code cublasOperation_t}.
 * NOTE(review): presumably kl/ku are the sub- and super-diagonal counts of a banded matrix --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgbmv_v2(cublasContext handle,
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int m,
                                                      int n,
                                                      int kl,
                                                      int ku, 
                                                      @Cast("const cuDoubleComplex*") double2 alpha, 
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda, 
                                                      @Cast("const cuDoubleComplex*") double2 x,
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 beta, 
                                                      @Cast("cuDoubleComplex*") double2 y,
                                                      int incy);   
                                         
/* TRMV */
/**
 * Native binding for {@code cublasStrmv_v2} (TRMV group), {@code float} data passed as
 * {@code FloatPointer}.
 * {@code uplo}, {@code trans} and {@code diag} are {@code int}s cast to the native
 * {@code cublasFillMode_t}, {@code cublasOperation_t} and {@code cublasDiagType_t} enums.
 * {@code A} is annotated {@code @Const}; {@code x} is not.
 * NOTE(review): presumably a triangular matrix-vector product that overwrites x in place --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const FloatPointer A, 
                                                      int lda, 
                                                      FloatPointer x, 
                                                      int incx);
/** Overload of {@code cublasStrmv_v2} with the pointer arguments passed as {@code java.nio.FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasStrmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const FloatBuffer A, 
                                                      int lda, 
                                                      FloatBuffer x, 
                                                      int incx);
/** Overload of {@code cublasStrmv_v2} with the pointer arguments passed as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasStrmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const float[] A, 
                                                      int lda, 
                                                      float[] x, 
                                                      int incx);                                                 

/**
 * Native binding for {@code cublasDtrmv_v2} (TRMV group), {@code double} data passed as
 * {@code DoublePointer}.
 * {@code uplo}, {@code trans} and {@code diag} are {@code int}s cast to the native
 * {@code cublasFillMode_t}, {@code cublasOperation_t} and {@code cublasDiagType_t} enums.
 * {@code A} is annotated {@code @Const}; {@code x} is not.
 * NOTE(review): presumably a triangular matrix-vector product that overwrites x in place --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const DoublePointer A, 
                                                      int lda, 
                                                      DoublePointer x, 
                                                      int incx);
/** Overload of {@code cublasDtrmv_v2} with the pointer arguments passed as {@code java.nio.DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDtrmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const DoubleBuffer A, 
                                                      int lda, 
                                                      DoubleBuffer x, 
                                                      int incx);
/** Overload of {@code cublasDtrmv_v2} with the pointer arguments passed as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDtrmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const double[] A, 
                                                      int lda, 
                                                      double[] x, 
                                                      int incx);

/**
 * Native binding for {@code cublasCtrmv_v2} (TRMV group).
 * {@code A} is a {@code float2} cast to {@code const cuComplex*}; {@code x} is a {@code float2} cast
 * to the non-const {@code cuComplex*}. {@code uplo}, {@code trans} and {@code diag} are cast to
 * {@code cublasFillMode_t}, {@code cublasOperation_t} and {@code cublasDiagType_t}.
 * NOTE(review): presumably a triangular matrix-vector product that overwrites x in place --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtrmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 A, 
                                                      int lda, 
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);
                                        
/**
 * Native binding for {@code cublasZtrmv_v2} (TRMV group).
 * {@code A} is a {@code double2} cast to {@code const cuDoubleComplex*}; {@code x} is a
 * {@code double2} cast to the non-const {@code cuDoubleComplex*}. {@code uplo}, {@code trans} and
 * {@code diag} are cast to {@code cublasFillMode_t}, {@code cublasOperation_t} and
 * {@code cublasDiagType_t}.
 * NOTE(review): presumably a triangular matrix-vector product that overwrites x in place --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtrmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 A, 
                                                      int lda, 
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);
                                                                                                             
/* TBMV */
/**
 * Native binding for {@code cublasStbmv_v2} (TBMV group), {@code float} data passed as
 * {@code FloatPointer}.
 * Same argument list as the TRMV bindings plus one extra {@code int}, {@code k}, after {@code n}.
 * {@code uplo}, {@code trans} and {@code diag} are cast to {@code cublasFillMode_t},
 * {@code cublasOperation_t} and {@code cublasDiagType_t}. {@code A} is annotated {@code @Const};
 * {@code x} is not.
 * NOTE(review): presumably k is the number of off-diagonals of a triangular banded matrix --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const FloatPointer A, 
                                                      int lda, 
                                                      FloatPointer x, 
                                                      int incx);
/** Overload of {@code cublasStbmv_v2} with the pointer arguments passed as {@code java.nio.FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasStbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const FloatBuffer A, 
                                                      int lda, 
                                                      FloatBuffer x, 
                                                      int incx);
/** Overload of {@code cublasStbmv_v2} with the pointer arguments passed as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasStbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const float[] A, 
                                                      int lda, 
                                                      float[] x, 
                                                      int incx);                                                 

/**
 * Native binding for {@code cublasDtbmv_v2} (TBMV group), {@code double} data passed as
 * {@code DoublePointer}.
 * {@code uplo}, {@code trans} and {@code diag} are cast to {@code cublasFillMode_t},
 * {@code cublasOperation_t} and {@code cublasDiagType_t}. {@code A} is annotated {@code @Const};
 * {@code x} is not.
 * NOTE(review): presumably k is the number of off-diagonals of a triangular banded matrix --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const DoublePointer A, 
                                                      int lda, 
                                                      DoublePointer x, 
                                                      int incx);
/** Overload of {@code cublasDtbmv_v2} with the pointer arguments passed as {@code java.nio.DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDtbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const DoubleBuffer A, 
                                                      int lda, 
                                                      DoubleBuffer x, 
                                                      int incx);
/** Overload of {@code cublasDtbmv_v2} with the pointer arguments passed as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDtbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const double[] A, 
                                                      int lda, 
                                                      double[] x, 
                                                      int incx);

/**
 * Native binding for {@code cublasCtbmv_v2} (TBMV group).
 * {@code A} is a {@code float2} cast to {@code const cuComplex*}; {@code x} is a {@code float2} cast
 * to the non-const {@code cuComplex*}. {@code uplo}, {@code trans} and {@code diag} are cast to
 * {@code cublasFillMode_t}, {@code cublasOperation_t} and {@code cublasDiagType_t}.
 * NOTE(review): presumably k is the number of off-diagonals of a triangular banded matrix --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Cast("const cuComplex*") float2 A, 
                                                      int lda, 
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);
                                               
/**
 * Native binding for {@code cublasZtbmv_v2} (TBMV group).
 * {@code A} is a {@code double2} cast to {@code const cuDoubleComplex*}; {@code x} is a
 * {@code double2} cast to the non-const {@code cuDoubleComplex*}. {@code uplo}, {@code trans} and
 * {@code diag} are cast to {@code cublasFillMode_t}, {@code cublasOperation_t} and
 * {@code cublasDiagType_t}.
 * NOTE(review): presumably k is the number of off-diagonals of a triangular banded matrix --
 * confirm against the cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Cast("const cuDoubleComplex*") double2 A, 
                                                      int lda, 
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);
                                         
/* TPMV */
/**
 * Native binding for {@code cublasStpmv_v2} (TPMV group), {@code float} data passed as
 * {@code FloatPointer}.
 * Unlike the TRMV bindings there is no {@code lda} argument; the matrix argument is named
 * {@code AP} and is annotated {@code @Const}, while {@code x} is not. {@code uplo}, {@code trans}
 * and {@code diag} are cast to {@code cublasFillMode_t}, {@code cublasOperation_t} and
 * {@code cublasDiagType_t}.
 * NOTE(review): presumably AP holds a triangular matrix in packed storage -- confirm against the
 * cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const FloatPointer AP, 
                                                      FloatPointer x, 
                                                      int incx);
/** Overload of {@code cublasStpmv_v2} with the pointer arguments passed as {@code java.nio.FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasStpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const FloatBuffer AP, 
                                                      FloatBuffer x, 
                                                      int incx);
/** Overload of {@code cublasStpmv_v2} with the pointer arguments passed as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasStpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const float[] AP, 
                                                      float[] x, 
                                                      int incx);                                                 

/**
 * Native binding for {@code cublasDtpmv_v2} (TPMV group), {@code double} data passed as
 * {@code DoublePointer}.
 * There is no {@code lda} argument; the matrix argument is named {@code AP} and is annotated
 * {@code @Const}, while {@code x} is not. {@code uplo}, {@code trans} and {@code diag} are cast to
 * {@code cublasFillMode_t}, {@code cublasOperation_t} and {@code cublasDiagType_t}.
 * NOTE(review): presumably AP holds a triangular matrix in packed storage -- confirm against the
 * cuBLAS documentation; nothing in this file states it.
 *
 * @return the native {@code cublasStatus_t} result, exposed to Java as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const DoublePointer AP, 
                                                      DoublePointer x, 
                                                      int incx);
/** Overload of {@code cublasDtpmv_v2} with the pointer arguments passed as {@code java.nio.DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDtpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const DoubleBuffer AP, 
                                                      DoubleBuffer x, 
                                                      int incx);
/** Overload of {@code cublasDtpmv_v2} with the pointer arguments passed as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDtpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const double[] AP, 
                                                      double[] x, 
                                                      int incx);

/**
 * JavaCPP binding for the native {@code cublasCtpmv_v2} function. {@code AP} and {@code x}
 * are {@code float2} objects cast to {@code const cuComplex*} and {@code cuComplex*}
 * respectively for the native call.
 * <p>
 * NOTE(review): by BLAS naming this is presumably the single-precision complex packed
 * triangular matrix-vector product; confirm argument semantics against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 AP, 
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);
                                                
/**
 * JavaCPP binding for the native {@code cublasZtpmv_v2} function. {@code AP} and {@code x}
 * are {@code double2} objects cast to {@code const cuDoubleComplex*} and
 * {@code cuDoubleComplex*} respectively for the native call.
 * <p>
 * NOTE(review): by BLAS naming this is presumably the double-precision complex packed
 * triangular matrix-vector product; confirm argument semantics against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 AP, 
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);

/* TRSV */
/**
 * JavaCPP binding for the native {@code cublasStrsv_v2} function (TRSV group), taking
 * {@code A} and {@code x} as {@code FloatPointer}s.
 * <p>
 * NOTE(review): by BLAS naming TRSV is presumably the triangular solve with {@code x}
 * overwritten by the solution; confirm the meaning of {@code n}, {@code lda} and
 * {@code incx} against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @param A      argument declared {@code @Const} (const on the native side)
 * @param x      non-const argument; presumably updated in place -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const FloatPointer A, 
                                                      int lda, 
                                                      FloatPointer x, 
                                                      int incx);
/** Overload of {@code cublasStrsv_v2} taking {@code A} and {@code x} as {@code FloatBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasStrsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const FloatBuffer A, 
                                                      int lda, 
                                                      FloatBuffer x, 
                                                      int incx);
/** Overload of {@code cublasStrsv_v2} taking {@code A} and {@code x} as {@code float[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasStrsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const float[] A, 
                                                      int lda, 
                                                      float[] x, 
                                                      int incx);                                                 

/**
 * JavaCPP binding for the native {@code cublasDtrsv_v2} function (TRSV group), taking
 * {@code A} and {@code x} as {@code DoublePointer}s.
 * <p>
 * NOTE(review): by BLAS naming TRSV is presumably the triangular solve with {@code x}
 * overwritten by the solution; confirm the meaning of {@code n}, {@code lda} and
 * {@code incx} against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @param A      argument declared {@code @Const} (const on the native side)
 * @param x      non-const argument; presumably updated in place -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const DoublePointer A, 
                                                      int lda, 
                                                      DoublePointer x, 
                                                      int incx);
/** Overload of {@code cublasDtrsv_v2} taking {@code A} and {@code x} as {@code DoubleBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasDtrsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const DoubleBuffer A, 
                                                      int lda, 
                                                      DoubleBuffer x, 
                                                      int incx);
/** Overload of {@code cublasDtrsv_v2} taking {@code A} and {@code x} as {@code double[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasDtrsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const double[] A, 
                                                      int lda, 
                                                      double[] x, 
                                                      int incx);

/**
 * JavaCPP binding for the native {@code cublasCtrsv_v2} function (TRSV group). {@code A}
 * and {@code x} are {@code float2} objects cast to {@code const cuComplex*} and
 * {@code cuComplex*} respectively for the native call.
 * <p>
 * NOTE(review): presumably the single-precision complex triangular solve; confirm
 * argument semantics against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtrsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 A, 
                                                      int lda, 
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);

/**
 * JavaCPP binding for the native {@code cublasZtrsv_v2} function (TRSV group). {@code A}
 * and {@code x} are {@code double2} objects cast to {@code const cuDoubleComplex*} and
 * {@code cuDoubleComplex*} respectively for the native call.
 * <p>
 * NOTE(review): presumably the double-precision complex triangular solve; confirm
 * argument semantics against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtrsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 A, 
                                                      int lda, 
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);

/* TPSV */
/**
 * JavaCPP binding for the native {@code cublasStpsv_v2} function (TPSV group), taking
 * {@code AP} and {@code x} as {@code FloatPointer}s.
 * <p>
 * NOTE(review): by BLAS naming TPSV is presumably the packed triangular solve with
 * {@code x} overwritten by the solution; confirm the meaning of {@code n}, {@code AP}
 * and {@code incx} against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @param AP     argument declared {@code @Const} (const on the native side)
 * @param x      non-const argument; presumably updated in place -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStpsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const FloatPointer AP, 
                                                      FloatPointer x, 
                                                      int incx);
/** Overload of {@code cublasStpsv_v2} taking {@code AP} and {@code x} as {@code FloatBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasStpsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const FloatBuffer AP, 
                                                      FloatBuffer x, 
                                                      int incx);
/** Overload of {@code cublasStpsv_v2} taking {@code AP} and {@code x} as {@code float[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasStpsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const float[] AP, 
                                                      float[] x, 
                                                      int incx);  
                                                                                                            
/**
 * JavaCPP binding for the native {@code cublasDtpsv_v2} function (TPSV group), taking
 * {@code AP} and {@code x} as {@code DoublePointer}s.
 * <p>
 * NOTE(review): by BLAS naming TPSV is presumably the packed triangular solve with
 * {@code x} overwritten by the solution; confirm the meaning of {@code n}, {@code AP}
 * and {@code incx} against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @param AP     argument declared {@code @Const} (const on the native side)
 * @param x      non-const argument; presumably updated in place -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtpsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const DoublePointer AP, 
                                                      DoublePointer x, 
                                                      int incx);
/** Overload of {@code cublasDtpsv_v2} taking {@code AP} and {@code x} as {@code DoubleBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasDtpsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const DoubleBuffer AP, 
                                                      DoubleBuffer x, 
                                                      int incx);
/** Overload of {@code cublasDtpsv_v2} taking {@code AP} and {@code x} as {@code double[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasDtpsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Const double[] AP, 
                                                      double[] x, 
                                                      int incx);

/**
 * JavaCPP binding for the native {@code cublasCtpsv_v2} function (TPSV group). {@code AP}
 * and {@code x} are {@code float2} objects cast to {@code const cuComplex*} and
 * {@code cuComplex*} respectively for the native call.
 * <p>
 * NOTE(review): presumably the single-precision complex packed triangular solve; confirm
 * argument semantics against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtpsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Cast("const cuComplex*") float2 AP, 
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);

/**
 * JavaCPP binding for the native {@code cublasZtpsv_v2} function (TPSV group). {@code AP}
 * and {@code x} are {@code double2} objects cast to {@code const cuDoubleComplex*} and
 * {@code cuDoubleComplex*} respectively for the native call.
 * <p>
 * NOTE(review): presumably the double-precision complex packed triangular solve; confirm
 * argument semantics against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtpsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      @Cast("const cuDoubleComplex*") double2 AP, 
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);
/* TBSV */                                         
/**
 * JavaCPP binding for the native {@code cublasStbsv_v2} function (TBSV group), taking
 * {@code A} and {@code x} as {@code FloatPointer}s.
 * <p>
 * NOTE(review): by BLAS naming TBSV is presumably the banded triangular solve, with
 * {@code k} the number of off-diagonals and {@code x} overwritten by the solution;
 * confirm against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @param A      argument declared {@code @Const} (const on the native side)
 * @param x      non-const argument; presumably updated in place -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStbsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const FloatPointer A, 
                                                      int lda, 
                                                      FloatPointer x, 
                                                      int incx);
/** Overload of {@code cublasStbsv_v2} taking {@code A} and {@code x} as {@code FloatBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasStbsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const FloatBuffer A, 
                                                      int lda, 
                                                      FloatBuffer x, 
                                                      int incx);
/** Overload of {@code cublasStbsv_v2} taking {@code A} and {@code x} as {@code float[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasStbsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const float[] A, 
                                                      int lda, 
                                                      float[] x, 
                                                      int incx);

/**
 * JavaCPP binding for the native {@code cublasDtbsv_v2} function (TBSV group), taking
 * {@code A} and {@code x} as {@code DoublePointer}s.
 * <p>
 * NOTE(review): by BLAS naming TBSV is presumably the banded triangular solve, with
 * {@code k} the number of off-diagonals and {@code x} overwritten by the solution;
 * confirm against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @param A      argument declared {@code @Const} (const on the native side)
 * @param x      non-const argument; presumably updated in place -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtbsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const DoublePointer A, 
                                                      int lda, 
                                                      DoublePointer x, 
                                                      int incx);
/** Overload of {@code cublasDtbsv_v2} taking {@code A} and {@code x} as {@code DoubleBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasDtbsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const DoubleBuffer A, 
                                                      int lda, 
                                                      DoubleBuffer x, 
                                                      int incx);
/** Overload of {@code cublasDtbsv_v2} taking {@code A} and {@code x} as {@code double[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasDtbsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Const double[] A, 
                                                      int lda, 
                                                      double[] x, 
                                                      int incx);
                                         
/**
 * JavaCPP binding for the native {@code cublasCtbsv_v2} function (TBSV group). {@code A}
 * and {@code x} are {@code float2} objects cast to {@code const cuComplex*} and
 * {@code cuComplex*} respectively for the native call.
 * <p>
 * NOTE(review): presumably the single-precision complex banded triangular solve; confirm
 * argument semantics against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtbsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Cast("const cuComplex*") float2 A, 
                                                      int lda, 
                                                      @Cast("cuComplex*") float2 x, 
                                                      int incx);
                                         
/**
 * JavaCPP binding for the native {@code cublasZtbsv_v2} function (TBSV group). {@code A}
 * and {@code x} are {@code double2} objects cast to {@code const cuDoubleComplex*} and
 * {@code cuDoubleComplex*} respectively for the native call.
 * <p>
 * NOTE(review): presumably the double-precision complex banded triangular solve; confirm
 * argument semantics against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param trans  integer forwarded to the native side as a {@code cublasOperation_t}
 * @param diag   integer forwarded to the native side as a {@code cublasDiagType_t}
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtbsv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      @Cast("cublasDiagType_t") int diag, 
                                                      int n, 
                                                      int k, 
                                                      @Cast("const cuDoubleComplex*") double2 A, 
                                                      int lda, 
                                                      @Cast("cuDoubleComplex*") double2 x, 
                                                      int incx);     
                                         
/* SYMV/HEMV */
/**
 * JavaCPP binding for the native {@code cublasSsymv_v2} function (SYMV/HEMV group), taking
 * all array arguments as {@code FloatPointer}s.
 * <p>
 * NOTE(review): by BLAS naming SYMV is presumably the symmetric matrix-vector product
 * {@code y = alpha*A*x + beta*y}; confirm the formula and the meaning of {@code n},
 * {@code lda}, {@code incx} and {@code incy} against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param alpha  argument declared {@code @Const}; presumably a one-element scalar -- TODO confirm
 * @param A      argument declared {@code @Const}
 * @param x      argument declared {@code @Const}
 * @param beta   argument declared {@code @Const}; presumably a one-element scalar -- TODO confirm
 * @param y      the only non-const array argument; presumably receives the result -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSsymv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Const FloatPointer alpha, 
                                                      @Const FloatPointer A,
                                                      int lda,
                                                      @Const FloatPointer x,
                                                      int incx,
                                                      @Const FloatPointer beta, 
                                                      FloatPointer y,
                                                      int incy);
/** Overload of {@code cublasSsymv_v2} taking all array arguments as {@code FloatBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasSsymv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Const FloatBuffer alpha, 
                                                      @Const FloatBuffer A,
                                                      int lda,
                                                      @Const FloatBuffer x,
                                                      int incx,
                                                      @Const FloatBuffer beta, 
                                                      FloatBuffer y,
                                                      int incy);
/** Overload of {@code cublasSsymv_v2} taking all array arguments as {@code float[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasSsymv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Const float[] alpha, 
                                                      @Const float[] A,
                                                      int lda,
                                                      @Const float[] x,
                                                      int incx,
                                                      @Const float[] beta, 
                                                      float[] y,
                                                      int incy);

/**
 * JavaCPP binding for the native {@code cublasDsymv_v2} function (SYMV/HEMV group), taking
 * all array arguments as {@code DoublePointer}s.
 * <p>
 * NOTE(review): by BLAS naming SYMV is presumably the symmetric matrix-vector product
 * {@code y = alpha*A*x + beta*y}; confirm the formula and the meaning of {@code n},
 * {@code lda}, {@code incx} and {@code incy} against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param alpha  argument declared {@code @Const}; presumably a one-element scalar -- TODO confirm
 * @param A      argument declared {@code @Const}
 * @param x      argument declared {@code @Const}
 * @param beta   argument declared {@code @Const}; presumably a one-element scalar -- TODO confirm
 * @param y      the only non-const array argument; presumably receives the result -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDsymv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Const DoublePointer alpha, 
                                                      @Const DoublePointer A,
                                                      int lda,
                                                      @Const DoublePointer x,
                                                      int incx,
                                                      @Const DoublePointer beta, 
                                                      DoublePointer y,
                                                      int incy);
/** Overload of {@code cublasDsymv_v2} taking all array arguments as {@code DoubleBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasDsymv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Const DoubleBuffer alpha, 
                                                      @Const DoubleBuffer A,
                                                      int lda,
                                                      @Const DoubleBuffer x,
                                                      int incx,
                                                      @Const DoubleBuffer beta, 
                                                      DoubleBuffer y,
                                                      int incy);
/** Overload of {@code cublasDsymv_v2} taking all array arguments as {@code double[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasDsymv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Const double[] alpha, 
                                                      @Const double[] A,
                                                      int lda,
                                                      @Const double[] x,
                                                      int incx,
                                                      @Const double[] beta, 
                                                      double[] y,
                                                      int incy);
    
/**
 * JavaCPP binding for the native {@code cublasCsymv_v2} function (SYMV/HEMV group). All
 * array arguments are {@code float2} objects; {@code alpha}, {@code A}, {@code x} and
 * {@code beta} are cast to {@code const cuComplex*}, {@code y} to {@code cuComplex*}.
 * <p>
 * NOTE(review): presumably the single-precision complex symmetric matrix-vector product
 * {@code y = alpha*A*x + beta*y}; confirm against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param y      the only non-const array argument; presumably receives the result -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCsymv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Cast("const cuComplex*") float2 alpha, 
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 x,
                                                      int incx,
                                                      @Cast("const cuComplex*") float2 beta, 
                                                      @Cast("cuComplex*") float2 y,
                                                      int incy);                                     
                                     
/**
 * JavaCPP binding for the native {@code cublasZsymv_v2} function (SYMV/HEMV group). All
 * array arguments are {@code double2} objects; {@code alpha}, {@code A}, {@code x} and
 * {@code beta} are cast to {@code const cuDoubleComplex*}, {@code y} to
 * {@code cuDoubleComplex*}.
 * <p>
 * NOTE(review): presumably the double-precision complex symmetric matrix-vector product
 * {@code y = alpha*A*x + beta*y}; confirm against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param y      the only non-const array argument; presumably receives the result -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZsymv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Cast("const cuDoubleComplex*") double2 alpha, 
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Cast("const cuDoubleComplex*") double2 x,
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 beta, 
                                                      @Cast("cuDoubleComplex*") double2 y,
                                                      int incy);                                            
                                     
/**
 * JavaCPP binding for the native {@code cublasChemv_v2} function (SYMV/HEMV group). All
 * array arguments are {@code float2} objects; {@code alpha}, {@code A}, {@code x} and
 * {@code beta} are cast to {@code const cuComplex*}, {@code y} to {@code cuComplex*}.
 * <p>
 * NOTE(review): by BLAS naming HEMV is presumably the Hermitian matrix-vector product
 * {@code y = alpha*A*x + beta*y}; confirm against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param y      the only non-const array argument; presumably receives the result -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasChemv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Cast("const cuComplex*") float2 alpha, 
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 x,
                                                      int incx,
                                                      @Cast("const cuComplex*") float2 beta, 
                                                      @Cast("cuComplex*") float2 y,
                                                      int incy);                                     
                                     
/**
 * JavaCPP binding for the native {@code cublasZhemv_v2} function (SYMV/HEMV group). All
 * array arguments are {@code double2} objects; {@code alpha}, {@code A}, {@code x} and
 * {@code beta} are cast to {@code const cuDoubleComplex*}, {@code y} to
 * {@code cuDoubleComplex*}.
 * <p>
 * NOTE(review): by BLAS naming HEMV is presumably the Hermitian matrix-vector product
 * {@code y = alpha*A*x + beta*y}; confirm against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param y      the only non-const array argument; presumably receives the result -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZhemv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      @Cast("const cuDoubleComplex*") double2 alpha, 
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Cast("const cuDoubleComplex*") double2 x,
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 beta, 
                                                      @Cast("cuDoubleComplex*") double2 y,
                                                      int incy);   
                                     
/* SBMV/HBMV */
/**
 * JavaCPP binding for the native {@code cublasSsbmv_v2} function (SBMV/HBMV group), taking
 * all array arguments as {@code FloatPointer}s.
 * <p>
 * NOTE(review): by BLAS naming SBMV is presumably the symmetric banded matrix-vector
 * product {@code y = alpha*A*x + beta*y}, with {@code k} the number of off-diagonals;
 * confirm against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param alpha  argument declared {@code @Const}; presumably a one-element scalar -- TODO confirm
 * @param A      argument declared {@code @Const}
 * @param x      argument declared {@code @Const}
 * @param beta   argument declared {@code @Const}; presumably a one-element scalar -- TODO confirm
 * @param y      the only non-const array argument; presumably receives the result -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSsbmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      int k,
                                                      @Const FloatPointer alpha, 
                                                      @Const FloatPointer A,
                                                      int lda,
                                                      @Const FloatPointer x, 
                                                      int incx,
                                                      @Const FloatPointer beta, 
                                                      FloatPointer y,
                                                      int incy);
/** Overload of {@code cublasSsbmv_v2} taking all array arguments as {@code FloatBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasSsbmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      int k,
                                                      @Const FloatBuffer alpha, 
                                                      @Const FloatBuffer A,
                                                      int lda,
                                                      @Const FloatBuffer x, 
                                                      int incx,
                                                      @Const FloatBuffer beta, 
                                                      FloatBuffer y,
                                                      int incy);
/** Overload of {@code cublasSsbmv_v2} taking all array arguments as {@code float[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasSsbmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      int k,
                                                      @Const float[] alpha, 
                                                      @Const float[] A,
                                                      int lda,
                                                      @Const float[] x, 
                                                      int incx,
                                                      @Const float[] beta, 
                                                      float[] y,
                                                      int incy);
                                      
/**
 * JavaCPP binding for the native {@code cublasDsbmv_v2} function (SBMV/HBMV group), taking
 * all array arguments as {@code DoublePointer}s.
 * <p>
 * NOTE(review): by BLAS naming SBMV is presumably the symmetric banded matrix-vector
 * product {@code y = alpha*A*x + beta*y}, with {@code k} the number of off-diagonals;
 * confirm against the cuBLAS reference.
 *
 * @param handle the {@code cublasContext} passed to the native call
 * @param uplo   integer forwarded to the native side as a {@code cublasFillMode_t}
 * @param alpha  argument declared {@code @Const}; presumably a one-element scalar -- TODO confirm
 * @param A      argument declared {@code @Const}
 * @param x      argument declared {@code @Const}
 * @param beta   argument declared {@code @Const}; presumably a one-element scalar -- TODO confirm
 * @param y      the only non-const array argument; presumably receives the result -- TODO confirm
 * @return the native return value, a {@code cublasStatus_t} surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDsbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      int k,
                                                      @Const DoublePointer alpha, 
                                                      @Const DoublePointer A,
                                                      int lda,
                                                      @Const DoublePointer x, 
                                                      int incx,
                                                      @Const DoublePointer beta, 
                                                      DoublePointer y,
                                                      int incy);
/** Overload of {@code cublasDsbmv_v2} taking all array arguments as {@code DoubleBuffer}s. */
public static native @Cast("cublasStatus_t") int cublasDsbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      int k,
                                                      @Const DoubleBuffer alpha, 
                                                      @Const DoubleBuffer A,
                                                      int lda,
                                                      @Const DoubleBuffer x, 
                                                      int incx,
                                                      @Const DoubleBuffer beta, 
                                                      DoubleBuffer y,
                                                      int incy);
/** Overload of {@code cublasDsbmv_v2} taking all array arguments as {@code double[]} arrays. */
public static native @Cast("cublasStatus_t") int cublasDsbmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      int k,
                                                      @Const double[] alpha, 
                                                      @Const double[] A,
                                                      int lda,
                                                      @Const double[] x, 
                                                      int incx,
                                                      @Const double[] beta, 
                                                      double[] y,
                                                      int incy);
                                      
/**
 * Native binding for the cuBLAS C function {@code cublasChbmv_v2}. Complex operands are passed
 * as {@code float2} and cast to {@code cuComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS "HBMV" convention (single-precision complex Hermitian
 * banded matrix-vector product, presumably {@code y = alpha*A*x + beta*y}); confirm against the
 * cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param k      integer argument (presumably the number of off-diagonals -- confirm)
 * @param alpha  cast to {@code const cuComplex*}
 * @param A      cast to {@code const cuComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @param x      cast to {@code const cuComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param beta   cast to {@code const cuComplex*}
 * @param y      cast to non-const {@code cuComplex*}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasChbmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      int k,
                                                      @Cast("const cuComplex*") float2 alpha, 
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 x, 
                                                      int incx,
                                                      @Cast("const cuComplex*") float2 beta, 
                                                      @Cast("cuComplex*") float2 y,
                                                      int incy);
                                      
/**
 * Native binding for the cuBLAS C function {@code cublasZhbmv_v2}. Complex operands are passed
 * as {@code double2} and cast to {@code cuDoubleComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS "HBMV" convention (double-precision complex Hermitian
 * banded matrix-vector product, presumably {@code y = alpha*A*x + beta*y}); confirm against the
 * cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param k      integer argument (presumably the number of off-diagonals -- confirm)
 * @param alpha  cast to {@code const cuDoubleComplex*}
 * @param A      cast to {@code const cuDoubleComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @param x      cast to {@code const cuDoubleComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param beta   cast to {@code const cuDoubleComplex*}
 * @param y      cast to non-const {@code cuDoubleComplex*}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZhbmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      int n,
                                                      int k,
                                                      @Cast("const cuDoubleComplex*") double2 alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Cast("const cuDoubleComplex*") double2 x, 
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 beta, 
                                                      @Cast("cuDoubleComplex*") double2 y,
                                                      int incy);                                                                            
                                                                                                                                                   
/* SPMV/HPMV */
/**
 * Native binding for the cuBLAS C function {@code cublasSspmv_v2}; this overload passes the
 * numeric operands as {@code FloatPointer}.
 * <p>
 * NOTE(review): the name follows the BLAS "SPMV" convention (single-precision symmetric
 * packed-storage matrix-vector product, presumably {@code y = alpha*A*x + beta*y}); confirm
 * semantics and the packed layout of {@code AP} against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  operand declared {@code const} on the native side
 * @param AP     operand declared {@code const}; takes no leading-dimension argument
 * @param x      operand declared {@code const} on the native side
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param beta   operand declared {@code const} on the native side
 * @param y      the only numeric operand not declared {@code const}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSspmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int n, 
                                                      @Const FloatPointer alpha,                                           
                                                      @Const FloatPointer AP,
                                                      @Const FloatPointer x,
                                                      int incx,
                                                      @Const FloatPointer beta,  
                                                      FloatPointer y,
                                                      int incy);
/** Overload of {@code cublasSspmv_v2} with the numeric operands supplied as {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasSspmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int n, 
                                                      @Const FloatBuffer alpha,                                           
                                                      @Const FloatBuffer AP,
                                                      @Const FloatBuffer x,
                                                      int incx,
                                                      @Const FloatBuffer beta,  
                                                      FloatBuffer y,
                                                      int incy);
/** Overload of {@code cublasSspmv_v2} with the numeric operands supplied as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasSspmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int n, 
                                                      @Const float[] alpha,                                           
                                                      @Const float[] AP,
                                                      @Const float[] x,
                                                      int incx,
                                                      @Const float[] beta,  
                                                      float[] y,
                                                      int incy);
    
/**
 * Native binding for the cuBLAS C function {@code cublasDspmv_v2}; this overload passes the
 * numeric operands as {@code DoublePointer}.
 * <p>
 * NOTE(review): the name follows the BLAS "SPMV" convention (double-precision symmetric
 * packed-storage matrix-vector product, presumably {@code y = alpha*A*x + beta*y}); confirm
 * semantics and the packed layout of {@code AP} against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  operand declared {@code const} on the native side
 * @param AP     operand declared {@code const}; takes no leading-dimension argument
 * @param x      operand declared {@code const} on the native side
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param beta   operand declared {@code const} on the native side
 * @param y      the only numeric operand not declared {@code const}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDspmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int n,
                                                      @Const DoublePointer alpha,  
                                                      @Const DoublePointer AP,
                                                      @Const DoublePointer x,
                                                      int incx,
                                                      @Const DoublePointer beta,  
                                                      DoublePointer y,
                                                      int incy);
/** Overload of {@code cublasDspmv_v2} with the numeric operands supplied as {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDspmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int n,
                                                      @Const DoubleBuffer alpha,  
                                                      @Const DoubleBuffer AP,
                                                      @Const DoubleBuffer x,
                                                      int incx,
                                                      @Const DoubleBuffer beta,  
                                                      DoubleBuffer y,
                                                      int incy);
/** Overload of {@code cublasDspmv_v2} with the numeric operands supplied as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDspmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int n,
                                                      @Const double[] alpha,  
                                                      @Const double[] AP,
                                                      @Const double[] x,
                                                      int incx,
                                                      @Const double[] beta,  
                                                      double[] y,
                                                      int incy);                                     
                                     
/**
 * Native binding for the cuBLAS C function {@code cublasChpmv_v2}. Complex operands are passed
 * as {@code float2} and cast to {@code cuComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS "HPMV" convention (single-precision complex Hermitian
 * packed-storage matrix-vector product, presumably {@code y = alpha*A*x + beta*y}); confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  cast to {@code const cuComplex*}
 * @param AP     cast to {@code const cuComplex*}; takes no leading-dimension argument
 * @param x      cast to {@code const cuComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param beta   cast to {@code const cuComplex*}
 * @param y      cast to non-const {@code cuComplex*}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasChpmv_v2(cublasContext handle, 
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int n,
                                                      @Cast("const cuComplex*") float2 alpha,  
                                                      @Cast("const cuComplex*") float2 AP,
                                                      @Cast("const cuComplex*") float2 x,
                                                      int incx,
                                                      @Cast("const cuComplex*") float2 beta,  
                                                      @Cast("cuComplex*") float2 y,
                                                      int incy);
                                     
/**
 * Native binding for the cuBLAS C function {@code cublasZhpmv_v2}. Complex operands are passed
 * as {@code double2} and cast to {@code cuDoubleComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS "HPMV" convention (double-precision complex Hermitian
 * packed-storage matrix-vector product, presumably {@code y = alpha*A*x + beta*y}); confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  cast to {@code const cuDoubleComplex*}
 * @param AP     cast to {@code const cuDoubleComplex*}; takes no leading-dimension argument
 * @param x      cast to {@code const cuDoubleComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param beta   cast to {@code const cuDoubleComplex*}
 * @param y      cast to non-const {@code cuDoubleComplex*}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZhpmv_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int n,
                                                      @Cast("const cuDoubleComplex*") double2 alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 AP,
                                                      @Cast("const cuDoubleComplex*") double2 x,
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 beta,  
                                                      @Cast("cuDoubleComplex*") double2 y, 
                                                      int incy);

/* GER */
/**
 * Native binding for the cuBLAS C function {@code cublasSger_v2}; this overload passes the
 * numeric operands as {@code FloatPointer}.
 * <p>
 * NOTE(review): the name follows the BLAS "GER" convention (single-precision general rank-1
 * update, presumably {@code A = alpha*x*y^T + A}); confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param m      integer size argument (presumably the row count of {@code A} -- confirm)
 * @param n      integer size argument (presumably the column count of {@code A} -- confirm)
 * @param alpha  operand declared {@code const} on the native side
 * @param x      operand declared {@code const} on the native side
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param y      operand declared {@code const} on the native side
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @param A      the only numeric operand not declared {@code const}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSger_v2(cublasContext handle,
                                                     int m,
                                                     int n,
                                                     @Const FloatPointer alpha,  
                                                     @Const FloatPointer x,
                                                     int incx,
                                                     @Const FloatPointer y,
                                                     int incy,
                                                     FloatPointer A,
                                                     int lda);
/** Overload of {@code cublasSger_v2} with the numeric operands supplied as {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasSger_v2(cublasContext handle,
                                                     int m,
                                                     int n,
                                                     @Const FloatBuffer alpha,  
                                                     @Const FloatBuffer x,
                                                     int incx,
                                                     @Const FloatBuffer y,
                                                     int incy,
                                                     FloatBuffer A,
                                                     int lda);
/** Overload of {@code cublasSger_v2} with the numeric operands supplied as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasSger_v2(cublasContext handle,
                                                     int m,
                                                     int n,
                                                     @Const float[] alpha,  
                                                     @Const float[] x,
                                                     int incx,
                                                     @Const float[] y,
                                                     int incy,
                                                     float[] A,
                                                     int lda);
                                    
/**
 * Native binding for the cuBLAS C function {@code cublasDger_v2}; this overload passes the
 * numeric operands as {@code DoublePointer}.
 * <p>
 * NOTE(review): the name follows the BLAS "GER" convention (double-precision general rank-1
 * update, presumably {@code A = alpha*x*y^T + A}); confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param m      integer size argument (presumably the row count of {@code A} -- confirm)
 * @param n      integer size argument (presumably the column count of {@code A} -- confirm)
 * @param alpha  operand declared {@code const} on the native side
 * @param x      operand declared {@code const} on the native side
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param y      operand declared {@code const} on the native side
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @param A      the only numeric operand not declared {@code const}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDger_v2(cublasContext handle, 
                                                     int m,
                                                     int n,
                                                     @Const DoublePointer alpha,   
                                                     @Const DoublePointer x,
                                                     int incx,
                                                     @Const DoublePointer y,
                                                     int incy,
                                                     DoublePointer A,
                                                     int lda);
/** Overload of {@code cublasDger_v2} with the numeric operands supplied as {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDger_v2(cublasContext handle, 
                                                     int m,
                                                     int n,
                                                     @Const DoubleBuffer alpha,   
                                                     @Const DoubleBuffer x,
                                                     int incx,
                                                     @Const DoubleBuffer y,
                                                     int incy,
                                                     DoubleBuffer A,
                                                     int lda);
/** Overload of {@code cublasDger_v2} with the numeric operands supplied as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDger_v2(cublasContext handle, 
                                                     int m,
                                                     int n,
                                                     @Const double[] alpha,   
                                                     @Const double[] x,
                                                     int incx,
                                                     @Const double[] y,
                                                     int incy,
                                                     double[] A,
                                                     int lda);
                                    
/**
 * Native binding for the cuBLAS C function {@code cublasCgeru_v2}. Complex operands are passed
 * as {@code float2} and cast to {@code cuComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS "GERU" convention (single-precision complex rank-1
 * update without conjugation, presumably {@code A = alpha*x*y^T + A}); confirm against the
 * cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param m      integer size argument (presumably the row count of {@code A} -- confirm)
 * @param n      integer size argument (presumably the column count of {@code A} -- confirm)
 * @param alpha  cast to {@code const cuComplex*}
 * @param x      cast to {@code const cuComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param y      cast to {@code const cuComplex*}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @param A      cast to non-const {@code cuComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgeru_v2(cublasContext handle, 
                                                      int m,
                                                      int n,
                                                      @Cast("const cuComplex*") float2 alpha,  
                                                      @Cast("const cuComplex*") float2 x,
                                                      int incx,
                                                      @Cast("const cuComplex*") float2 y,
                                                      int incy,
                                                      @Cast("cuComplex*") float2 A,
                                                      int lda);

/**
 * Native binding for the cuBLAS C function {@code cublasCgerc_v2}. Complex operands are passed
 * as {@code float2} and cast to {@code cuComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS "GERC" convention (single-precision complex rank-1
 * update using the conjugate of {@code y}, presumably {@code A = alpha*x*y^H + A}); confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param m      integer size argument (presumably the row count of {@code A} -- confirm)
 * @param n      integer size argument (presumably the column count of {@code A} -- confirm)
 * @param alpha  cast to {@code const cuComplex*}
 * @param x      cast to {@code const cuComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param y      cast to {@code const cuComplex*}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @param A      cast to non-const {@code cuComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgerc_v2(cublasContext handle,
                                                      int m,
                                                      int n,
                                                      @Cast("const cuComplex*") float2 alpha,  
                                                      @Cast("const cuComplex*") float2 x,
                                                      int incx,
                                                      @Cast("const cuComplex*") float2 y,
                                                      int incy,
                                                      @Cast("cuComplex*") float2 A,
                                                      int lda);                                   

/**
 * Native binding for the cuBLAS C function {@code cublasZgeru_v2}. Complex operands are passed
 * as {@code double2} and cast to {@code cuDoubleComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS "GERU" convention (double-precision complex rank-1
 * update without conjugation, presumably {@code A = alpha*x*y^T + A}); confirm against the
 * cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param m      integer size argument (presumably the row count of {@code A} -- confirm)
 * @param n      integer size argument (presumably the column count of {@code A} -- confirm)
 * @param alpha  cast to {@code const cuDoubleComplex*}
 * @param x      cast to {@code const cuDoubleComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param y      cast to {@code const cuDoubleComplex*}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @param A      cast to non-const {@code cuDoubleComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgeru_v2(cublasContext handle, 
                                                      int m,
                                                      int n,
                                                      @Cast("const cuDoubleComplex*") double2 alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 x,
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 y,
                                                      int incy,
                                                      @Cast("cuDoubleComplex*") double2 A,
                                                      int lda);

/**
 * Native binding for the cuBLAS C function {@code cublasZgerc_v2}. Complex operands are passed
 * as {@code double2} and cast to {@code cuDoubleComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS "GERC" convention (double-precision complex rank-1
 * update using the conjugate of {@code y}, presumably {@code A = alpha*x*y^H + A}); confirm
 * against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param m      integer size argument (presumably the row count of {@code A} -- confirm)
 * @param n      integer size argument (presumably the column count of {@code A} -- confirm)
 * @param alpha  cast to {@code const cuDoubleComplex*}
 * @param x      cast to {@code const cuDoubleComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param y      cast to {@code const cuDoubleComplex*}
 * @param incy   integer argument following {@code y} (presumably its stride -- confirm)
 * @param A      cast to non-const {@code cuDoubleComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgerc_v2(cublasContext handle,
                                                      int m,
                                                      int n,
                                                      @Cast("const cuDoubleComplex*") double2 alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 x,
                                                      int incx,
                                                      @Cast("const cuDoubleComplex*") double2 y,
                                                      int incy,
                                                      @Cast("cuDoubleComplex*") double2 A,
                                                      int lda); 
                                    
/* SYR/HER */
/**
 * Native binding for the cuBLAS C function {@code cublasSsyr_v2}; this overload passes the
 * numeric operands as {@code FloatPointer}.
 * <p>
 * NOTE(review): the name follows the BLAS "SYR" convention (single-precision symmetric rank-1
 * update, presumably {@code A = alpha*x*x^T + A}); confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  operand declared {@code const} on the native side
 * @param x      operand declared {@code const} on the native side
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param A      the only numeric operand not declared {@code const}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSsyr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const FloatPointer alpha,  
                                                     @Const FloatPointer x,
                                                     int incx,
                                                     FloatPointer A, 
                                                     int lda);
/** Overload of {@code cublasSsyr_v2} with the numeric operands supplied as {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasSsyr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const FloatBuffer alpha,  
                                                     @Const FloatBuffer x,
                                                     int incx,
                                                     FloatBuffer A, 
                                                     int lda);
/** Overload of {@code cublasSsyr_v2} with the numeric operands supplied as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasSsyr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const float[] alpha,  
                                                     @Const float[] x,
                                                     int incx,
                                                     float[] A, 
                                                     int lda);
                                    
/**
 * Native binding for the cuBLAS C function {@code cublasDsyr_v2}; this overload passes the
 * numeric operands as {@code DoublePointer}.
 * <p>
 * NOTE(review): the name follows the BLAS "SYR" convention (double-precision symmetric rank-1
 * update, presumably {@code A = alpha*x*x^T + A}); confirm against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  operand declared {@code const} on the native side
 * @param x      operand declared {@code const} on the native side
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param A      the only numeric operand not declared {@code const}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDsyr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const DoublePointer alpha,  
                                                     @Const DoublePointer x,
                                                     int incx,
                                                     DoublePointer A, 
                                                     int lda);
/** Overload of {@code cublasDsyr_v2} with the numeric operands supplied as {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDsyr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const DoubleBuffer alpha,  
                                                     @Const DoubleBuffer x,
                                                     int incx,
                                                     DoubleBuffer A, 
                                                     int lda);
/** Overload of {@code cublasDsyr_v2} with the numeric operands supplied as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDsyr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const double[] alpha,  
                                                     @Const double[] x,
                                                     int incx,
                                                     double[] A, 
                                                     int lda);  
                                        
/**
 * Native binding for the cuBLAS C function {@code cublasCsyr_v2}. Complex operands are passed
 * as {@code float2} and cast to {@code cuComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS-style "SYR" convention (single-precision complex
 * symmetric rank-1 update, presumably {@code A = alpha*x*x^T + A}); confirm against the cuBLAS
 * documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  cast to {@code const cuComplex*}
 * @param x      cast to {@code const cuComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param A      cast to non-const {@code cuComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCsyr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Cast("const cuComplex*") float2 alpha,  
                                                     @Cast("const cuComplex*") float2 x,
                                                     int incx,
                                                     @Cast("cuComplex*") float2 A, 
                                                     int lda);
                                    
/**
 * Native binding for the cuBLAS C function {@code cublasZsyr_v2}. Complex operands are passed
 * as {@code double2} and cast to {@code cuDoubleComplex*} for the native call.
 * <p>
 * NOTE(review): the name follows the BLAS-style "SYR" convention (double-precision complex
 * symmetric rank-1 update, presumably {@code A = alpha*x*x^T + A}); confirm against the cuBLAS
 * documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  cast to {@code const cuDoubleComplex*}
 * @param x      cast to {@code const cuDoubleComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param A      cast to non-const {@code cuDoubleComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZsyr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Cast("const cuDoubleComplex*") double2 alpha,  
                                                     @Cast("const cuDoubleComplex*") double2 x,
                                                     int incx,
                                                     @Cast("cuDoubleComplex*") double2 A, 
                                                     int lda);                                          
                                                                      
/**
 * Native binding for the cuBLAS C function {@code cublasCher_v2}. Unlike the other complex
 * routines here, {@code alpha} is a real {@code float} operand (this overload: {@code FloatPointer});
 * {@code x} and {@code A} are {@code float2} cast to {@code cuComplex*}.
 * <p>
 * NOTE(review): the name follows the BLAS "HER" convention (single-precision complex Hermitian
 * rank-1 update, presumably {@code A = alpha*x*x^H + A}); confirm against the cuBLAS
 * documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  real-valued operand declared {@code const} on the native side
 * @param x      cast to {@code const cuComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param A      cast to non-const {@code cuComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCher_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const FloatPointer alpha,  
                                                     @Cast("const cuComplex*") float2 x,
                                                     int incx,
                                                     @Cast("cuComplex*") float2 A, 
                                                     int lda);
/** Overload of {@code cublasCher_v2} with {@code alpha} supplied as a {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasCher_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const FloatBuffer alpha,  
                                                     @Cast("const cuComplex*") float2 x,
                                                     int incx,
                                                     @Cast("cuComplex*") float2 A, 
                                                     int lda);
/** Overload of {@code cublasCher_v2} with {@code alpha} supplied as a {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasCher_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const float[] alpha,  
                                                     @Cast("const cuComplex*") float2 x,
                                                     int incx,
                                                     @Cast("cuComplex*") float2 A, 
                                                     int lda); 
                                    
/**
 * Native binding for the cuBLAS C function {@code cublasZher_v2}. Here {@code alpha} is a real
 * {@code double} operand (this overload: {@code DoublePointer}); {@code x} and {@code A} are
 * {@code double2} cast to {@code cuDoubleComplex*}.
 * <p>
 * NOTE(review): the name follows the BLAS "HER" convention (double-precision complex Hermitian
 * rank-1 update, presumably {@code A = alpha*x*x^H + A}); confirm against the cuBLAS
 * documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  real-valued operand declared {@code const} on the native side
 * @param x      cast to {@code const cuDoubleComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param A      cast to non-const {@code cuDoubleComplex*}
 * @param lda    integer argument following {@code A} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZher_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const DoublePointer alpha,  
                                                     @Cast("const cuDoubleComplex*") double2 x,
                                                     int incx,
                                                     @Cast("cuDoubleComplex*") double2 A, 
                                                     int lda);
/** Overload of {@code cublasZher_v2} with {@code alpha} supplied as a {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasZher_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const DoubleBuffer alpha,  
                                                     @Cast("const cuDoubleComplex*") double2 x,
                                                     int incx,
                                                     @Cast("cuDoubleComplex*") double2 A, 
                                                     int lda);
/** Overload of {@code cublasZher_v2} with {@code alpha} supplied as a {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasZher_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const double[] alpha,  
                                                     @Cast("const cuDoubleComplex*") double2 x,
                                                     int incx,
                                                     @Cast("cuDoubleComplex*") double2 A, 
                                                     int lda); 

/* SPR/HPR */                                    
/**
 * Native binding for the cuBLAS C function {@code cublasSspr_v2}; this overload passes the
 * numeric operands as {@code FloatPointer}.
 * <p>
 * NOTE(review): the name follows the BLAS "SPR" convention (single-precision symmetric
 * packed-storage rank-1 update, presumably {@code A = alpha*x*x^T + A}); confirm semantics and
 * the packed layout of {@code AP} against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  operand declared {@code const} on the native side
 * @param x      operand declared {@code const} on the native side
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param AP     the only numeric operand not declared {@code const}; no leading-dimension argument
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSspr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const FloatPointer alpha,  
                                                     @Const FloatPointer x,
                                                     int incx,
                                                     FloatPointer AP);
/** Overload of {@code cublasSspr_v2} with the numeric operands supplied as {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasSspr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const FloatBuffer alpha,  
                                                     @Const FloatBuffer x,
                                                     int incx,
                                                     FloatBuffer AP);
/** Overload of {@code cublasSspr_v2} with the numeric operands supplied as {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasSspr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const float[] alpha,  
                                                     @Const float[] x,
                                                     int incx,
                                                     float[] AP);
                                    
/**
 * Native binding for the cuBLAS C function {@code cublasDspr_v2}; this overload passes the
 * numeric operands as {@code DoublePointer}.
 * <p>
 * NOTE(review): the name follows the BLAS "SPR" convention (double-precision symmetric
 * packed-storage rank-1 update, presumably {@code A = alpha*x*x^T + A}); confirm semantics and
 * the packed layout of {@code AP} against the cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  operand declared {@code const} on the native side
 * @param x      operand declared {@code const} on the native side
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param AP     the only numeric operand not declared {@code const}; no leading-dimension argument
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDspr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const DoublePointer alpha,  
                                                     @Const DoublePointer x,
                                                     int incx,
                                                     DoublePointer AP);
/** Overload of {@code cublasDspr_v2} with the numeric operands supplied as {@code DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasDspr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const DoubleBuffer alpha,  
                                                     @Const DoubleBuffer x,
                                                     int incx,
                                                     DoubleBuffer AP);
/** Overload of {@code cublasDspr_v2} with the numeric operands supplied as {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasDspr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const double[] alpha,  
                                                     @Const double[] x,
                                                     int incx,
                                                     double[] AP);

/**
 * Native binding for the cuBLAS C function {@code cublasChpr_v2}. Here {@code alpha} is a real
 * {@code float} operand (this overload: {@code FloatPointer}); {@code x} and {@code AP} are
 * {@code float2} cast to {@code cuComplex*}.
 * <p>
 * NOTE(review): the name follows the BLAS "HPR" convention (single-precision complex Hermitian
 * packed-storage rank-1 update, presumably {@code A = alpha*x*x^H + A}); confirm against the
 * cuBLAS documentation.
 *
 * @param handle cuBLAS context object handed to the native call
 * @param uplo   value cast to the native {@code cublasFillMode_t} enum
 * @param n      integer size argument (presumably the matrix order -- confirm)
 * @param alpha  real-valued operand declared {@code const} on the native side
 * @param x      cast to {@code const cuComplex*}
 * @param incx   integer argument following {@code x} (presumably its stride -- confirm)
 * @param AP     cast to non-const {@code cuComplex*}; no leading-dimension argument
 * @return the native {@code cublasStatus_t} value, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasChpr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const FloatPointer alpha,  
                                                     @Cast("const cuComplex*") float2 x,
                                                     int incx,
                                                     @Cast("cuComplex*") float2 AP);
/** Overload of {@code cublasChpr_v2} with {@code alpha} supplied as a {@code FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasChpr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const FloatBuffer alpha,  
                                                     @Cast("const cuComplex*") float2 x,
                                                     int incx,
                                                     @Cast("cuComplex*") float2 AP);
/** Overload of {@code cublasChpr_v2} with {@code alpha} supplied as a {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasChpr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const float[] alpha,  
                                                     @Cast("const cuComplex*") float2 x,
                                                     int incx,
                                                     @Cast("cuComplex*") float2 AP);

public static native @Cast("cublasStatus_t") int cublasZhpr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const DoublePointer alpha,  
                                                     @Cast("const cuDoubleComplex*") double2 x,
                                                     int incx,
                                                     @Cast("cuDoubleComplex*") double2 AP);
public static native @Cast("cublasStatus_t") int cublasZhpr_v2(cublasContext handle,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     int n,
                                                     @Const DoubleBuffer alpha,  
                                                     @Cast("const cuDoubleComplex*") double2 x,
                                                     int incx,
                                                     @Cast("cuDoubleComplex*") double2 AP);
/**
 * Native binding for {@code cublasZhpr_v2} from {@code cublas_api.h}; this overload supplies the
 * scalar {@code alpha} as a Java {@code double[]}, while {@code x} and {@code AP} are
 * {@code double2} pointers cast to {@code cuDoubleComplex*}.
 * Presumably the double-precision complex Hermitian packed rank-1 update (HPR) -- confirm the
 * argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZhpr_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const double[] alpha,
        @Cast("const cuDoubleComplex*") double2 x, int incx,
        @Cast("cuDoubleComplex*") double2 AP);
    
/* SYR2/HER2 */                                    
/**
 * Native binding for {@code cublasSsyr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code A} are all {@code FloatPointer}s.
 * Grouped under the header's "SYR2/HER2" comment; presumably the single-precision symmetric
 * rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSsyr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const FloatPointer alpha,
        @Const FloatPointer x, int incx,
        @Const FloatPointer y, int incy,
        FloatPointer A, int lda);
/**
 * Native binding for {@code cublasSsyr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code A} are all {@code java.nio.FloatBuffer}s.
 * Grouped under the header's "SYR2/HER2" comment; presumably the single-precision symmetric
 * rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSsyr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const FloatBuffer alpha,
        @Const FloatBuffer x, int incx,
        @Const FloatBuffer y, int incy,
        FloatBuffer A, int lda);
/**
 * Native binding for {@code cublasSsyr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code A} are all Java {@code float[]} arrays.
 * Grouped under the header's "SYR2/HER2" comment; presumably the single-precision symmetric
 * rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSsyr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const float[] alpha,
        @Const float[] x, int incx,
        @Const float[] y, int incy,
        float[] A, int lda);
    
/**
 * Native binding for {@code cublasDsyr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code A} are all {@code DoublePointer}s.
 * Grouped under the header's "SYR2/HER2" comment; presumably the double-precision symmetric
 * rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDsyr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const DoublePointer alpha,
        @Const DoublePointer x, int incx,
        @Const DoublePointer y, int incy,
        DoublePointer A, int lda);
/**
 * Native binding for {@code cublasDsyr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code A} are all {@code java.nio.DoubleBuffer}s.
 * Grouped under the header's "SYR2/HER2" comment; presumably the double-precision symmetric
 * rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDsyr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const DoubleBuffer alpha,
        @Const DoubleBuffer x, int incx,
        @Const DoubleBuffer y, int incy,
        DoubleBuffer A, int lda);
/**
 * Native binding for {@code cublasDsyr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code A} are all Java {@code double[]} arrays.
 * Grouped under the header's "SYR2/HER2" comment; presumably the double-precision symmetric
 * rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDsyr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const double[] alpha,
        @Const double[] x, int incx,
        @Const double[] y, int incy,
        double[] A, int lda);
                                         
/**
 * Native binding for {@code cublasCsyr2_v2} from {@code cublas_api.h}. Every pointer argument
 * ({@code alpha}, {@code x}, {@code y}, {@code A}) is a {@code float2} cast to
 * {@code cuComplex*}; only {@code A} is non-const.
 * Grouped under the header's "SYR2/HER2" comment; presumably the single-precision complex
 * symmetric rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCsyr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo,
        int n,
        @Cast("const cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 x, int incx,
        @Cast("const cuComplex*") float2 y, int incy,
        @Cast("cuComplex*") float2 A, int lda);
    
/**
 * Native binding for {@code cublasZsyr2_v2} from {@code cublas_api.h}. Every pointer argument
 * ({@code alpha}, {@code x}, {@code y}, {@code A}) is a {@code double2} cast to
 * {@code cuDoubleComplex*}; only {@code A} is non-const.
 * Grouped under the header's "SYR2/HER2" comment; presumably the double-precision complex
 * symmetric rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZsyr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo,
        int n,
        @Cast("const cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 x, int incx,
        @Cast("const cuDoubleComplex*") double2 y, int incy,
        @Cast("cuDoubleComplex*") double2 A, int lda);
    

/**
 * Native binding for {@code cublasCher2_v2} from {@code cublas_api.h}. Every pointer argument
 * ({@code alpha}, {@code x}, {@code y}, {@code A}) is a {@code float2} cast to
 * {@code cuComplex*}; only {@code A} is non-const.
 * Grouped under the header's "SYR2/HER2" comment; presumably the single-precision complex
 * Hermitian rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCher2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo,
        int n,
        @Cast("const cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 x, int incx,
        @Cast("const cuComplex*") float2 y, int incy,
        @Cast("cuComplex*") float2 A, int lda);

/**
 * Native binding for {@code cublasZher2_v2} from {@code cublas_api.h}. Every pointer argument
 * ({@code alpha}, {@code x}, {@code y}, {@code A}) is a {@code double2} cast to
 * {@code cuDoubleComplex*}; only {@code A} is non-const.
 * Grouped under the header's "SYR2/HER2" comment; presumably the double-precision complex
 * Hermitian rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZher2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo,
        int n,
        @Cast("const cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 x, int incx,
        @Cast("const cuDoubleComplex*") double2 y, int incy,
        @Cast("cuDoubleComplex*") double2 A, int lda);

/* SPR2/HPR2 */
/**
 * Native binding for {@code cublasSspr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code AP} are all {@code FloatPointer}s.
 * Grouped under the header's "SPR2/HPR2" comment; presumably the single-precision symmetric
 * packed rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSspr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const FloatPointer alpha,
        @Const FloatPointer x, int incx,
        @Const FloatPointer y, int incy,
        FloatPointer AP);
/**
 * Native binding for {@code cublasSspr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code AP} are all {@code java.nio.FloatBuffer}s.
 * Grouped under the header's "SPR2/HPR2" comment; presumably the single-precision symmetric
 * packed rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSspr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const FloatBuffer alpha,
        @Const FloatBuffer x, int incx,
        @Const FloatBuffer y, int incy,
        FloatBuffer AP);
/**
 * Native binding for {@code cublasSspr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code AP} are all Java {@code float[]} arrays.
 * Grouped under the header's "SPR2/HPR2" comment; presumably the single-precision symmetric
 * packed rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSspr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const float[] alpha,
        @Const float[] x, int incx,
        @Const float[] y, int incy,
        float[] AP);
                                                                          
/**
 * Native binding for {@code cublasDspr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code AP} are all {@code DoublePointer}s.
 * Grouped under the header's "SPR2/HPR2" comment; presumably the double-precision symmetric
 * packed rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDspr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const DoublePointer alpha,
        @Const DoublePointer x, int incx,
        @Const DoublePointer y, int incy,
        DoublePointer AP);
/**
 * Native binding for {@code cublasDspr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code AP} are all {@code java.nio.DoubleBuffer}s.
 * Grouped under the header's "SPR2/HPR2" comment; presumably the double-precision symmetric
 * packed rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDspr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const DoubleBuffer alpha,
        @Const DoubleBuffer x, int incx,
        @Const DoubleBuffer y, int incy,
        DoubleBuffer AP);
/**
 * Native binding for {@code cublasDspr2_v2} from {@code cublas_api.h}; in this overload
 * {@code alpha}, {@code x}, {@code y} and {@code AP} are all Java {@code double[]} arrays.
 * Grouped under the header's "SPR2/HPR2" comment; presumably the double-precision symmetric
 * packed rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDspr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Const double[] alpha,
        @Const double[] x, int incx,
        @Const double[] y, int incy,
        double[] AP);
    

/**
 * Native binding for {@code cublasChpr2_v2} from {@code cublas_api.h}. Every pointer argument
 * ({@code alpha}, {@code x}, {@code y}, {@code AP}) is a {@code float2} cast to
 * {@code cuComplex*}; only {@code AP} is non-const.
 * Grouped under the header's "SPR2/HPR2" comment; presumably the single-precision complex
 * Hermitian packed rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasChpr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Cast("const cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 x, int incx,
        @Cast("const cuComplex*") float2 y, int incy,
        @Cast("cuComplex*") float2 AP);
                                     
/**
 * Native binding for {@code cublasZhpr2_v2} from {@code cublas_api.h}. Every pointer argument
 * ({@code alpha}, {@code x}, {@code y}, {@code AP}) is a {@code double2} cast to
 * {@code cuDoubleComplex*}; only {@code AP} is non-const.
 * Grouped under the header's "SPR2/HPR2" comment; presumably the double-precision complex
 * Hermitian packed rank-2 update -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZhpr2_v2(
        cublasContext handle,
        @Cast("cublasFillMode_t") int uplo, int n,
        @Cast("const cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 x, int incx,
        @Cast("const cuDoubleComplex*") double2 y, int incy,
        @Cast("cuDoubleComplex*") double2 AP);

/* ---------------- CUBLAS BLAS3 functions ---------------- */

/* GEMM */
/**
 * Native binding for {@code cublasSgemm_v2} from {@code cublas_api.h}; in this overload the
 * scalars {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} are all
 * {@code FloatPointer}s.
 * Grouped under the header's "GEMM" comment in the BLAS3 section; presumably the
 * single-precision general matrix-matrix multiply -- confirm the argument semantics against
 * the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemm_v2(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const FloatPointer alpha,
        @Const FloatPointer A, int lda,
        @Const FloatPointer B, int ldb,
        @Const FloatPointer beta,
        FloatPointer C, int ldc);
/**
 * Native binding for {@code cublasSgemm_v2} from {@code cublas_api.h}; in this overload the
 * scalars {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} are all
 * {@code java.nio.FloatBuffer}s.
 * Grouped under the header's "GEMM" comment in the BLAS3 section; presumably the
 * single-precision general matrix-matrix multiply -- confirm the argument semantics against
 * the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemm_v2(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const FloatBuffer alpha,
        @Const FloatBuffer A, int lda,
        @Const FloatBuffer B, int ldb,
        @Const FloatBuffer beta,
        FloatBuffer C, int ldc);
/**
 * Native binding for {@code cublasSgemm_v2} from {@code cublas_api.h}; in this overload the
 * scalars {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} are all
 * Java {@code float[]} arrays.
 * Grouped under the header's "GEMM" comment in the BLAS3 section; presumably the
 * single-precision general matrix-matrix multiply -- confirm the argument semantics against
 * the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemm_v2(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const float[] alpha,
        @Const float[] A, int lda,
        @Const float[] B, int ldb,
        @Const float[] beta,
        float[] C, int ldc);
    
/**
 * Native binding for {@code cublasDgemm_v2} from {@code cublas_api.h}; in this overload the
 * scalars {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} are all
 * {@code DoublePointer}s.
 * Grouped under the header's "GEMM" comment in the BLAS3 section; presumably the
 * double-precision general matrix-matrix multiply -- confirm the argument semantics against
 * the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemm_v2(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const DoublePointer alpha,
        @Const DoublePointer A, int lda,
        @Const DoublePointer B, int ldb,
        @Const DoublePointer beta,
        DoublePointer C, int ldc);
/**
 * Native binding for {@code cublasDgemm_v2} from {@code cublas_api.h}; in this overload the
 * scalars {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} are all
 * {@code java.nio.DoubleBuffer}s.
 * Grouped under the header's "GEMM" comment in the BLAS3 section; presumably the
 * double-precision general matrix-matrix multiply -- confirm the argument semantics against
 * the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemm_v2(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const DoubleBuffer alpha,
        @Const DoubleBuffer A, int lda,
        @Const DoubleBuffer B, int ldb,
        @Const DoubleBuffer beta,
        DoubleBuffer C, int ldc);
/**
 * Native binding for {@code cublasDgemm_v2} from {@code cublas_api.h}; in this overload the
 * scalars {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} are all
 * Java {@code double[]} arrays.
 * Grouped under the header's "GEMM" comment in the BLAS3 section; presumably the
 * double-precision general matrix-matrix multiply -- confirm the argument semantics against
 * the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemm_v2(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const double[] alpha,
        @Const double[] A, int lda,
        @Const double[] B, int ldb,
        @Const double[] beta,
        double[] C, int ldc);
                                        
/**
 * Native binding for {@code cublasCgemm_v2} from {@code cublas_api.h}. The scalars
 * {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} are
 * {@code float2} pointers cast to {@code cuComplex*}; only {@code C} is non-const.
 * Grouped under the header's "GEMM" comment; presumably the single-precision complex general
 * matrix-matrix multiply -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemm_v2(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Cast("const cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 A, int lda,
        @Cast("const cuComplex*") float2 B, int ldb,
        @Cast("const cuComplex*") float2 beta,
        @Cast("cuComplex*") float2 C, int ldc);
                                                      
/**
 * Native binding for {@code cublasCgemm3m} from {@code cublas_api.h}. It takes the same
 * argument list as {@code cublasCgemm_v2}: {@code float2} pointers cast to {@code cuComplex*}
 * for {@code alpha}, {@code A}, {@code B}, {@code beta} (const) and {@code C} (non-const).
 * NOTE(review): how the "3m" variant differs from the plain GEMM is not visible here -- see
 * the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemm3m(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Cast("const cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 A, int lda,
        @Cast("const cuComplex*") float2 B, int ldb,
        @Cast("const cuComplex*") float2 beta,
        @Cast("cuComplex*") float2 C, int ldc);
 /**
  * Native binding for {@code cublasCgemm3mEx} from {@code cublas_api.h}. The scalars
  * {@code alpha}/{@code beta} are {@code float2} pointers cast to {@code const cuComplex*},
  * while {@code A}, {@code B} and {@code C} are untyped {@code Pointer}s, each accompanied by
  * a {@code cudaDataType} code ({@code Atype}, {@code Btype}, {@code Ctype}).
  * NOTE(review): which data-type combinations are accepted is not visible here -- see the
  * cuBLAS reference.
  *
  * @return the native {@code cublasStatus_t} result, cast to {@code int}
  */
 public static native @Cast("cublasStatus_t") int cublasCgemm3mEx(
         cublasContext handle,
         @Cast("cublasOperation_t") int transa,
         @Cast("cublasOperation_t") int transb,
         int m,
         int n,
         int k,
         @Cast("const cuComplex*") float2 alpha,
         @Const Pointer A, @Cast("cudaDataType") int Atype, int lda,
         @Const Pointer B, @Cast("cudaDataType") int Btype, int ldb,
         @Cast("const cuComplex*") float2 beta,
         Pointer C, @Cast("cudaDataType") int Ctype, int ldc);
                                       

/**
 * Native binding for {@code cublasZgemm_v2} from {@code cublas_api.h}. The scalars
 * {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} are
 * {@code double2} pointers cast to {@code cuDoubleComplex*}; only {@code C} is non-const.
 * Grouped under the header's "GEMM" comment; presumably the double-precision complex general
 * matrix-matrix multiply -- confirm the argument semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgemm_v2(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Cast("const cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 A, int lda,
        @Cast("const cuDoubleComplex*") double2 B, int ldb,
        @Cast("const cuDoubleComplex*") double2 beta,
        @Cast("cuDoubleComplex*") double2 C, int ldc);
                                                      
/**
 * Native binding for {@code cublasZgemm3m} from {@code cublas_api.h}. It takes the same
 * argument list as {@code cublasZgemm_v2}: {@code double2} pointers cast to
 * {@code cuDoubleComplex*} for {@code alpha}, {@code A}, {@code B}, {@code beta} (const) and
 * {@code C} (non-const).
 * NOTE(review): how the "3m" variant differs from the plain GEMM is not visible here -- see
 * the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgemm3m(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Cast("const cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 A, int lda,
        @Cast("const cuDoubleComplex*") double2 B, int ldb,
        @Cast("const cuDoubleComplex*") double2 beta,
        @Cast("cuDoubleComplex*") double2 C, int ldc);
                                                      
// #if defined(__cplusplus)
/**
 * Native binding for {@code cublasHgemm} from {@code cublas_api.h}, where it is declared
 * inside an {@code #if defined(__cplusplus)} guard. The scalars {@code alpha}/{@code beta}
 * and the operands {@code A}, {@code B}, {@code C} are all {@code __half} pointers; only
 * {@code C} is non-const.
 * Presumably the half-precision general matrix-matrix multiply -- confirm the argument
 * semantics against the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasHgemm(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const __half alpha,
        @Const __half A, int lda,
        @Const __half B, int ldb,
        @Const __half beta,
        __half C, int ldc);
// #endif
/* IO in FP16/FP32, computation in float */                                                      
/**
 * Native binding for {@code cublasSgemmEx} from {@code cublas_api.h}; the header describes it
 * as "IO in FP16/FP32, computation in float". In this overload the scalars
 * {@code alpha}/{@code beta} are {@code FloatPointer}s; {@code A}, {@code B} and {@code C}
 * are untyped {@code Pointer}s, each accompanied by a {@code cudaDataType} code
 * ({@code Atype}, {@code Btype}, {@code Ctype}).
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmEx(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const FloatPointer alpha,
        @Const Pointer A, @Cast("cudaDataType") int Atype, int lda,
        @Const Pointer B, @Cast("cudaDataType") int Btype, int ldb,
        @Const FloatPointer beta,
        Pointer C, @Cast("cudaDataType") int Ctype, int ldc);
/**
 * Native binding for {@code cublasSgemmEx} from {@code cublas_api.h}; the header describes it
 * as "IO in FP16/FP32, computation in float". In this overload the scalars
 * {@code alpha}/{@code beta} are {@code java.nio.FloatBuffer}s; {@code A}, {@code B} and
 * {@code C} are untyped {@code Pointer}s, each accompanied by a {@code cudaDataType} code
 * ({@code Atype}, {@code Btype}, {@code Ctype}).
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmEx(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const FloatBuffer alpha,
        @Const Pointer A, @Cast("cudaDataType") int Atype, int lda,
        @Const Pointer B, @Cast("cudaDataType") int Btype, int ldb,
        @Const FloatBuffer beta,
        Pointer C, @Cast("cudaDataType") int Ctype, int ldc);
/**
 * Native binding for {@code cublasSgemmEx} from {@code cublas_api.h}; the header describes it
 * as "IO in FP16/FP32, computation in float". In this overload the scalars
 * {@code alpha}/{@code beta} are Java {@code float[]} arrays; {@code A}, {@code B} and
 * {@code C} are untyped {@code Pointer}s, each accompanied by a {@code cudaDataType} code
 * ({@code Atype}, {@code Btype}, {@code Ctype}).
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmEx(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const float[] alpha,
        @Const Pointer A, @Cast("cudaDataType") int Atype, int lda,
        @Const Pointer B, @Cast("cudaDataType") int Btype, int ldb,
        @Const float[] beta,
        Pointer C, @Cast("cudaDataType") int Ctype, int ldc);
                                       
/**
 * Native binding for {@code cublasGemmEx} from {@code cublas_api.h}. All data arguments --
 * the scalars {@code alpha}/{@code beta} and the operands {@code A}, {@code B}, {@code C} --
 * are untyped {@code Pointer}s. Each operand carries its own {@code cudaDataType} code, and
 * the call additionally takes a {@code cublasComputeType_t} ({@code computeType}) and a
 * {@code cublasGemmAlgo_t} ({@code algo}).
 * NOTE(review): the valid type/compute/algorithm combinations, and the element type expected
 * behind {@code alpha}/{@code beta}, are not visible here -- see the cuBLAS reference.
 *
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasGemmEx(
        cublasContext handle,
        @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,
        int m, int n, int k,
        @Const Pointer alpha,
        @Const Pointer A, @Cast("cudaDataType") int Atype, int lda,
        @Const Pointer B, @Cast("cudaDataType") int Btype, int ldb,
        @Const Pointer beta,
        Pointer C, @Cast("cudaDataType") int Ctype, int ldc,
        @Cast("cublasComputeType_t") int computeType,
        @Cast("cublasGemmAlgo_t") int algo);
 
/* IO in Int8 complex/cuComplex, computation in cuComplex */                                                      
/**
 * Native binding for {@code cublasCgemmEx}; the method has no Java body.
 * alpha and beta are {@code float2} values cast to {@code const cuComplex*}, while A, B and C
 * are untyped {@code Pointer}s whose element type is given separately by the ints
 * Atype, Btype and Ctype (each cast to {@code cudaDataType}).
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCgemmEx(cublasContext handle, 
                                                     @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb,  
                                                     int m, int n, int k, 
                                                     @Cast("const cuComplex*") float2 alpha, 
                                                     @Const Pointer A, 
                                                     @Cast("cudaDataType") int Atype, 
                                                     int lda, 
                                                     @Const Pointer B, 
                                                     @Cast("cudaDataType") int Btype, 
                                                     int ldb,
                                                     @Cast("const cuComplex*") float2 beta, 
                                                     Pointer C, 
                                                     @Cast("cudaDataType") int Ctype, 
                                                     int ldc);
                                                                                                                                                                                                                                                                                                   
/**
 * Native binding for {@code cublasUint8gemmBias}, {@code BytePointer} overload.
 * A and B are cast to {@code const unsigned char*} and C to {@code unsigned char*}; each
 * matrix argument is followed by its own int bias (A_bias, B_bias, C_bias) and int leading
 * dimension. Unlike the other GEMM bindings in this section, a third operation flag
 * (transc) is taken in addition to transa/transb.
 * NOTE(review): the meaning of C_mult and C_shift is not visible here -- presumably an
 * integer scale and shift applied to the result; confirm against the cuBLAS header.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasUint8gemmBias(cublasContext handle, 
                                                           @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb, @Cast("cublasOperation_t") int transc,  
                                                           int m, int n, int k, 
                                                           @Cast("const unsigned char*") BytePointer A, int A_bias, int lda, 
                                                           @Cast("const unsigned char*") BytePointer B, int B_bias, int ldb,
                                                                 @Cast("unsigned char*") BytePointer C, int C_bias, int ldc,
                                                           int C_mult, int C_shift);
/**
 * Native binding for {@code cublasUint8gemmBias}, {@code ByteBuffer} overload.
 * Same parameter list as the other overloads of this name, with A, B and C supplied as
 * {@code ByteBuffer}s; A and B are cast to {@code const unsigned char*} and C to
 * {@code unsigned char*}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasUint8gemmBias(cublasContext handle, 
                                                           @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb, @Cast("cublasOperation_t") int transc,  
                                                           int m, int n, int k, 
                                                           @Cast("const unsigned char*") ByteBuffer A, int A_bias, int lda, 
                                                           @Cast("const unsigned char*") ByteBuffer B, int B_bias, int ldb,
                                                                 @Cast("unsigned char*") ByteBuffer C, int C_bias, int ldc,
                                                           int C_mult, int C_shift);
/**
 * Native binding for {@code cublasUint8gemmBias}, {@code byte[]} overload.
 * Same parameter list as the other overloads of this name, with A, B and C supplied as
 * Java byte arrays; A and B are cast to {@code const unsigned char*} and C to
 * {@code unsigned char*}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasUint8gemmBias(cublasContext handle, 
                                                           @Cast("cublasOperation_t") int transa, @Cast("cublasOperation_t") int transb, @Cast("cublasOperation_t") int transc,  
                                                           int m, int n, int k, 
                                                           @Cast("const unsigned char*") byte[] A, int A_bias, int lda, 
                                                           @Cast("const unsigned char*") byte[] B, int B_bias, int ldb,
                                                                 @Cast("unsigned char*") byte[] C, int C_bias, int ldc,
                                                           int C_mult, int C_shift);
                                                                                       
/* SYRK */
/**
 * Native binding for {@code cublasSsyrk_v2}, {@code FloatPointer} overload.
 * alpha, A and beta are marked {@code @Const}; C is the only {@code FloatPointer} argument
 * that is not. uplo and trans are ints cast to {@code cublasFillMode_t} and
 * {@code cublasOperation_t}; lda and ldc accompany A and C respectively.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasSsyrk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const FloatPointer alpha,  
                                                      @Const FloatPointer A,
                                                      int lda,
                                                      @Const FloatPointer beta,  
                                                      FloatPointer C,
                                                      int ldc);
/**
 * Native binding for {@code cublasSsyrk_v2}, {@code FloatBuffer} overload.
 * Same parameter list as the other overloads of this name, with alpha, A, beta and C
 * supplied as {@code FloatBuffer}s; only C is not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasSsyrk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const FloatBuffer alpha,  
                                                      @Const FloatBuffer A,
                                                      int lda,
                                                      @Const FloatBuffer beta,  
                                                      FloatBuffer C,
                                                      int ldc);
/**
 * Native binding for {@code cublasSsyrk_v2}, {@code float[]} overload.
 * Same parameter list as the other overloads of this name, with alpha, A, beta and C
 * supplied as Java float arrays; only C is not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasSsyrk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const float[] alpha,  
                                                      @Const float[] A,
                                                      int lda,
                                                      @Const float[] beta,  
                                                      float[] C,
                                                      int ldc);
                                     
/**
 * Native binding for {@code cublasDsyrk_v2}, {@code DoublePointer} overload.
 * Mirrors the {@code cublasSsyrk_v2} parameter list with double-typed data: alpha, A and
 * beta are marked {@code @Const}; C is the only {@code DoublePointer} argument that is not.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasDsyrk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const DoublePointer alpha,  
                                                      @Const DoublePointer A,
                                                      int lda,
                                                      @Const DoublePointer beta,  
                                                      DoublePointer C,
                                                      int ldc);
/**
 * Native binding for {@code cublasDsyrk_v2}, {@code DoubleBuffer} overload.
 * Same parameter list as the other overloads of this name, with alpha, A, beta and C
 * supplied as {@code DoubleBuffer}s; only C is not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasDsyrk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const DoubleBuffer alpha,  
                                                      @Const DoubleBuffer A,
                                                      int lda,
                                                      @Const DoubleBuffer beta,  
                                                      DoubleBuffer C,
                                                      int ldc);
/**
 * Native binding for {@code cublasDsyrk_v2}, {@code double[]} overload.
 * Same parameter list as the other overloads of this name, with alpha, A, beta and C
 * supplied as Java double arrays; only C is not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasDsyrk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const double[] alpha,  
                                                      @Const double[] A,
                                                      int lda,
                                                      @Const double[] beta,  
                                                      double[] C,
                                                      int ldc);   
                                     
/**
 * Native binding for {@code cublasCsyrk_v2}.
 * All data arguments are {@code float2}: alpha, A and beta are cast to
 * {@code const cuComplex*} and C to the non-const {@code cuComplex*}. Only this single
 * overload is declared here (no buffer or array variants).
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCsyrk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Cast("const cuComplex*") float2 alpha,  
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 beta,  
                                                      @Cast("cuComplex*") float2 C,
                                                      int ldc);         
                                     
/**
 * Native binding for {@code cublasZsyrk_v2}.
 * All data arguments are {@code double2}: alpha, A and beta are cast to
 * {@code const cuDoubleComplex*} and C to the non-const {@code cuDoubleComplex*}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasZsyrk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Cast("const cuDoubleComplex*") double2 alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Cast("const cuDoubleComplex*") double2 beta,  
                                                      @Cast("cuDoubleComplex*") double2 C, 
                                                      int ldc);
/* IO in Int8 complex/cuComplex, computation in cuComplex */  
/**
 * Native binding for {@code cublasCsyrkEx}.
 * alpha and beta are {@code float2} values cast to {@code const cuComplex*}; A and C are
 * untyped {@code Pointer}s whose element type is supplied separately through the ints
 * Atype and Ctype (each cast to {@code cudaDataType}).
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCsyrkEx( cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Cast("const cuComplex*") float2 alpha,  
                                                      @Const Pointer A, 
                                                      @Cast("cudaDataType") int Atype, 
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 beta,  
                                                      Pointer C, 
                                                      @Cast("cudaDataType") int Ctype, 
                                                      int ldc);  
                                                      
/* IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math */                                                          
/**
 * Native binding for {@code cublasCsyrk3mEx}.
 * The parameter list is identical to {@code cublasCsyrkEx}: {@code float2} alpha/beta cast
 * to {@code const cuComplex*}, untyped {@code Pointer}s A and C, and ints Atype/Ctype cast
 * to {@code cudaDataType}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCsyrk3mEx(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo, 
                                                      @Cast("cublasOperation_t") int trans, 
                                                      int n, 
                                                      int k,
                                                      @Cast("const cuComplex*") float2 alpha, 
                                                      @Const Pointer A, 
                                                      @Cast("cudaDataType") int Atype, 
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 beta, 
                                                      Pointer C, 
                                                      @Cast("cudaDataType") int Ctype, 
                                                      int ldc);
                                                      
/* HERK */
/**
 * Native binding for {@code cublasCherk_v2}, {@code FloatPointer} overload.
 * Note the mixed argument types: alpha and beta are {@code @Const FloatPointer}s, whereas
 * A and C are {@code float2} cast to {@code const cuComplex*} and {@code cuComplex*}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const FloatPointer alpha,  
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Const FloatPointer beta,  
                                                      @Cast("cuComplex*") float2 C,
                                                      int ldc);
/**
 * Native binding for {@code cublasCherk_v2}, {@code FloatBuffer} overload.
 * Only alpha and beta change type between the overloads of this name (here
 * {@code @Const FloatBuffer}); A and C remain {@code float2} cast to cuComplex pointers.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const FloatBuffer alpha,  
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Const FloatBuffer beta,  
                                                      @Cast("cuComplex*") float2 C,
                                                      int ldc);
/**
 * Native binding for {@code cublasCherk_v2}, {@code float[]} overload.
 * Only alpha and beta change type between the overloads of this name (here
 * {@code @Const float[]}); A and C remain {@code float2} cast to cuComplex pointers.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const float[] alpha,  
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Const float[] beta,  
                                                      @Cast("cuComplex*") float2 C,
                                                      int ldc);
    
/**
 * Native binding for {@code cublasZherk_v2}, {@code DoublePointer} overload.
 * Note the mixed argument types: alpha and beta are {@code @Const DoublePointer}s, whereas
 * A and C are {@code double2} cast to {@code const cuDoubleComplex*} and
 * {@code cuDoubleComplex*}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasZherk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const DoublePointer alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Const DoublePointer beta,  
                                                      @Cast("cuDoubleComplex*") double2 C,
                                                      int ldc);
/**
 * Native binding for {@code cublasZherk_v2}, {@code DoubleBuffer} overload.
 * Only alpha and beta change type between the overloads of this name (here
 * {@code @Const DoubleBuffer}); A and C remain {@code double2} cast to cuDoubleComplex
 * pointers.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasZherk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const DoubleBuffer alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Const DoubleBuffer beta,  
                                                      @Cast("cuDoubleComplex*") double2 C,
                                                      int ldc);
/**
 * Native binding for {@code cublasZherk_v2}, {@code double[]} overload.
 * Only alpha and beta change type between the overloads of this name (here
 * {@code @Const double[]}); A and C remain {@code double2} cast to cuDoubleComplex
 * pointers.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasZherk_v2(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const double[] alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Const double[] beta,  
                                                      @Cast("cuDoubleComplex*") double2 C,
                                                      int ldc);  
                                                        
/* IO in Int8 complex/cuComplex, computation in cuComplex */                                                       
/**
 * Native binding for {@code cublasCherkEx}, {@code FloatPointer} overload.
 * alpha and beta are {@code @Const FloatPointer}s; A and C are untyped {@code Pointer}s
 * whose element type is supplied separately through the ints Atype and Ctype (each cast
 * to {@code cudaDataType}).
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherkEx(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const FloatPointer alpha,  
                                                      @Const Pointer A, 
                                                      @Cast("cudaDataType") int Atype,
                                                      int lda,
                                                      @Const FloatPointer beta,  
                                                      Pointer C,
                                                      @Cast("cudaDataType") int Ctype,
                                                      int ldc);
/**
 * Native binding for {@code cublasCherkEx}, {@code FloatBuffer} overload.
 * Only alpha and beta change type between the overloads of this name (here
 * {@code @Const FloatBuffer}); A and C stay untyped {@code Pointer}s described by
 * Atype and Ctype.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherkEx(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const FloatBuffer alpha,  
                                                      @Const Pointer A, 
                                                      @Cast("cudaDataType") int Atype,
                                                      int lda,
                                                      @Const FloatBuffer beta,  
                                                      Pointer C,
                                                      @Cast("cudaDataType") int Ctype,
                                                      int ldc);
/**
 * Native binding for {@code cublasCherkEx}, {@code float[]} overload.
 * Only alpha and beta change type between the overloads of this name (here
 * {@code @Const float[]}); A and C stay untyped {@code Pointer}s described by
 * Atype and Ctype.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherkEx(cublasContext handle,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      int n,
                                                      int k,
                                                      @Const float[] alpha,  
                                                      @Const Pointer A, 
                                                      @Cast("cudaDataType") int Atype,
                                                      int lda,
                                                      @Const float[] beta,  
                                                      Pointer C,
                                                      @Cast("cudaDataType") int Ctype,
                                                      int ldc);
                                                      
/* IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math */                                                          
/**
 * Native binding for {@code cublasCherk3mEx}, {@code FloatPointer} overload.
 * The parameter list matches {@code cublasCherkEx}: {@code @Const FloatPointer} alpha/beta,
 * untyped {@code Pointer}s A and C, and ints Atype/Ctype cast to {@code cudaDataType}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherk3mEx(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo, 
                                                       @Cast("cublasOperation_t") int trans, 
                                                       int n, 
                                                       int k,
                                                       @Const FloatPointer alpha, 
                                                       @Const Pointer A, @Cast("cudaDataType") int Atype, 
                                                       int lda,
                                                       @Const FloatPointer beta, 
                                                       Pointer C, 
                                                       @Cast("cudaDataType") int Ctype, 
                                                       int ldc);
/**
 * Native binding for {@code cublasCherk3mEx}, {@code FloatBuffer} overload.
 * Only alpha and beta change type between the overloads of this name (here
 * {@code @Const FloatBuffer}); A and C stay untyped {@code Pointer}s described by
 * Atype and Ctype.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherk3mEx(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo, 
                                                       @Cast("cublasOperation_t") int trans, 
                                                       int n, 
                                                       int k,
                                                       @Const FloatBuffer alpha, 
                                                       @Const Pointer A, @Cast("cudaDataType") int Atype, 
                                                       int lda,
                                                       @Const FloatBuffer beta, 
                                                       Pointer C, 
                                                       @Cast("cudaDataType") int Ctype, 
                                                       int ldc);
/**
 * Native binding for {@code cublasCherk3mEx}, {@code float[]} overload.
 * Only alpha and beta change type between the overloads of this name (here
 * {@code @Const float[]}); A and C stay untyped {@code Pointer}s described by
 * Atype and Ctype.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCherk3mEx(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo, 
                                                       @Cast("cublasOperation_t") int trans, 
                                                       int n, 
                                                       int k,
                                                       @Const float[] alpha, 
                                                       @Const Pointer A, @Cast("cudaDataType") int Atype, 
                                                       int lda,
                                                       @Const float[] beta, 
                                                       Pointer C, 
                                                       @Cast("cudaDataType") int Ctype, 
                                                       int ldc);
                                                       
                                                       
                                                                                                             
/* SYR2K */                                     
/**
 * Native binding for {@code cublasSsyr2k_v2}, {@code FloatPointer} overload.
 * Takes two {@code @Const} input matrices, A and B, each with its own leading dimension
 * (lda, ldb), plus {@code @Const} alpha and beta; C is the only {@code FloatPointer}
 * argument not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasSsyr2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Const FloatPointer alpha,  
                                                       @Const FloatPointer A,
                                                       int lda,
                                                       @Const FloatPointer B,
                                                       int ldb,
                                                       @Const FloatPointer beta,  
                                                       FloatPointer C,
                                                       int ldc);
/**
 * Native binding for {@code cublasSsyr2k_v2}, {@code FloatBuffer} overload.
 * Same parameter list as the other overloads of this name, with alpha, A, B, beta and C
 * supplied as {@code FloatBuffer}s; only C is not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasSsyr2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Const FloatBuffer alpha,  
                                                       @Const FloatBuffer A,
                                                       int lda,
                                                       @Const FloatBuffer B,
                                                       int ldb,
                                                       @Const FloatBuffer beta,  
                                                       FloatBuffer C,
                                                       int ldc);
/**
 * Native binding for {@code cublasSsyr2k_v2}, {@code float[]} overload.
 * Same parameter list as the other overloads of this name, with alpha, A, B, beta and C
 * supplied as Java float arrays; only C is not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasSsyr2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Const float[] alpha,  
                                                       @Const float[] A,
                                                       int lda,
                                                       @Const float[] B,
                                                       int ldb,
                                                       @Const float[] beta,  
                                                       float[] C,
                                                       int ldc);  
                                      
/**
 * Native binding for {@code cublasDsyr2k_v2}, {@code DoublePointer} overload.
 * Mirrors the {@code cublasSsyr2k_v2} parameter list with double-typed data: alpha, A, B
 * and beta are marked {@code @Const}; C is the only {@code DoublePointer} argument that
 * is not.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasDsyr2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Const DoublePointer alpha,  
                                                       @Const DoublePointer A,
                                                       int lda,
                                                       @Const DoublePointer B,
                                                       int ldb,
                                                       @Const DoublePointer beta,  
                                                       DoublePointer C,
                                                       int ldc);
/**
 * Native binding for {@code cublasDsyr2k_v2}, {@code DoubleBuffer} overload.
 * Same parameter list as the other overloads of this name, with alpha, A, B, beta and C
 * supplied as {@code DoubleBuffer}s; only C is not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasDsyr2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Const DoubleBuffer alpha,  
                                                       @Const DoubleBuffer A,
                                                       int lda,
                                                       @Const DoubleBuffer B,
                                                       int ldb,
                                                       @Const DoubleBuffer beta,  
                                                       DoubleBuffer C,
                                                       int ldc);
/**
 * Native binding for {@code cublasDsyr2k_v2}, {@code double[]} overload.
 * Same parameter list as the other overloads of this name, with alpha, A, B, beta and C
 * supplied as Java double arrays; only C is not marked {@code @Const}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasDsyr2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Const double[] alpha,  
                                                       @Const double[] A,
                                                       int lda,
                                                       @Const double[] B,
                                                       int ldb,
                                                       @Const double[] beta,  
                                                       double[] C,
                                                       int ldc);
                                      
/**
 * Native binding for {@code cublasCsyr2k_v2}.
 * All data arguments are {@code float2}: alpha, A, B and beta are cast to
 * {@code const cuComplex*} and C to the non-const {@code cuComplex*}.
 *
 * @return status code, cast from the native {@code cublasStatus_t}
 */
public static native @Cast("cublasStatus_t") int cublasCsyr2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Cast("const cuComplex*") float2 alpha,  
                                                       @Cast("const cuComplex*") float2 A,
                                                       int lda,
                                                       @Cast("const cuComplex*") float2 B,
                                                       int ldb,
                                                       @Cast("const cuComplex*") float2 beta,  
                                                       @Cast("cuComplex*") float2 C,
                                                       int ldc);
                                      
/**
 * Native binding for {@code cublasZsyr2k_v2}; every data argument is a {@code double2} handed to the
 * library as a {@code cuDoubleComplex} pointer.
 * NOTE(review): presumably the double-complex SYR2K (symmetric rank-2k update) routine -- confirm the
 * exact formula and operand shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  {@code const cuDoubleComplex*} argument
 * @param A      {@code const cuDoubleComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuDoubleComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   {@code const cuDoubleComplex*} argument
 * @param C      {@code cuDoubleComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasZsyr2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Cast("const cuDoubleComplex*") double2 alpha,  
                                                       @Cast("const cuDoubleComplex*") double2 A,
                                                       int lda,
                                                       @Cast("const cuDoubleComplex*") double2 B,
                                                       int ldb,
                                                       @Cast("const cuDoubleComplex*") double2 beta,  
                                                       @Cast("cuDoubleComplex*") double2 C,
                                                       int ldc);  
/* HER2K */                                       
/**
 * Native binding for {@code cublasCher2k_v2}. {@code alpha}, {@code A}, {@code B} and {@code C} are
 * {@code float2} values passed as {@code cuComplex} pointers, while {@code beta} is a plain
 * {@code const float} pointer (here a {@link FloatPointer}).
 * NOTE(review): presumably the HER2K (Hermitian rank-2k update) routine -- confirm the exact formula
 * and operand shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  {@code const cuComplex*} argument
 * @param A      {@code const cuComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   const pointer to float data (real-typed, unlike {@code alpha})
 * @param C      {@code cuComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasCher2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Cast("const cuComplex*") float2 alpha,  
                                                       @Cast("const cuComplex*") float2 A,
                                                       int lda,
                                                       @Cast("const cuComplex*") float2 B,
                                                       int ldb,
                                                       @Const FloatPointer beta,  
                                                       @Cast("cuComplex*") float2 C,
                                                       int ldc);
/** Overload of {@code cublasCher2k_v2} that differs only in taking {@code beta} as a {@link FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasCher2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Cast("const cuComplex*") float2 alpha,  
                                                       @Cast("const cuComplex*") float2 A,
                                                       int lda,
                                                       @Cast("const cuComplex*") float2 B,
                                                       int ldb,
                                                       @Const FloatBuffer beta,  
                                                       @Cast("cuComplex*") float2 C,
                                                       int ldc);
/** Overload of {@code cublasCher2k_v2} that differs only in taking {@code beta} as a {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasCher2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans,
                                                       int n,
                                                       int k,
                                                       @Cast("const cuComplex*") float2 alpha,  
                                                       @Cast("const cuComplex*") float2 A,
                                                       int lda,
                                                       @Cast("const cuComplex*") float2 B,
                                                       int ldb,
                                                       @Const float[] beta,  
                                                       @Cast("cuComplex*") float2 C,
                                                       int ldc);  
                                      
/**
 * Native binding for {@code cublasZher2k_v2}. {@code alpha}, {@code A}, {@code B} and {@code C} are
 * {@code double2} values passed as {@code cuDoubleComplex} pointers, while {@code beta} is a plain
 * {@code const double} pointer (here a {@link DoublePointer}).
 * NOTE(review): presumably the double-precision HER2K (Hermitian rank-2k update) routine -- confirm
 * the exact formula and operand shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  {@code const cuDoubleComplex*} argument
 * @param A      {@code const cuDoubleComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuDoubleComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   const pointer to double data (real-typed, unlike {@code alpha})
 * @param C      {@code cuDoubleComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasZher2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans, 
                                                       int n,
                                                       int k,
                                                       @Cast("const cuDoubleComplex*") double2 alpha,  
                                                       @Cast("const cuDoubleComplex*") double2 A, 
                                                       int lda,
                                                       @Cast("const cuDoubleComplex*") double2 B,
                                                       int ldb,
                                                       @Const DoublePointer beta,  
                                                       @Cast("cuDoubleComplex*") double2 C,
                                                       int ldc);
/** Overload of {@code cublasZher2k_v2} that differs only in taking {@code beta} as a {@link DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasZher2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans, 
                                                       int n,
                                                       int k,
                                                       @Cast("const cuDoubleComplex*") double2 alpha,  
                                                       @Cast("const cuDoubleComplex*") double2 A, 
                                                       int lda,
                                                       @Cast("const cuDoubleComplex*") double2 B,
                                                       int ldb,
                                                       @Const DoubleBuffer beta,  
                                                       @Cast("cuDoubleComplex*") double2 C,
                                                       int ldc);
/** Overload of {@code cublasZher2k_v2} that differs only in taking {@code beta} as a {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasZher2k_v2(cublasContext handle,
                                                       @Cast("cublasFillMode_t") int uplo,
                                                       @Cast("cublasOperation_t") int trans, 
                                                       int n,
                                                       int k,
                                                       @Cast("const cuDoubleComplex*") double2 alpha,  
                                                       @Cast("const cuDoubleComplex*") double2 A, 
                                                       int lda,
                                                       @Cast("const cuDoubleComplex*") double2 B,
                                                       int ldb,
                                                       @Const double[] beta,  
                                                       @Cast("cuDoubleComplex*") double2 C,
                                                       int ldc);     
/* SYRKX : eXtended SYRK*/
/**
 * Native binding for {@code cublasSsyrkx} (the "eXtended SYRK" family), float variant with every data
 * argument supplied as a {@link FloatPointer}.
 * NOTE(review): the exact update formula is not visible here -- confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  const float pointer
 * @param A      const float pointer
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      const float pointer
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   const float pointer
 * @param C      float pointer; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasSsyrkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Const FloatPointer alpha, 
                                                    @Const FloatPointer A,
                                                    int lda,
                                                    @Const FloatPointer B,
                                                    int ldb,
                                                    @Const FloatPointer beta, 
                                                    FloatPointer C,
                                                    int ldc);
/**
 * Overload of {@code cublasSsyrkx} taking every data argument as a {@link FloatBuffer}.
 * NOTE(review): confirm whether NIO buffers are appropriate for {@code A}, {@code B} and {@code C},
 * which cuBLAS may expect to be device memory.
 */
public static native @Cast("cublasStatus_t") int cublasSsyrkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Const FloatBuffer alpha, 
                                                    @Const FloatBuffer A,
                                                    int lda,
                                                    @Const FloatBuffer B,
                                                    int ldb,
                                                    @Const FloatBuffer beta, 
                                                    FloatBuffer C,
                                                    int ldc);
/**
 * Overload of {@code cublasSsyrkx} taking every data argument as a {@code float[]}.
 * NOTE(review): confirm whether Java arrays are appropriate for {@code A}, {@code B} and {@code C},
 * which cuBLAS may expect to be device memory.
 */
public static native @Cast("cublasStatus_t") int cublasSsyrkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Const float[] alpha, 
                                                    @Const float[] A,
                                                    int lda,
                                                    @Const float[] B,
                                                    int ldb,
                                                    @Const float[] beta, 
                                                    float[] C,
                                                    int ldc);
                                                   
/**
 * Native binding for {@code cublasDsyrkx} (the "eXtended SYRK" family), double variant with every data
 * argument supplied as a {@link DoublePointer}.
 * NOTE(review): the exact update formula is not visible here -- confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  const double pointer
 * @param A      const double pointer
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      const double pointer
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   const double pointer
 * @param C      double pointer; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasDsyrkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Const DoublePointer alpha, 
                                                    @Const DoublePointer A,
                                                    int lda,
                                                    @Const DoublePointer B,
                                                    int ldb,
                                                    @Const DoublePointer beta, 
                                                    DoublePointer C,
                                                    int ldc);
/**
 * Overload of {@code cublasDsyrkx} taking every data argument as a {@link DoubleBuffer}.
 * NOTE(review): confirm whether NIO buffers are appropriate for {@code A}, {@code B} and {@code C},
 * which cuBLAS may expect to be device memory.
 */
public static native @Cast("cublasStatus_t") int cublasDsyrkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Const DoubleBuffer alpha, 
                                                    @Const DoubleBuffer A,
                                                    int lda,
                                                    @Const DoubleBuffer B,
                                                    int ldb,
                                                    @Const DoubleBuffer beta, 
                                                    DoubleBuffer C,
                                                    int ldc);
/**
 * Overload of {@code cublasDsyrkx} taking every data argument as a {@code double[]}.
 * NOTE(review): confirm whether Java arrays are appropriate for {@code A}, {@code B} and {@code C},
 * which cuBLAS may expect to be device memory.
 */
public static native @Cast("cublasStatus_t") int cublasDsyrkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Const double[] alpha, 
                                                    @Const double[] A,
                                                    int lda,
                                                    @Const double[] B,
                                                    int ldb,
                                                    @Const double[] beta, 
                                                    double[] C,
                                                    int ldc);
                                                    
/**
 * Native binding for {@code cublasCsyrkx} (the "eXtended SYRK" family); every data argument is a
 * {@code float2} handed to the library as a {@code cuComplex} pointer.
 * NOTE(review): the exact update formula is not visible here -- confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  {@code const cuComplex*} argument
 * @param A      {@code const cuComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   {@code const cuComplex*} argument
 * @param C      {@code cuComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasCsyrkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Cast("const cuComplex*") float2 alpha, 
                                                    @Cast("const cuComplex*") float2 A,
                                                    int lda,
                                                    @Cast("const cuComplex*") float2 B,
                                                    int ldb,
                                                    @Cast("const cuComplex*") float2 beta, 
                                                    @Cast("cuComplex*") float2 C, 
                                                    int ldc);
                                                    
/**
 * Native binding for {@code cublasZsyrkx} (the "eXtended SYRK" family); every data argument is a
 * {@code double2} handed to the library as a {@code cuDoubleComplex} pointer.
 * NOTE(review): the exact update formula is not visible here -- confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  {@code const cuDoubleComplex*} argument
 * @param A      {@code const cuDoubleComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuDoubleComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   {@code const cuDoubleComplex*} argument
 * @param C      {@code cuDoubleComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasZsyrkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo, 
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Cast("const cuDoubleComplex*") double2 alpha, 
                                                    @Cast("const cuDoubleComplex*") double2 A,
                                                    int lda,
                                                    @Cast("const cuDoubleComplex*") double2 B,
                                                    int ldb,
                                                    @Cast("const cuDoubleComplex*") double2 beta, 
                                                    @Cast("cuDoubleComplex*") double2 C, 
                                                    int ldc);
/* HERKX : eXtended HERK */
/**
 * Native binding for {@code cublasCherkx} (the "eXtended HERK" family). {@code alpha}, {@code A},
 * {@code B} and {@code C} are {@code float2} values passed as {@code cuComplex} pointers, while
 * {@code beta} is a plain {@code const float} pointer (here a {@link FloatPointer}).
 * NOTE(review): the exact update formula is not visible here -- confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  {@code const cuComplex*} argument
 * @param A      {@code const cuComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   const pointer to float data (real-typed, unlike {@code alpha})
 * @param C      {@code cuComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasCherkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Cast("const cuComplex*") float2 alpha, 
                                                    @Cast("const cuComplex*") float2 A,
                                                    int lda,
                                                    @Cast("const cuComplex*") float2 B,
                                                    int ldb,
                                                    @Const FloatPointer beta, 
                                                    @Cast("cuComplex*") float2 C,
                                                    int ldc);
/** Overload of {@code cublasCherkx} that differs only in taking {@code beta} as a {@link FloatBuffer}. */
public static native @Cast("cublasStatus_t") int cublasCherkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Cast("const cuComplex*") float2 alpha, 
                                                    @Cast("const cuComplex*") float2 A,
                                                    int lda,
                                                    @Cast("const cuComplex*") float2 B,
                                                    int ldb,
                                                    @Const FloatBuffer beta, 
                                                    @Cast("cuComplex*") float2 C,
                                                    int ldc);
/** Overload of {@code cublasCherkx} that differs only in taking {@code beta} as a {@code float[]}. */
public static native @Cast("cublasStatus_t") int cublasCherkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Cast("const cuComplex*") float2 alpha, 
                                                    @Cast("const cuComplex*") float2 A,
                                                    int lda,
                                                    @Cast("const cuComplex*") float2 B,
                                                    int ldb,
                                                    @Const float[] beta, 
                                                    @Cast("cuComplex*") float2 C,
                                                    int ldc);
                                                
/**
 * Native binding for {@code cublasZherkx} (the "eXtended HERK" family). {@code alpha}, {@code A},
 * {@code B} and {@code C} are {@code double2} values passed as {@code cuDoubleComplex} pointers, while
 * {@code beta} is a plain {@code const double} pointer (here a {@link DoublePointer}).
 * NOTE(review): the exact update formula is not visible here -- confirm against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param trans  int forwarded as a native {@code cublasOperation_t}
 * @param n      int size argument, forwarded unchanged
 * @param k      int size argument, forwarded unchanged
 * @param alpha  {@code const cuDoubleComplex*} argument
 * @param A      {@code const cuDoubleComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuDoubleComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   const pointer to double data (real-typed, unlike {@code alpha})
 * @param C      {@code cuDoubleComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasZherkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Cast("const cuDoubleComplex*") double2 alpha, 
                                                    @Cast("const cuDoubleComplex*") double2 A,
                                                    int lda,
                                                    @Cast("const cuDoubleComplex*") double2 B,
                                                    int ldb,
                                                    @Const DoublePointer beta, 
                                                    @Cast("cuDoubleComplex*") double2 C,
                                                    int ldc);
/** Overload of {@code cublasZherkx} that differs only in taking {@code beta} as a {@link DoubleBuffer}. */
public static native @Cast("cublasStatus_t") int cublasZherkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Cast("const cuDoubleComplex*") double2 alpha, 
                                                    @Cast("const cuDoubleComplex*") double2 A,
                                                    int lda,
                                                    @Cast("const cuDoubleComplex*") double2 B,
                                                    int ldb,
                                                    @Const DoubleBuffer beta, 
                                                    @Cast("cuDoubleComplex*") double2 C,
                                                    int ldc);
/** Overload of {@code cublasZherkx} that differs only in taking {@code beta} as a {@code double[]}. */
public static native @Cast("cublasStatus_t") int cublasZherkx(cublasContext handle,
                                                    @Cast("cublasFillMode_t") int uplo,
                                                    @Cast("cublasOperation_t") int trans,
                                                    int n,
                                                    int k,
                                                    @Cast("const cuDoubleComplex*") double2 alpha, 
                                                    @Cast("const cuDoubleComplex*") double2 A,
                                                    int lda,
                                                    @Cast("const cuDoubleComplex*") double2 B,
                                                    int ldb,
                                                    @Const double[] beta, 
                                                    @Cast("cuDoubleComplex*") double2 C,
                                                    int ldc);
/* SYMM */
/**
 * Native binding for {@code cublasSsymm_v2} (SYMM family), float variant with every data argument
 * supplied as a {@link FloatPointer}.
 * NOTE(review): presumably a symmetric matrix-matrix multiply -- confirm the exact formula and operand
 * shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param side   int forwarded as a native {@code cublasSideMode_t}
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param m      int size argument, forwarded unchanged
 * @param n      int size argument, forwarded unchanged
 * @param alpha  const float pointer
 * @param A      const float pointer
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      const float pointer
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   const float pointer
 * @param C      float pointer; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasSsymm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m,
                                                      int n,
                                                      @Const FloatPointer alpha,  
                                                      @Const FloatPointer A,
                                                      int lda,
                                                      @Const FloatPointer B,
                                                      int ldb,
                                                      @Const FloatPointer beta,  
                                                      FloatPointer C,
                                                      int ldc);
/**
 * Overload of {@code cublasSsymm_v2} taking every data argument as a {@link FloatBuffer}.
 * NOTE(review): confirm whether NIO buffers are appropriate for {@code A}, {@code B} and {@code C},
 * which cuBLAS may expect to be device memory.
 */
public static native @Cast("cublasStatus_t") int cublasSsymm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m,
                                                      int n,
                                                      @Const FloatBuffer alpha,  
                                                      @Const FloatBuffer A,
                                                      int lda,
                                                      @Const FloatBuffer B,
                                                      int ldb,
                                                      @Const FloatBuffer beta,  
                                                      FloatBuffer C,
                                                      int ldc);
/**
 * Overload of {@code cublasSsymm_v2} taking every data argument as a {@code float[]}.
 * NOTE(review): confirm whether Java arrays are appropriate for {@code A}, {@code B} and {@code C},
 * which cuBLAS may expect to be device memory.
 */
public static native @Cast("cublasStatus_t") int cublasSsymm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m,
                                                      int n,
                                                      @Const float[] alpha,  
                                                      @Const float[] A,
                                                      int lda,
                                                      @Const float[] B,
                                                      int ldb,
                                                      @Const float[] beta,  
                                                      float[] C,
                                                      int ldc);
                                     
/**
 * Native binding for {@code cublasDsymm_v2} (SYMM family), double variant with every data argument
 * supplied as a {@link DoublePointer}.
 * NOTE(review): presumably a symmetric matrix-matrix multiply -- confirm the exact formula and operand
 * shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param side   int forwarded as a native {@code cublasSideMode_t}
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param m      int size argument, forwarded unchanged
 * @param n      int size argument, forwarded unchanged
 * @param alpha  const double pointer
 * @param A      const double pointer
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      const double pointer
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   const double pointer
 * @param C      double pointer; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasDsymm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m, 
                                                      int n,
                                                      @Const DoublePointer alpha,  
                                                      @Const DoublePointer A,
                                                      int lda,
                                                      @Const DoublePointer B,
                                                      int ldb,
                                                      @Const DoublePointer beta,  
                                                      DoublePointer C,
                                                      int ldc);
/**
 * Overload of {@code cublasDsymm_v2} taking every data argument as a {@link DoubleBuffer}.
 * NOTE(review): confirm whether NIO buffers are appropriate for {@code A}, {@code B} and {@code C},
 * which cuBLAS may expect to be device memory.
 */
public static native @Cast("cublasStatus_t") int cublasDsymm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m, 
                                                      int n,
                                                      @Const DoubleBuffer alpha,  
                                                      @Const DoubleBuffer A,
                                                      int lda,
                                                      @Const DoubleBuffer B,
                                                      int ldb,
                                                      @Const DoubleBuffer beta,  
                                                      DoubleBuffer C,
                                                      int ldc);
/**
 * Overload of {@code cublasDsymm_v2} taking every data argument as a {@code double[]}.
 * NOTE(review): confirm whether Java arrays are appropriate for {@code A}, {@code B} and {@code C},
 * which cuBLAS may expect to be device memory.
 */
public static native @Cast("cublasStatus_t") int cublasDsymm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m, 
                                                      int n,
                                                      @Const double[] alpha,  
                                                      @Const double[] A,
                                                      int lda,
                                                      @Const double[] B,
                                                      int ldb,
                                                      @Const double[] beta,  
                                                      double[] C,
                                                      int ldc);                                     

/**
 * Native binding for {@code cublasCsymm_v2} (SYMM family); every data argument is a {@code float2}
 * handed to the library as a {@code cuComplex} pointer.
 * NOTE(review): presumably a complex symmetric matrix-matrix multiply -- confirm the exact formula and
 * operand shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param side   int forwarded as a native {@code cublasSideMode_t}
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param m      int size argument, forwarded unchanged
 * @param n      int size argument, forwarded unchanged
 * @param alpha  {@code const cuComplex*} argument
 * @param A      {@code const cuComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   {@code const cuComplex*} argument
 * @param C      {@code cuComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasCsymm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m,
                                                      int n,
                                                      @Cast("const cuComplex*") float2 alpha,  
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 B,
                                                      int ldb,
                                                      @Cast("const cuComplex*") float2 beta,  
                                                      @Cast("cuComplex*") float2 C,
                                                      int ldc);
                                                   
/**
 * Native binding for {@code cublasZsymm_v2} (SYMM family); every data argument is a {@code double2}
 * handed to the library as a {@code cuDoubleComplex} pointer.
 * NOTE(review): presumably a double-complex symmetric matrix-matrix multiply -- confirm the exact
 * formula and operand shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param side   int forwarded as a native {@code cublasSideMode_t}
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param m      int size argument, forwarded unchanged
 * @param n      int size argument, forwarded unchanged
 * @param alpha  {@code const cuDoubleComplex*} argument
 * @param A      {@code const cuDoubleComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuDoubleComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   {@code const cuDoubleComplex*} argument
 * @param C      {@code cuDoubleComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasZsymm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m,
                                                      int n,
                                                      @Cast("const cuDoubleComplex*") double2 alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Cast("const cuDoubleComplex*") double2 B,
                                                      int ldb,
                                                      @Cast("const cuDoubleComplex*") double2 beta,  
                                                      @Cast("cuDoubleComplex*") double2 C,
                                                      int ldc);   
                                     
/* HEMM */
/**
 * Native binding for {@code cublasChemm_v2} (HEMM family); every data argument, including
 * {@code beta}, is a {@code float2} handed to the library as a {@code cuComplex} pointer.
 * NOTE(review): presumably a Hermitian matrix-matrix multiply -- confirm the exact formula and operand
 * shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param side   int forwarded as a native {@code cublasSideMode_t}
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param m      int size argument, forwarded unchanged
 * @param n      int size argument, forwarded unchanged
 * @param alpha  {@code const cuComplex*} argument
 * @param A      {@code const cuComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   {@code const cuComplex*} argument
 * @param C      {@code cuComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasChemm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m,
                                                      int n,
                                                      @Cast("const cuComplex*") float2 alpha,  
                                                      @Cast("const cuComplex*") float2 A,
                                                      int lda,
                                                      @Cast("const cuComplex*") float2 B,
                                                      int ldb,
                                                      @Cast("const cuComplex*") float2 beta,  
                                                      @Cast("cuComplex*") float2 C, 
                                                      int ldc); 

/**
 * Native binding for {@code cublasZhemm_v2} (HEMM family); every data argument, including
 * {@code beta}, is a {@code double2} handed to the library as a {@code cuDoubleComplex} pointer.
 * NOTE(review): presumably a double-precision Hermitian matrix-matrix multiply -- confirm the exact
 * formula and operand shapes against the cuBLAS reference.
 *
 * @param handle cuBLAS context passed as the first native argument
 * @param side   int forwarded as a native {@code cublasSideMode_t}
 * @param uplo   int forwarded as a native {@code cublasFillMode_t}
 * @param m      int size argument, forwarded unchanged
 * @param n      int size argument, forwarded unchanged
 * @param alpha  {@code const cuDoubleComplex*} argument
 * @param A      {@code const cuDoubleComplex*} argument
 * @param lda    int accompanying {@code A} (presumably its leading dimension -- confirm)
 * @param B      {@code const cuDoubleComplex*} argument
 * @param ldb    int accompanying {@code B} (presumably its leading dimension -- confirm)
 * @param beta   {@code const cuDoubleComplex*} argument
 * @param C      {@code cuDoubleComplex*} argument; the only non-const data pointer in the signature
 * @param ldc    int accompanying {@code C} (presumably its leading dimension -- confirm)
 * @return the native {@code cublasStatus_t} value as an int
 */
public static native @Cast("cublasStatus_t") int cublasZhemm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      int m,
                                                      int n,
                                                      @Cast("const cuDoubleComplex*") double2 alpha,  
                                                      @Cast("const cuDoubleComplex*") double2 A,
                                                      int lda,
                                                      @Cast("const cuDoubleComplex*") double2 B,
                                                      int ldb,
                                                      @Cast("const cuDoubleComplex*") double2 beta,  
                                                      @Cast("cuDoubleComplex*") double2 C,
                                                      int ldc); 
    
/* TRSM */                                                                         
/**
 * JNI binding for the native function {@code cublasStrsm_v2}; overload whose
 * float operands are {@code FloatPointer} instances.
 * <p>
 * {@code alpha} and {@code A} are marked {@code @Const}; {@code B} is not.
 * NOTE(review): by cuBLAS naming this is presumably the single-precision triangular
 * solve that overwrites {@code B} with the solution -- confirm against the cuBLAS docs.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const float pointer
 * @param A      const float pointer
 * @param lda    int forwarded unchanged (presumably the leading dimension of A; confirm)
 * @param B      non-const float pointer
 * @param ldb    int forwarded unchanged (presumably the leading dimension of B; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrsm_v2(cublasContext handle, 
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const FloatPointer alpha,  
                                                      @Const FloatPointer A,
                                                      int lda,
                                                      FloatPointer B,
                                                      int ldb);
/**
 * JNI binding for the native function {@code cublasStrsm_v2}; overload whose
 * float operands are {@code java.nio.FloatBuffer} instances.
 * <p>
 * Same parameter list as the other {@code cublasStrsm_v2} overloads, differing only in
 * the Java type used for {@code alpha}, {@code A} and {@code B}.
 * {@code alpha} and {@code A} are marked {@code @Const}; {@code B} is not.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const float buffer
 * @param A      const float buffer
 * @param lda    int forwarded unchanged to the native call
 * @param B      non-const float buffer
 * @param ldb    int forwarded unchanged to the native call
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrsm_v2(cublasContext handle, 
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const FloatBuffer alpha,  
                                                      @Const FloatBuffer A,
                                                      int lda,
                                                      FloatBuffer B,
                                                      int ldb);
/**
 * JNI binding for the native function {@code cublasStrsm_v2}; overload whose
 * float operands are Java {@code float[]} arrays.
 * <p>
 * Same parameter list as the other {@code cublasStrsm_v2} overloads, differing only in
 * the Java type used for {@code alpha}, {@code A} and {@code B}.
 * {@code alpha} and {@code A} are marked {@code @Const}; {@code B} is not.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const float array
 * @param A      const float array
 * @param lda    int forwarded unchanged to the native call
 * @param B      non-const float array
 * @param ldb    int forwarded unchanged to the native call
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrsm_v2(cublasContext handle, 
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const float[] alpha,  
                                                      @Const float[] A,
                                                      int lda,
                                                      float[] B,
                                                      int ldb);
    

/**
 * JNI binding for the native function {@code cublasDtrsm_v2}; overload whose
 * double operands are {@code DoublePointer} instances.
 * <p>
 * {@code alpha} and {@code A} are marked {@code @Const}; {@code B} is not.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision triangular
 * solve that overwrites {@code B} with the solution -- confirm against the cuBLAS docs.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const double pointer
 * @param A      const double pointer
 * @param lda    int forwarded unchanged (presumably the leading dimension of A; confirm)
 * @param B      non-const double pointer
 * @param ldb    int forwarded unchanged (presumably the leading dimension of B; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrsm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const DoublePointer alpha,  
                                                      @Const DoublePointer A, 
                                                      int lda, 
                                                      DoublePointer B,
                                                      int ldb);
/**
 * JNI binding for the native function {@code cublasDtrsm_v2}; overload whose
 * double operands are {@code java.nio.DoubleBuffer} instances.
 * <p>
 * Same parameter list as the other {@code cublasDtrsm_v2} overloads, differing only in
 * the Java type used for {@code alpha}, {@code A} and {@code B}.
 * {@code alpha} and {@code A} are marked {@code @Const}; {@code B} is not.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const double buffer
 * @param A      const double buffer
 * @param lda    int forwarded unchanged to the native call
 * @param B      non-const double buffer
 * @param ldb    int forwarded unchanged to the native call
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrsm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const DoubleBuffer alpha,  
                                                      @Const DoubleBuffer A, 
                                                      int lda, 
                                                      DoubleBuffer B,
                                                      int ldb);
/**
 * JNI binding for the native function {@code cublasDtrsm_v2}; overload whose
 * double operands are Java {@code double[]} arrays.
 * <p>
 * Same parameter list as the other {@code cublasDtrsm_v2} overloads, differing only in
 * the Java type used for {@code alpha}, {@code A} and {@code B}.
 * {@code alpha} and {@code A} are marked {@code @Const}; {@code B} is not.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const double array
 * @param A      const double array
 * @param lda    int forwarded unchanged to the native call
 * @param B      non-const double array
 * @param ldb    int forwarded unchanged to the native call
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrsm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const double[] alpha,  
                                                      @Const double[] A, 
                                                      int lda, 
                                                      double[] B,
                                                      int ldb);
    
/**
 * JNI binding for the native function {@code cublasCtrsm_v2}.
 * <p>
 * Operands are {@code float2} objects passed to native code as {@code cuComplex*}:
 * {@code alpha} and {@code A} as {@code const} pointers, {@code B} as a non-const pointer.
 * NOTE(review): by cuBLAS naming this is presumably the single-precision complex
 * triangular solve that overwrites {@code B} -- confirm against the cuBLAS docs.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  passed as {@code const cuComplex*}
 * @param A      passed as {@code const cuComplex*}
 * @param lda    int forwarded unchanged (presumably the leading dimension of A; confirm)
 * @param B      passed as non-const {@code cuComplex*}
 * @param ldb    int forwarded unchanged (presumably the leading dimension of B; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtrsm_v2(cublasContext handle,
                                                     @Cast("cublasSideMode_t") int side,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     @Cast("cublasOperation_t") int trans,
                                                     @Cast("cublasDiagType_t") int diag,
                                                     int m,
                                                     int n,
                                                     @Cast("const cuComplex*") float2 alpha,  
                                                     @Cast("const cuComplex*") float2 A,
                                                     int lda,
                                                     @Cast("cuComplex*") float2 B,
                                                     int ldb);
                  
/**
 * JNI binding for the native function {@code cublasZtrsm_v2}.
 * <p>
 * Operands are {@code double2} objects passed to native code as {@code cuDoubleComplex*}:
 * {@code alpha} and {@code A} as {@code const} pointers, {@code B} as a non-const pointer.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision complex
 * triangular solve that overwrites {@code B} -- confirm against the cuBLAS docs.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  passed as {@code const cuDoubleComplex*}
 * @param A      passed as {@code const cuDoubleComplex*}
 * @param lda    int forwarded unchanged (presumably the leading dimension of A; confirm)
 * @param B      passed as non-const {@code cuDoubleComplex*}
 * @param ldb    int forwarded unchanged (presumably the leading dimension of B; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtrsm_v2(cublasContext handle, 
                                                     @Cast("cublasSideMode_t") int side,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     @Cast("cublasOperation_t") int trans,
                                                     @Cast("cublasDiagType_t") int diag,
                                                     int m,
                                                     int n,
                                                     @Cast("const cuDoubleComplex*") double2 alpha,  
                                                     @Cast("const cuDoubleComplex*") double2 A,                                        
                                                     int lda,
                                                     @Cast("cuDoubleComplex*") double2 B,
                                                     int ldb);              
                                                
 /* TRMM */  
/**
 * JNI binding for the native function {@code cublasStrmm_v2}; overload whose
 * float operands are {@code FloatPointer} instances.
 * <p>
 * {@code alpha}, {@code A} and {@code B} are marked {@code @Const}; {@code C} is not.
 * NOTE(review): by cuBLAS naming this is presumably the single-precision triangular
 * matrix-matrix multiply writing its result to {@code C} -- confirm against the cuBLAS docs.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const float pointer
 * @param A      const float pointer
 * @param lda    int forwarded unchanged (presumably the leading dimension of A; confirm)
 * @param B      const float pointer
 * @param ldb    int forwarded unchanged (presumably the leading dimension of B; confirm)
 * @param C      non-const float pointer
 * @param ldc    int forwarded unchanged (presumably the leading dimension of C; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrmm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const FloatPointer alpha,  
                                                      @Const FloatPointer A,
                                                      int lda, 
                                                      @Const FloatPointer B,
                                                      int ldb,
                                                      FloatPointer C,
                                                      int ldc);
/**
 * JNI binding for the native function {@code cublasStrmm_v2}; overload whose
 * float operands are {@code java.nio.FloatBuffer} instances.
 * <p>
 * Same parameter list as the other {@code cublasStrmm_v2} overloads, differing only in
 * the Java type used for the float operands.
 * {@code alpha}, {@code A} and {@code B} are marked {@code @Const}; {@code C} is not.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const float buffer
 * @param A      const float buffer
 * @param lda    int forwarded unchanged to the native call
 * @param B      const float buffer
 * @param ldb    int forwarded unchanged to the native call
 * @param C      non-const float buffer
 * @param ldc    int forwarded unchanged to the native call
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrmm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const FloatBuffer alpha,  
                                                      @Const FloatBuffer A,
                                                      int lda, 
                                                      @Const FloatBuffer B,
                                                      int ldb,
                                                      FloatBuffer C,
                                                      int ldc);
/**
 * JNI binding for the native function {@code cublasStrmm_v2}; overload whose
 * float operands are Java {@code float[]} arrays.
 * <p>
 * Same parameter list as the other {@code cublasStrmm_v2} overloads, differing only in
 * the Java type used for the float operands.
 * {@code alpha}, {@code A} and {@code B} are marked {@code @Const}; {@code C} is not.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const float array
 * @param A      const float array
 * @param lda    int forwarded unchanged to the native call
 * @param B      const float array
 * @param ldb    int forwarded unchanged to the native call
 * @param C      non-const float array
 * @param ldc    int forwarded unchanged to the native call
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrmm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const float[] alpha,  
                                                      @Const float[] A,
                                                      int lda, 
                                                      @Const float[] B,
                                                      int ldb,
                                                      float[] C,
                                                      int ldc);
                                               
/**
 * JNI binding for the native function {@code cublasDtrmm_v2}; overload whose
 * double operands are {@code DoublePointer} instances.
 * <p>
 * {@code alpha}, {@code A} and {@code B} are marked {@code @Const}; {@code C} is not.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision triangular
 * matrix-matrix multiply writing its result to {@code C} -- confirm against the cuBLAS docs.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const double pointer
 * @param A      const double pointer
 * @param lda    int forwarded unchanged (presumably the leading dimension of A; confirm)
 * @param B      const double pointer
 * @param ldb    int forwarded unchanged (presumably the leading dimension of B; confirm)
 * @param C      non-const double pointer
 * @param ldc    int forwarded unchanged (presumably the leading dimension of C; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrmm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const DoublePointer alpha,  
                                                      @Const DoublePointer A,
                                                      int lda,
                                                      @Const DoublePointer B,
                                                      int ldb,
                                                      DoublePointer C,
                                                      int ldc);
/**
 * JNI binding for the native function {@code cublasDtrmm_v2}; overload whose
 * double operands are {@code java.nio.DoubleBuffer} instances.
 * <p>
 * Same parameter list as the other {@code cublasDtrmm_v2} overloads, differing only in
 * the Java type used for the double operands.
 * {@code alpha}, {@code A} and {@code B} are marked {@code @Const}; {@code C} is not.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const double buffer
 * @param A      const double buffer
 * @param lda    int forwarded unchanged to the native call
 * @param B      const double buffer
 * @param ldb    int forwarded unchanged to the native call
 * @param C      non-const double buffer
 * @param ldc    int forwarded unchanged to the native call
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrmm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const DoubleBuffer alpha,  
                                                      @Const DoubleBuffer A,
                                                      int lda,
                                                      @Const DoubleBuffer B,
                                                      int ldb,
                                                      DoubleBuffer C,
                                                      int ldc);
/**
 * JNI binding for the native function {@code cublasDtrmm_v2}; overload whose
 * double operands are Java {@code double[]} arrays.
 * <p>
 * Same parameter list as the other {@code cublasDtrmm_v2} overloads, differing only in
 * the Java type used for the double operands.
 * {@code alpha}, {@code A} and {@code B} are marked {@code @Const}; {@code C} is not.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  const double array
 * @param A      const double array
 * @param lda    int forwarded unchanged to the native call
 * @param B      const double array
 * @param ldb    int forwarded unchanged to the native call
 * @param C      non-const double array
 * @param ldc    int forwarded unchanged to the native call
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrmm_v2(cublasContext handle,
                                                      @Cast("cublasSideMode_t") int side,
                                                      @Cast("cublasFillMode_t") int uplo,
                                                      @Cast("cublasOperation_t") int trans,
                                                      @Cast("cublasDiagType_t") int diag,
                                                      int m,
                                                      int n,
                                                      @Const double[] alpha,  
                                                      @Const double[] A,
                                                      int lda,
                                                      @Const double[] B,
                                                      int ldb,
                                                      double[] C,
                                                      int ldc);
                                     
/**
 * JNI binding for the native function {@code cublasCtrmm_v2}.
 * <p>
 * Operands are {@code float2} objects passed to native code as {@code cuComplex*}:
 * {@code alpha}, {@code A} and {@code B} as {@code const} pointers, {@code C} as a
 * non-const pointer.
 * NOTE(review): by cuBLAS naming this is presumably the single-precision complex
 * triangular matrix-matrix multiply writing to {@code C} -- confirm against the cuBLAS docs.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  passed as {@code const cuComplex*}
 * @param A      passed as {@code const cuComplex*}
 * @param lda    int forwarded unchanged (presumably the leading dimension of A; confirm)
 * @param B      passed as {@code const cuComplex*}
 * @param ldb    int forwarded unchanged (presumably the leading dimension of B; confirm)
 * @param C      passed as non-const {@code cuComplex*}
 * @param ldc    int forwarded unchanged (presumably the leading dimension of C; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtrmm_v2(cublasContext handle,
                                                     @Cast("cublasSideMode_t") int side,
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     @Cast("cublasOperation_t") int trans,
                                                     @Cast("cublasDiagType_t") int diag,
                                                     int m,
                                                     int n,
                                                     @Cast("const cuComplex*") float2 alpha,  
                                                     @Cast("const cuComplex*") float2 A,
                                                     int lda,
                                                     @Cast("const cuComplex*") float2 B,
                                                     int ldb,
                                                     @Cast("cuComplex*") float2 C,
                                                     int ldc);
                  
/**
 * JNI binding for the native function {@code cublasZtrmm_v2}.
 * <p>
 * Operands are {@code double2} objects passed to native code as {@code cuDoubleComplex*}:
 * {@code alpha}, {@code A} and {@code B} as {@code const} pointers, {@code C} as a
 * non-const pointer.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision complex
 * triangular matrix-matrix multiply writing to {@code C} -- confirm against the cuBLAS docs.
 *
 * @param handle cuBLAS context the call is issued with
 * @param side   int passed to native code as {@code cublasSideMode_t}
 * @param uplo   int passed to native code as {@code cublasFillMode_t}
 * @param trans  int passed to native code as {@code cublasOperation_t}
 * @param diag   int passed to native code as {@code cublasDiagType_t}
 * @param m      int forwarded unchanged to the native call
 * @param n      int forwarded unchanged to the native call
 * @param alpha  passed as {@code const cuDoubleComplex*}
 * @param A      passed as {@code const cuDoubleComplex*}
 * @param lda    int forwarded unchanged (presumably the leading dimension of A; confirm)
 * @param B      passed as {@code const cuDoubleComplex*}
 * @param ldb    int forwarded unchanged (presumably the leading dimension of B; confirm)
 * @param C      passed as non-const {@code cuDoubleComplex*}
 * @param ldc    int forwarded unchanged (presumably the leading dimension of C; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtrmm_v2(cublasContext handle, @Cast("cublasSideMode_t") int side, 
                                                     @Cast("cublasFillMode_t") int uplo,
                                                     @Cast("cublasOperation_t") int trans,
                                                     @Cast("cublasDiagType_t") int diag,
                                                     int m,
                                                     int n,
                                                     @Cast("const cuDoubleComplex*") double2 alpha,  
                                                     @Cast("const cuDoubleComplex*") double2 A,
                                                     int lda,
                                                     @Cast("const cuDoubleComplex*") double2 B,
                                                     int ldb,
                                                     @Cast("cuDoubleComplex*") double2 C,
                                                     int ldc);
/* BATCH GEMM */
// #if defined(__cplusplus)
/**
 * JNI binding for the native function {@code cublasHgemmBatched}; overload taking the
 * three matrix arrays as {@code PointerPointer} (arrays of {@code __half} pointers).
 * <p>
 * {@code Aarray} and {@code Barray} are cast to {@code const __half*const*};
 * {@code Carray} is cast to {@code __half*const*}, i.e. its pointed-to data is not const.
 * NOTE(review): by cuBLAS naming this is presumably a half-precision batched GEMM over
 * {@code batchCount} matrix triples -- confirm against the cuBLAS docs.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const {@code __half} argument
 * @param Aarray     array of const {@code __half} pointers
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     array of const {@code __half} pointers
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const {@code __half} argument
 * @param Carray     array of non-const {@code __half} pointers
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasHgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const __half alpha,  
                                                          @Cast("const __half*const*") PointerPointer Aarray, 
                                                          int lda,
                                                          @Cast("const __half*const*") PointerPointer Barray,
                                                          int ldb, 
                                                          @Const __half beta,  
                                                          @Cast("__half*const*") PointerPointer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * JNI binding for the native function {@code cublasHgemmBatched}; overload taking the
 * three matrix arrays as {@code __half} objects annotated {@code @ByPtrPtr}
 * (each is handed to native code as a pointer-to-pointer).
 * <p>
 * {@code Aarray} and {@code Barray} are additionally marked {@code @Const};
 * {@code Carray} is not.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const {@code __half} argument
 * @param Aarray     const {@code __half}, passed by pointer-to-pointer
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     const {@code __half}, passed by pointer-to-pointer
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const {@code __half} argument
 * @param Carray     non-const {@code __half}, passed by pointer-to-pointer
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasHgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const __half alpha,  
                                                          @Const @ByPtrPtr __half Aarray, 
                                                          int lda,
                                                          @Const @ByPtrPtr __half Barray,
                                                          int ldb, 
                                                          @Const __half beta,  
                                                          @ByPtrPtr __half Carray,
                                                          int ldc,
                                                          int batchCount);
// #endif
/**
 * JNI binding for the native function {@code cublasSgemmBatched}; overload taking the
 * three matrix arrays as {@code PointerPointer} (arrays of {@code float} pointers).
 * <p>
 * {@code Aarray} and {@code Barray} are cast to {@code const float*const*};
 * {@code Carray} is cast to {@code float*const*}, i.e. its pointed-to data is not const.
 * NOTE(review): by cuBLAS naming this is presumably a single-precision batched GEMM over
 * {@code batchCount} matrix triples -- confirm against the cuBLAS docs.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const float pointer
 * @param Aarray     array of const {@code float} pointers
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     array of const {@code float} pointers
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const float pointer
 * @param Carray     array of non-const {@code float} pointers
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const FloatPointer alpha,  
                                                          @Cast("const float*const*") PointerPointer Aarray, 
                                                          int lda,
                                                          @Cast("const float*const*") PointerPointer Barray,
                                                          int ldb, 
                                                          @Const FloatPointer beta,  
                                                          @Cast("float*const*") PointerPointer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * JNI binding for the native function {@code cublasSgemmBatched}; overload taking the
 * three matrix arrays as {@code FloatPointer} objects annotated {@code @ByPtrPtr}
 * (each is handed to native code as a pointer-to-pointer).
 * <p>
 * {@code Aarray} and {@code Barray} are additionally marked {@code @Const};
 * {@code Carray} is not.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const float pointer
 * @param Aarray     const float pointer, passed by pointer-to-pointer
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     const float pointer, passed by pointer-to-pointer
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const float pointer
 * @param Carray     non-const float pointer, passed by pointer-to-pointer
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const FloatPointer alpha,  
                                                          @Const @ByPtrPtr FloatPointer Aarray, 
                                                          int lda,
                                                          @Const @ByPtrPtr FloatPointer Barray,
                                                          int ldb, 
                                                          @Const FloatPointer beta,  
                                                          @ByPtrPtr FloatPointer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * JNI binding for the native function {@code cublasSgemmBatched}; overload taking the
 * scalars and matrix arrays as {@code java.nio.FloatBuffer} instances, with the three
 * matrix-array arguments annotated {@code @ByPtrPtr}.
 * <p>
 * {@code Aarray} and {@code Barray} are additionally marked {@code @Const};
 * {@code Carray} is not.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const float buffer
 * @param Aarray     const float buffer, passed by pointer-to-pointer
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     const float buffer, passed by pointer-to-pointer
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const float buffer
 * @param Carray     non-const float buffer, passed by pointer-to-pointer
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const FloatBuffer alpha,  
                                                          @Const @ByPtrPtr FloatBuffer Aarray, 
                                                          int lda,
                                                          @Const @ByPtrPtr FloatBuffer Barray,
                                                          int ldb, 
                                                          @Const FloatBuffer beta,  
                                                          @ByPtrPtr FloatBuffer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * JNI binding for the native function {@code cublasSgemmBatched}; overload taking the
 * scalars and matrix arrays as Java {@code float[]} arrays, with the three
 * matrix-array arguments annotated {@code @ByPtrPtr}.
 * <p>
 * {@code Aarray} and {@code Barray} are additionally marked {@code @Const};
 * {@code Carray} is not.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const float array
 * @param Aarray     const float array, passed by pointer-to-pointer
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     const float array, passed by pointer-to-pointer
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const float array
 * @param Carray     non-const float array, passed by pointer-to-pointer
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const float[] alpha,  
                                                          @Const @ByPtrPtr float[] Aarray, 
                                                          int lda,
                                                          @Const @ByPtrPtr float[] Barray,
                                                          int ldb, 
                                                          @Const float[] beta,  
                                                          @ByPtrPtr float[] Carray,
                                                          int ldc,
                                                          int batchCount);

/**
 * JNI binding for the native function {@code cublasDgemmBatched}; overload taking the
 * three matrix arrays as {@code PointerPointer} (arrays of {@code double} pointers).
 * <p>
 * {@code Aarray} and {@code Barray} are cast to {@code const double*const*};
 * {@code Carray} is cast to {@code double*const*}, i.e. its pointed-to data is not const.
 * NOTE(review): by cuBLAS naming this is presumably a double-precision batched GEMM over
 * {@code batchCount} matrix triples -- confirm against the cuBLAS docs.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const double pointer
 * @param Aarray     array of const {@code double} pointers
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     array of const {@code double} pointers
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const double pointer
 * @param Carray     array of non-const {@code double} pointers
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const DoublePointer alpha, 
                                                          @Cast("const double*const*") PointerPointer Aarray, 
                                                          int lda,
                                                          @Cast("const double*const*") PointerPointer Barray,
                                                          int ldb, 
                                                          @Const DoublePointer beta, 
                                                          @Cast("double*const*") PointerPointer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * JNI binding for the native function {@code cublasDgemmBatched}; overload taking the
 * three matrix arrays as {@code DoublePointer} objects annotated {@code @ByPtrPtr}
 * (each is handed to native code as a pointer-to-pointer).
 * <p>
 * {@code Aarray} and {@code Barray} are additionally marked {@code @Const};
 * {@code Carray} is not.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const double pointer
 * @param Aarray     const double pointer, passed by pointer-to-pointer
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     const double pointer, passed by pointer-to-pointer
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const double pointer
 * @param Carray     non-const double pointer, passed by pointer-to-pointer
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const DoublePointer alpha, 
                                                          @Const @ByPtrPtr DoublePointer Aarray, 
                                                          int lda,
                                                          @Const @ByPtrPtr DoublePointer Barray,
                                                          int ldb, 
                                                          @Const DoublePointer beta, 
                                                          @ByPtrPtr DoublePointer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * JNI binding for the native function {@code cublasDgemmBatched}; overload taking the
 * scalars and matrix arrays as {@code java.nio.DoubleBuffer} instances, with the three
 * matrix-array arguments annotated {@code @ByPtrPtr}.
 * <p>
 * {@code Aarray} and {@code Barray} are additionally marked {@code @Const};
 * {@code Carray} is not.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const double buffer
 * @param Aarray     const double buffer, passed by pointer-to-pointer
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     const double buffer, passed by pointer-to-pointer
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const double buffer
 * @param Carray     non-const double buffer, passed by pointer-to-pointer
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const DoubleBuffer alpha, 
                                                          @Const @ByPtrPtr DoubleBuffer Aarray, 
                                                          int lda,
                                                          @Const @ByPtrPtr DoubleBuffer Barray,
                                                          int ldb, 
                                                          @Const DoubleBuffer beta, 
                                                          @ByPtrPtr DoubleBuffer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * JNI binding for the native function {@code cublasDgemmBatched}; overload taking the
 * scalars and matrix arrays as Java {@code double[]} arrays, with the three
 * matrix-array arguments annotated {@code @ByPtrPtr}.
 * <p>
 * {@code Aarray} and {@code Barray} are additionally marked {@code @Const};
 * {@code Carray} is not.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      const double array
 * @param Aarray     const double array, passed by pointer-to-pointer
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     const double array, passed by pointer-to-pointer
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       const double array
 * @param Carray     non-const double array, passed by pointer-to-pointer
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Const double[] alpha, 
                                                          @Const @ByPtrPtr double[] Aarray, 
                                                          int lda,
                                                          @Const @ByPtrPtr double[] Barray,
                                                          int ldb, 
                                                          @Const double[] beta, 
                                                          @ByPtrPtr double[] Carray,
                                                          int ldc,
                                                          int batchCount);

/**
 * JNI binding for the native function {@code cublasCgemmBatched}; overload taking the
 * three matrix arrays as {@code PointerPointer} (arrays of {@code cuComplex} pointers).
 * <p>
 * {@code Aarray} and {@code Barray} are cast to {@code const cuComplex*const*};
 * {@code Carray} is cast to {@code cuComplex*const*}, i.e. its pointed-to data is not const.
 * The scalars {@code alpha} and {@code beta} are {@code float2} objects passed as
 * {@code const cuComplex*}.
 * NOTE(review): by cuBLAS naming this is presumably a single-precision complex batched
 * GEMM over {@code batchCount} matrix triples -- confirm against the cuBLAS docs.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      passed as {@code const cuComplex*}
 * @param Aarray     array of const {@code cuComplex} pointers
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     array of const {@code cuComplex} pointers
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       passed as {@code const cuComplex*}
 * @param Carray     array of non-const {@code cuComplex} pointers
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Cast("const cuComplex*") float2 alpha, 
                                                          @Cast("const cuComplex*const*") PointerPointer Aarray, 
                                                          int lda,
                                                          @Cast("const cuComplex*const*") PointerPointer Barray,
                                                          int ldb, 
                                                          @Cast("const cuComplex*") float2 beta, 
                                                          @Cast("cuComplex*const*") PointerPointer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * JNI binding for the native function {@code cublasCgemmBatched}; overload taking the
 * three matrix arrays as {@code float2} objects annotated {@code @ByPtrPtr}
 * (each is handed to native code as a pointer-to-pointer).
 * <p>
 * {@code Aarray} and {@code Barray} are cast to {@code const cuComplex*const*};
 * {@code Carray} is cast to {@code cuComplex*const*}, i.e. its pointed-to data is not const.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      passed as {@code const cuComplex*}
 * @param Aarray     {@code float2} passed by pointer-to-pointer, const data
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     {@code float2} passed by pointer-to-pointer, const data
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       passed as {@code const cuComplex*}
 * @param Carray     {@code float2} passed by pointer-to-pointer, non-const data
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Cast("const cuComplex*") float2 alpha, 
                                                          @Cast("const cuComplex*const*") @ByPtrPtr float2 Aarray, 
                                                          int lda,
                                                          @Cast("const cuComplex*const*") @ByPtrPtr float2 Barray,
                                                          int ldb, 
                                                          @Cast("const cuComplex*") float2 beta, 
                                                          @Cast("cuComplex*const*") @ByPtrPtr float2 Carray,
                                                          int ldc,
                                                          int batchCount);

/**
 * JNI binding for the native function {@code cublasCgemm3mBatched}; overload taking the
 * three matrix arrays as {@code PointerPointer} (arrays of {@code cuComplex} pointers).
 * <p>
 * The parameter list is identical to that of {@code cublasCgemmBatched}.
 * {@code Aarray} and {@code Barray} are cast to {@code const cuComplex*const*};
 * {@code Carray} is cast to {@code cuComplex*const*}, i.e. its pointed-to data is not const.
 * NOTE(review): the "3m" suffix presumably selects an alternative complex-multiply
 * algorithm inside cuBLAS -- nothing here shows that; confirm against the cuBLAS docs.
 *
 * @param handle     cuBLAS context the call is issued with
 * @param transa     int passed to native code as {@code cublasOperation_t}
 * @param transb     int passed to native code as {@code cublasOperation_t}
 * @param m          int forwarded unchanged to the native call
 * @param n          int forwarded unchanged to the native call
 * @param k          int forwarded unchanged to the native call
 * @param alpha      passed as {@code const cuComplex*}
 * @param Aarray     array of const {@code cuComplex} pointers
 * @param lda        int forwarded unchanged to the native call
 * @param Barray     array of const {@code cuComplex} pointers
 * @param ldb        int forwarded unchanged to the native call
 * @param beta       passed as {@code const cuComplex*}
 * @param Carray     array of non-const {@code cuComplex} pointers
 * @param ldc        int forwarded unchanged to the native call
 * @param batchCount int forwarded unchanged (presumably the number of matrices; confirm)
 * @return the native {@code cublasStatus_t} result, cast to {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemm3mBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Cast("const cuComplex*") float2 alpha, 
                                                          @Cast("const cuComplex*const*") PointerPointer Aarray, 
                                                          int lda,
                                                          @Cast("const cuComplex*const*") PointerPointer Barray,
                                                          int ldb, 
                                                          @Cast("const cuComplex*") float2 beta, 
                                                          @Cast("cuComplex*const*") PointerPointer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * Native binding for the C function {@code cublasCgemm3mBatched}, convenience overload.
 *
 * <p>Identical in parameter order to the {@link PointerPointer} overload, but
 * {@code Aarray}, {@code Barray} and {@code Carray} are each a single {@code float2}
 * annotated {@code @ByPtrPtr} (cast to {@code const cuComplex*const*} for the inputs and
 * {@code cuComplex*const*} for the output).
 *
 * <p>NOTE(review): presumably only practical when each array holds a single matrix
 * pointer -- confirm against the JavaCPP {@code @ByPtrPtr} documentation and the
 * cuBLAS documentation for this function.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemm3mBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Cast("const cuComplex*") float2 alpha, 
                                                          @Cast("const cuComplex*const*") @ByPtrPtr float2 Aarray, 
                                                          int lda,
                                                          @Cast("const cuComplex*const*") @ByPtrPtr float2 Barray,
                                                          int ldb, 
                                                          @Cast("const cuComplex*") float2 beta, 
                                                          @Cast("cuComplex*const*") @ByPtrPtr float2 Carray,
                                                          int ldc,
                                                          int batchCount);

/**
 * Native binding for the C function {@code cublasZgemmBatched}.
 *
 * <p>This overload passes {@code Aarray}, {@code Barray} and {@code Carray} as
 * {@link PointerPointer} values; the inputs are cast to
 * {@code const cuDoubleComplex*const*} and the output to {@code cuDoubleComplex*const*}.
 * {@code alpha} and {@code beta} are {@code double2} values cast to
 * {@code const cuDoubleComplex*}.
 *
 * <p>NOTE(review): the name suggests a batched double-precision complex GEMM; the exact
 * operation and the meaning of the dimension, leading-dimension and batch arguments are
 * not visible here -- confirm against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Cast("const cuDoubleComplex*") double2 alpha, 
                                                          @Cast("const cuDoubleComplex*const*") PointerPointer Aarray, 
                                                          int lda,
                                                          @Cast("const cuDoubleComplex*const*") PointerPointer Barray,
                                                          int ldb, 
                                                          @Cast("const cuDoubleComplex*") double2 beta, 
                                                          @Cast("cuDoubleComplex*const*") PointerPointer Carray,
                                                          int ldc,
                                                          int batchCount);
/**
 * Native binding for the C function {@code cublasZgemmBatched}, convenience overload.
 *
 * <p>Same parameter order as the {@link PointerPointer} overload, but {@code Aarray},
 * {@code Barray} and {@code Carray} are each a single {@code double2} annotated
 * {@code @ByPtrPtr} (cast to {@code const cuDoubleComplex*const*} for the inputs and
 * {@code cuDoubleComplex*const*} for the output).
 *
 * <p>NOTE(review): presumably only practical when each array holds a single matrix
 * pointer -- confirm against the JavaCPP {@code @ByPtrPtr} documentation and the
 * cuBLAS documentation for this function.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgemmBatched(cublasContext handle,
                                                          @Cast("cublasOperation_t") int transa,
                                                          @Cast("cublasOperation_t") int transb, 
                                                          int m,
                                                          int n,
                                                          int k,
                                                          @Cast("const cuDoubleComplex*") double2 alpha, 
                                                          @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 Aarray, 
                                                          int lda,
                                                          @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 Barray,
                                                          int ldb, 
                                                          @Cast("const cuDoubleComplex*") double2 beta, 
                                                          @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Carray,
                                                          int ldc,
                                                          int batchCount);

/**
 * Native binding for the C function {@code cublasGemmBatchedEx}.
 *
 * <p>Unlike the typed batched GEMM bindings, the scalars and matrix arrays are untyped:
 * {@code alpha}/{@code beta} are {@code @Const Pointer}, and {@code Aarray},
 * {@code Barray}, {@code Carray} are {@link PointerPointer} values cast to
 * {@code const void*const*} (inputs) and {@code void*const*} (output). Each array is
 * accompanied by an integer ({@code Atype}, {@code Btype}, {@code Ctype}) cast to
 * {@code cudaDataType}.
 *
 * <p>NOTE(review): the element types the pointers must actually reference, and the valid
 * combinations of data type, compute type and algorithm, are not visible here -- confirm
 * against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @param computeType integer passed to the native side as {@code cublasComputeType_t}
 * @param algo integer passed to the native side as {@code cublasGemmAlgo_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasGemmBatchedEx(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int transa,
                                                      @Cast("cublasOperation_t") int transb, 
                                                      int m,
                                                      int n,
                                                      int k,
                                                      @Const Pointer alpha,  
                                                      @Cast("const void*const*") PointerPointer Aarray, 
                                                      @Cast("cudaDataType") int Atype,
                                                      int lda,
                                                      @Cast("const void*const*") PointerPointer Barray,
                                                      @Cast("cudaDataType") int Btype,
                                                      int ldb, 
                                                      @Const Pointer beta,  
                                                      @Cast("void*const*") PointerPointer Carray,
                                                      @Cast("cudaDataType") int Ctype,
                                                      int ldc,
                                                      int batchCount,
                                                      @Cast("cublasComputeType_t") int computeType,
                                                      @Cast("cublasGemmAlgo_t") int algo);
/**
 * Native binding for the C function {@code cublasGemmBatchedEx}, convenience overload.
 *
 * <p>Same parameter order as the {@link PointerPointer} overload, but {@code Aarray},
 * {@code Barray} and {@code Carray} are each a single {@link Pointer} annotated
 * {@code @ByPtrPtr} (cast to {@code const void*const*} for the inputs and
 * {@code void*const*} for the output).
 *
 * <p>NOTE(review): presumably only practical when each array holds a single matrix
 * pointer -- confirm against the JavaCPP {@code @ByPtrPtr} documentation and the
 * cuBLAS documentation for this function.
 *
 * @param computeType integer passed to the native side as {@code cublasComputeType_t}
 * @param algo integer passed to the native side as {@code cublasGemmAlgo_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasGemmBatchedEx(cublasContext handle, 
                                                      @Cast("cublasOperation_t") int transa,
                                                      @Cast("cublasOperation_t") int transb, 
                                                      int m,
                                                      int n,
                                                      int k,
                                                      @Const Pointer alpha,  
                                                      @Cast("const void*const*") @ByPtrPtr Pointer Aarray, 
                                                      @Cast("cudaDataType") int Atype,
                                                      int lda,
                                                      @Cast("const void*const*") @ByPtrPtr Pointer Barray,
                                                      @Cast("cudaDataType") int Btype,
                                                      int ldb, 
                                                      @Const Pointer beta,  
                                                      @Cast("void*const*") @ByPtrPtr Pointer Carray,
                                                      @Cast("cudaDataType") int Ctype,
                                                      int ldc,
                                                      int batchCount,
                                                      @Cast("cublasComputeType_t") int computeType,
                                                      @Cast("cublasGemmAlgo_t") int algo); 
 
/**
 * Native binding for the C function {@code cublasGemmStridedBatchedEx}.
 *
 * <p>All data arguments are untyped: {@code alpha}, {@code A}, {@code B} and {@code beta}
 * are {@code @Const Pointer}, and {@code C} is a plain {@link Pointer}. Each matrix
 * argument is followed by an integer cast to {@code cudaDataType}, an {@code int}
 * leading-dimension argument and a {@code long} stride argument.
 *
 * <p>NOTE(review): the stride arguments presumably give the element offset between
 * consecutive matrices of the batch, and the operation is presumably a GEMM per batch
 * entry -- confirm against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @param computeType integer passed to the native side as {@code cublasComputeType_t}
 * @param algo integer passed to the native side as {@code cublasGemmAlgo_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasGemmStridedBatchedEx(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Const Pointer alpha,
                                                                 @Const Pointer A,
                                                                 @Cast("cudaDataType") int Atype,
                                                                 int lda,
                                                                 long strideA,
                                                                 @Const Pointer B,
                                                                 @Cast("cudaDataType") int Btype,
                                                                 int ldb,
                                                                 long strideB,
                                                                 @Const Pointer beta,
                                                                 Pointer C,
                                                                 @Cast("cudaDataType") int Ctype,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount,
                                                                 @Cast("cublasComputeType_t") int computeType,
                                                                 @Cast("cublasGemmAlgo_t") int algo);

/**
 * Native binding for the C function {@code cublasSgemmStridedBatched},
 * {@link FloatPointer} overload.
 *
 * <p>{@code alpha}, {@code A}, {@code B} and {@code beta} are {@code @Const FloatPointer};
 * {@code C} is a non-const {@link FloatPointer}. Each matrix argument is followed by an
 * {@code int} leading-dimension argument and a {@code long} stride argument.
 *
 * <p>NOTE(review): presumably a single-precision GEMM applied to {@code batchCount}
 * matrices spaced {@code strideA}/{@code strideB}/{@code strideC} elements apart --
 * confirm against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Const FloatPointer alpha,
                                                                 @Const FloatPointer A,
                                                                 int lda,
                                                                 long strideA,
                                                                 @Const FloatPointer B,
                                                                 int ldb,
                                                                 long strideB,
                                                                 @Const FloatPointer beta,
                                                                 FloatPointer C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);
/**
 * Native binding for the C function {@code cublasSgemmStridedBatched},
 * {@link FloatBuffer} overload.
 *
 * <p>Same parameter order as the {@code FloatPointer} overload, with every float data
 * argument supplied as a {@code java.nio} {@link FloatBuffer} instead.
 *
 * <p>NOTE(review): whether NIO buffers are usable for the matrix arguments depends on
 * the memory the native function expects (host vs. device) -- confirm against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Const FloatBuffer alpha,
                                                                 @Const FloatBuffer A,
                                                                 int lda,
                                                                 long strideA,
                                                                 @Const FloatBuffer B,
                                                                 int ldb,
                                                                 long strideB,
                                                                 @Const FloatBuffer beta,
                                                                 FloatBuffer C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);
/**
 * Native binding for the C function {@code cublasSgemmStridedBatched},
 * {@code float[]} overload.
 *
 * <p>Same parameter order as the {@code FloatPointer} overload, with every float data
 * argument supplied as a Java {@code float[]} instead.
 *
 * <p>NOTE(review): whether Java arrays are usable for the matrix arguments depends on
 * the memory the native function expects (host vs. device) -- confirm against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Const float[] alpha,
                                                                 @Const float[] A,
                                                                 int lda,
                                                                 long strideA,
                                                                 @Const float[] B,
                                                                 int ldb,
                                                                 long strideB,
                                                                 @Const float[] beta,
                                                                 float[] C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);

/**
 * Native binding for the C function {@code cublasDgemmStridedBatched},
 * {@link DoublePointer} overload.
 *
 * <p>{@code alpha}, {@code A}, {@code B} and {@code beta} are
 * {@code @Const DoublePointer}; {@code C} is a non-const {@link DoublePointer}. Each
 * matrix argument is followed by an {@code int} leading-dimension argument and a
 * {@code long} stride argument.
 *
 * <p>NOTE(review): presumably a double-precision GEMM applied to {@code batchCount}
 * matrices spaced {@code strideA}/{@code strideB}/{@code strideC} elements apart --
 * confirm against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Const DoublePointer alpha,
                                                                 @Const DoublePointer A, 
                                                                 int lda,
                                                                 long strideA,
                                                                 @Const DoublePointer B,
                                                                 int ldb, 
                                                                 long strideB,
                                                                 @Const DoublePointer beta,
                                                                 DoublePointer C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);
/**
 * Native binding for the C function {@code cublasDgemmStridedBatched},
 * {@link DoubleBuffer} overload.
 *
 * <p>Same parameter order as the {@code DoublePointer} overload, with every double data
 * argument supplied as a {@code java.nio} {@link DoubleBuffer} instead.
 *
 * <p>NOTE(review): whether NIO buffers are usable for the matrix arguments depends on
 * the memory the native function expects (host vs. device) -- confirm against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Const DoubleBuffer alpha,
                                                                 @Const DoubleBuffer A, 
                                                                 int lda,
                                                                 long strideA,
                                                                 @Const DoubleBuffer B,
                                                                 int ldb, 
                                                                 long strideB,
                                                                 @Const DoubleBuffer beta,
                                                                 DoubleBuffer C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);
/**
 * Native binding for the C function {@code cublasDgemmStridedBatched},
 * {@code double[]} overload.
 *
 * <p>Same parameter order as the {@code DoublePointer} overload, with every double data
 * argument supplied as a Java {@code double[]} instead.
 *
 * <p>NOTE(review): whether Java arrays are usable for the matrix arguments depends on
 * the memory the native function expects (host vs. device) -- confirm against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Const double[] alpha,
                                                                 @Const double[] A, 
                                                                 int lda,
                                                                 long strideA,
                                                                 @Const double[] B,
                                                                 int ldb, 
                                                                 long strideB,
                                                                 @Const double[] beta,
                                                                 double[] C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);

/**
 * Native binding for the C function {@code cublasCgemmStridedBatched}.
 *
 * <p>{@code alpha}, {@code A}, {@code B} and {@code beta} are {@code float2} values cast
 * to {@code const cuComplex*}; {@code C} is a {@code float2} cast to {@code cuComplex*}.
 * Each matrix argument is followed by an {@code int} leading-dimension argument and a
 * {@code long} stride argument.
 *
 * <p>NOTE(review): presumably a single-precision complex GEMM applied to
 * {@code batchCount} strided matrices -- confirm against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Cast("const cuComplex*") float2 alpha,
                                                                 @Cast("const cuComplex*") float2 A, 
                                                                 int lda,
                                                                 long strideA,
                                                                 @Cast("const cuComplex*") float2 B,
                                                                 int ldb, 
                                                                 long strideB,
                                                                 @Cast("const cuComplex*") float2 beta,
                                                                 @Cast("cuComplex*") float2 C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);

/**
 * Native binding for the C function {@code cublasCgemm3mStridedBatched}.
 *
 * <p>The Java signature is identical to that of {@code cublasCgemmStridedBatched}:
 * {@code float2} scalars and inputs cast to {@code const cuComplex*}, a {@code float2}
 * output cast to {@code cuComplex*}, and an {@code int} leading dimension plus
 * {@code long} stride per matrix.
 *
 * <p>NOTE(review): the "3m" suffix presumably selects a different complex
 * multiplication algorithm -- confirm against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgemm3mStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Cast("const cuComplex*") float2 alpha,
                                                                 @Cast("const cuComplex*") float2 A, 
                                                                 int lda,
                                                                 long strideA,
                                                                 @Cast("const cuComplex*") float2 B,
                                                                 int ldb, 
                                                                 long strideB,
                                                                 @Cast("const cuComplex*") float2 beta,
                                                                 @Cast("cuComplex*") float2 C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);


/**
 * Native binding for the C function {@code cublasZgemmStridedBatched}.
 *
 * <p>{@code alpha}, {@code A}, {@code B} and {@code beta} are {@code double2} values cast
 * to {@code const cuDoubleComplex*}; {@code C} is a {@code double2} cast to
 * {@code cuDoubleComplex*}. Each matrix argument is followed by an {@code int}
 * leading-dimension argument and a {@code long} stride argument.
 *
 * <p>NOTE(review): presumably a double-precision complex GEMM applied to
 * {@code batchCount} strided matrices -- confirm against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Cast("const cuDoubleComplex*") double2 alpha,
                                                                 @Cast("const cuDoubleComplex*") double2 A, 
                                                                 int lda,
                                                                 long strideA,
                                                                 @Cast("const cuDoubleComplex*") double2 B,
                                                                 int ldb, 
                                                                 long strideB,
                                                                 @Cast("const cuDoubleComplex*") double2 beta,
                                                                 @Cast("cuDoubleComplex*") double2 C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);

// #if defined(__cplusplus)
/**
 * Native binding for the C function {@code cublasHgemmStridedBatched}.
 *
 * <p>{@code alpha}, {@code A}, {@code B} and {@code beta} are {@code @Const __half};
 * {@code C} is a non-const {@code __half}. Each matrix argument is followed by an
 * {@code int} leading-dimension argument and a {@code long} stride argument. The
 * surrounding generated comments indicate the parsed declaration sits inside an
 * {@code #if defined(__cplusplus)} guard.
 *
 * <p>NOTE(review): presumably a half-precision GEMM applied to {@code batchCount}
 * strided matrices -- confirm against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasHgemmStridedBatched(cublasContext handle,
                                                                 @Cast("cublasOperation_t") int transa,
                                                                 @Cast("cublasOperation_t") int transb, 
                                                                 int m,
                                                                 int n,
                                                                 int k,
                                                                 @Const __half alpha,
                                                                 @Const __half A, 
                                                                 int lda,
                                                                 long strideA,
                                                                 @Const __half B,
                                                                 int ldb, 
                                                                 long strideB,
                                                                 @Const __half beta,
                                                                 __half C,
                                                                 int ldc,
                                                                 long strideC,
                                                                 int batchCount);
// #endif
/* ---------------- CUBLAS BLAS-like extension ---------------- */
/* GEAM */
/**
 * Native binding for the C function {@code cublasSgeam}, {@link FloatPointer} overload.
 *
 * <p>Note the argument order: {@code alpha, A, lda} come first, then
 * {@code beta, B, ldb}, then the output {@code C, ldc}. All inputs are
 * {@code @Const FloatPointer}; {@code C} is a non-const {@link FloatPointer}.
 *
 * <p>NOTE(review): presumably combines the two single-precision input matrices into
 * {@code C} using the scalars {@code alpha} and {@code beta} -- confirm the exact
 * formula against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgeam(cublasContext handle,
                                                  @Cast("cublasOperation_t") int transa, 
                                                  @Cast("cublasOperation_t") int transb,
                                                  int m, 
                                                  int n,
                                                  @Const FloatPointer alpha, 
                                                  @Const FloatPointer A, 
                                                  int lda,
                                                  @Const FloatPointer beta, 
                                                  @Const FloatPointer B, 
                                                  int ldb,
                                                  FloatPointer C, 
                                                  int ldc);
/**
 * Native binding for the C function {@code cublasSgeam}, {@link FloatBuffer} overload.
 *
 * <p>Same parameter order as the {@code FloatPointer} overload, with every float data
 * argument supplied as a {@code java.nio} {@link FloatBuffer} instead.
 *
 * <p>NOTE(review): whether NIO buffers are usable for the matrix arguments depends on
 * the memory the native function expects (host vs. device) -- confirm against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgeam(cublasContext handle,
                                                  @Cast("cublasOperation_t") int transa, 
                                                  @Cast("cublasOperation_t") int transb,
                                                  int m, 
                                                  int n,
                                                  @Const FloatBuffer alpha, 
                                                  @Const FloatBuffer A, 
                                                  int lda,
                                                  @Const FloatBuffer beta, 
                                                  @Const FloatBuffer B, 
                                                  int ldb,
                                                  FloatBuffer C, 
                                                  int ldc);
/**
 * Native binding for the C function {@code cublasSgeam}, {@code float[]} overload.
 *
 * <p>Same parameter order as the {@code FloatPointer} overload, with every float data
 * argument supplied as a Java {@code float[]} instead.
 *
 * <p>NOTE(review): whether Java arrays are usable for the matrix arguments depends on
 * the memory the native function expects (host vs. device) -- confirm against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgeam(cublasContext handle,
                                                  @Cast("cublasOperation_t") int transa, 
                                                  @Cast("cublasOperation_t") int transb,
                                                  int m, 
                                                  int n,
                                                  @Const float[] alpha, 
                                                  @Const float[] A, 
                                                  int lda,
                                                  @Const float[] beta, 
                                                  @Const float[] B, 
                                                  int ldb,
                                                  float[] C, 
                                                  int ldc);
    
/**
 * Native binding for the C function {@code cublasDgeam}, {@link DoublePointer} overload.
 *
 * <p>Note the argument order: {@code alpha, A, lda} come first, then
 * {@code beta, B, ldb}, then the output {@code C, ldc}. All inputs are
 * {@code @Const DoublePointer}; {@code C} is a non-const {@link DoublePointer}.
 *
 * <p>NOTE(review): presumably combines the two double-precision input matrices into
 * {@code C} using the scalars {@code alpha} and {@code beta} -- confirm the exact
 * formula against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgeam(cublasContext handle,
                                                  @Cast("cublasOperation_t") int transa, 
                                                  @Cast("cublasOperation_t") int transb,
                                                  int m, 
                                                  int n,
                                                  @Const DoublePointer alpha, 
                                                  @Const DoublePointer A, 
                                                  int lda,
                                                  @Const DoublePointer beta, 
                                                  @Const DoublePointer B, 
                                                  int ldb,
                                                  DoublePointer C, 
                                                  int ldc);
/**
 * Native binding for the C function {@code cublasDgeam}, {@link DoubleBuffer} overload.
 *
 * <p>Same parameter order as the {@code DoublePointer} overload, with every double data
 * argument supplied as a {@code java.nio} {@link DoubleBuffer} instead.
 *
 * <p>NOTE(review): whether NIO buffers are usable for the matrix arguments depends on
 * the memory the native function expects (host vs. device) -- confirm against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgeam(cublasContext handle,
                                                  @Cast("cublasOperation_t") int transa, 
                                                  @Cast("cublasOperation_t") int transb,
                                                  int m, 
                                                  int n,
                                                  @Const DoubleBuffer alpha, 
                                                  @Const DoubleBuffer A, 
                                                  int lda,
                                                  @Const DoubleBuffer beta, 
                                                  @Const DoubleBuffer B, 
                                                  int ldb,
                                                  DoubleBuffer C, 
                                                  int ldc);
/**
 * Native binding for the C function {@code cublasDgeam}, {@code double[]} overload.
 *
 * <p>Same parameter order as the {@code DoublePointer} overload, with every double data
 * argument supplied as a Java {@code double[]} instead.
 *
 * <p>NOTE(review): whether Java arrays are usable for the matrix arguments depends on
 * the memory the native function expects (host vs. device) -- confirm against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgeam(cublasContext handle,
                                                  @Cast("cublasOperation_t") int transa, 
                                                  @Cast("cublasOperation_t") int transb,
                                                  int m, 
                                                  int n,
                                                  @Const double[] alpha, 
                                                  @Const double[] A, 
                                                  int lda,
                                                  @Const double[] beta, 
                                                  @Const double[] B, 
                                                  int ldb,
                                                  double[] C, 
                                                  int ldc);

/**
 * Native binding for the C function {@code cublasCgeam}.
 *
 * <p>Argument order is {@code alpha, A, lda}, then {@code beta, B, ldb}, then the output
 * {@code C, ldc}. Inputs are {@code float2} values cast to {@code const cuComplex*};
 * {@code C} is a {@code float2} cast to {@code cuComplex*}.
 *
 * <p>NOTE(review): presumably the single-precision complex variant of the GEAM
 * operation -- confirm the exact formula against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgeam(cublasContext handle,
                                                  @Cast("cublasOperation_t") int transa, 
                                                  @Cast("cublasOperation_t") int transb,
                                                  int m, 
                                                  int n,
                                                  @Cast("const cuComplex*") float2 alpha, 
                                                  @Cast("const cuComplex*") float2 A, 
                                                  int lda,
                                                  @Cast("const cuComplex*") float2 beta,  
                                                  @Cast("const cuComplex*") float2 B, 
                                                  int ldb,
                                                  @Cast("cuComplex*") float2 C, 
                                                  int ldc);

/**
 * Native binding for the C function {@code cublasZgeam}.
 *
 * <p>Argument order is {@code alpha, A, lda}, then {@code beta, B, ldb}, then the output
 * {@code C, ldc}. Inputs are {@code double2} values cast to
 * {@code const cuDoubleComplex*}; {@code C} is a {@code double2} cast to
 * {@code cuDoubleComplex*}.
 *
 * <p>NOTE(review): presumably the double-precision complex variant of the GEAM
 * operation -- confirm the exact formula against the cuBLAS documentation.
 *
 * @param transa integer passed to the native side as {@code cublasOperation_t}
 * @param transb integer passed to the native side as {@code cublasOperation_t}
 * @return the native {@code cublasStatus_t} result, exposed as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgeam(cublasContext handle,
                                                  @Cast("cublasOperation_t") int transa, 
                                                  @Cast("cublasOperation_t") int transb,
                                                  int m, 
                                                  int n,
                                                  @Cast("const cuDoubleComplex*") double2 alpha, 
                                                  @Cast("const cuDoubleComplex*") double2 A, 
                                                  int lda,
                                                  @Cast("const cuDoubleComplex*") double2 beta,  
                                                  @Cast("const cuDoubleComplex*") double2 B, 
                                                  int ldb,
                                                  @Cast("cuDoubleComplex*") double2 C, 
                                                  int ldc);
 
/* Batched LU - GETRF*/
/**
 * Native binding for {@code cublasSgetrfBatched}; this file groups it under "Batched LU - GETRF".
 * This overload passes the matrix array as a {@link PointerPointer} cast to {@code float*const*}.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param n         NOTE(review): presumably the order of each matrix; confirm against the cuBLAS reference
 * @param A         array of matrix pointers, cast to {@code float*const*}
 * @param lda       NOTE(review): presumably the leading dimension of each matrix; confirm
 * @param P         NOTE(review): presumably receives the pivot indices; confirm
 * @param info      NOTE(review): presumably receives per-matrix factorization status; confirm
 * @param batchSize NOTE(review): presumably the number of matrices referenced by A; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("float*const*") PointerPointer A,
                                                  int lda, 
                                                  IntPointer P,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasSgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr} {@link FloatPointer}
 * and {@code P}/{@code info} as {@link IntPointer}; the parameter list otherwise matches the
 * {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to this pointer, so this
 * presumably addresses a single matrix; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @ByPtrPtr FloatPointer A,
                                                  int lda, 
                                                  IntPointer P,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasSgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr} {@link FloatBuffer}
 * and {@code P}/{@code info} as {@link IntBuffer}; the parameter list otherwise matches the
 * {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @ByPtrPtr FloatBuffer A,
                                                  int lda, 
                                                  IntBuffer P,
                                                  IntBuffer info,
                                                  int batchSize);
/**
 * {@code cublasSgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr float[]}
 * and {@code P}/{@code info} as {@code int[]}; the parameter list otherwise matches the
 * {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @ByPtrPtr float[] A,
                                                  int lda, 
                                                  int[] P,
                                                  int[] info,
                                                  int batchSize);

/**
 * Native binding for {@code cublasDgetrfBatched}; this file groups it under "Batched LU - GETRF".
 * This overload passes the matrix array as a {@link PointerPointer} cast to {@code double*const*}.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param n         NOTE(review): presumably the order of each matrix; confirm against the cuBLAS reference
 * @param A         array of matrix pointers, cast to {@code double*const*}
 * @param lda       NOTE(review): presumably the leading dimension of each matrix; confirm
 * @param P         NOTE(review): presumably receives the pivot indices; confirm
 * @param info      NOTE(review): presumably receives per-matrix factorization status; confirm
 * @param batchSize NOTE(review): presumably the number of matrices referenced by A; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("double*const*") PointerPointer A,
                                                  int lda, 
                                                  IntPointer P,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasDgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr} {@link DoublePointer}
 * and {@code P}/{@code info} as {@link IntPointer}; the parameter list otherwise matches the
 * {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to this pointer, so this
 * presumably addresses a single matrix; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @ByPtrPtr DoublePointer A,
                                                  int lda, 
                                                  IntPointer P,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasDgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr} {@link DoubleBuffer}
 * and {@code P}/{@code info} as {@link IntBuffer}; the parameter list otherwise matches the
 * {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @ByPtrPtr DoubleBuffer A,
                                                  int lda, 
                                                  IntBuffer P,
                                                  IntBuffer info,
                                                  int batchSize);
/**
 * {@code cublasDgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr double[]}
 * and {@code P}/{@code info} as {@code int[]}; the parameter list otherwise matches the
 * {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @ByPtrPtr double[] A,
                                                  int lda, 
                                                  int[] P,
                                                  int[] info,
                                                  int batchSize);

/**
 * Native binding for {@code cublasCgetrfBatched}; this file groups it under "Batched LU - GETRF".
 * This overload passes the matrix array as a {@link PointerPointer} cast to {@code cuComplex*const*}.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param n         NOTE(review): presumably the order of each matrix; confirm against the cuBLAS reference
 * @param A         array of matrix pointers, cast to {@code cuComplex*const*}
 * @param lda       NOTE(review): presumably the leading dimension of each matrix; confirm
 * @param P         NOTE(review): presumably receives the pivot indices; confirm
 * @param info      NOTE(review): presumably receives per-matrix factorization status; confirm
 * @param batchSize NOTE(review): presumably the number of matrices referenced by A; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("cuComplex*const*") PointerPointer A,
                                                  int lda, 
                                                  IntPointer P,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasCgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr float2}
 * (cast to {@code cuComplex*const*}) and {@code P}/{@code info} as {@link IntPointer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to this pointer, so this
 * presumably addresses a single matrix; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("cuComplex*const*") @ByPtrPtr float2 A,
                                                  int lda, 
                                                  IntPointer P,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasCgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr float2}
 * (cast to {@code cuComplex*const*}) and {@code P}/{@code info} as {@link IntBuffer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("cuComplex*const*") @ByPtrPtr float2 A,
                                                  int lda, 
                                                  IntBuffer P,
                                                  IntBuffer info,
                                                  int batchSize);
/**
 * {@code cublasCgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr float2}
 * (cast to {@code cuComplex*const*}) and {@code P}/{@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("cuComplex*const*") @ByPtrPtr float2 A,
                                                  int lda, 
                                                  int[] P,
                                                  int[] info,
                                                  int batchSize);

/**
 * Native binding for {@code cublasZgetrfBatched}; this file groups it under "Batched LU - GETRF".
 * This overload passes the matrix array as a {@link PointerPointer} cast to
 * {@code cuDoubleComplex*const*}.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param n         NOTE(review): presumably the order of each matrix; confirm against the cuBLAS reference
 * @param A         array of matrix pointers, cast to {@code cuDoubleComplex*const*}
 * @param lda       NOTE(review): presumably the leading dimension of each matrix; confirm
 * @param P         NOTE(review): presumably receives the pivot indices; confirm
 * @param info      NOTE(review): presumably receives per-matrix factorization status; confirm
 * @param batchSize NOTE(review): presumably the number of matrices referenced by A; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("cuDoubleComplex*const*") PointerPointer A,
                                                  int lda, 
                                                  IntPointer P,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasZgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr double2}
 * (cast to {@code cuDoubleComplex*const*}) and {@code P}/{@code info} as {@link IntPointer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to this pointer, so this
 * presumably addresses a single matrix; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                  int lda, 
                                                  IntPointer P,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasZgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr double2}
 * (cast to {@code cuDoubleComplex*const*}) and {@code P}/{@code info} as {@link IntBuffer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                  int lda, 
                                                  IntBuffer P,
                                                  IntBuffer info,
                                                  int batchSize);
/**
 * {@code cublasZgetrfBatched} overload taking {@code A} as a {@code @ByPtrPtr double2}
 * (cast to {@code cuDoubleComplex*const*}) and {@code P}/{@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetrfBatched(cublasContext handle,
                                                  int n, 
                                                  @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                  int lda, 
                                                  int[] P,
                                                  int[] info,
                                                  int batchSize);

/* Batched inversion based on LU factorization from getrf */
/**
 * Native binding for {@code cublasSgetriBatched}; this file groups it under
 * "Batched inversion based on LU factorization from getrf".
 * This overload passes {@code A} and {@code C} as {@link PointerPointer}s.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param n         NOTE(review): presumably the order of each matrix; confirm against the cuBLAS reference
 * @param A         const array of matrix pointers, cast to {@code const float*const*};
 *                  NOTE(review): presumably the LU factors produced by getrf
 * @param lda       NOTE(review): presumably the leading dimension of each matrix in A; confirm
 * @param P         passed as const; NOTE(review): presumably the pivot indices produced by getrf
 * @param C         non-const array of matrix pointers, cast to {@code float*const*};
 *                  NOTE(review): presumably receives the inverses
 * @param ldc       NOTE(review): presumably the leading dimension of each matrix in C; confirm
 * @param info      NOTE(review): presumably receives per-matrix status; confirm
 * @param batchSize NOTE(review): presumably the number of matrices in the batch; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const float*const*") PointerPointer A,
                                                  int lda,
                                                  @Const IntPointer P,
                                                  @Cast("float*const*") PointerPointer C,
                                                  int ldc,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasSgetriBatched} overload taking {@code A} (const) and {@code C} as
 * {@code @ByPtrPtr} {@link FloatPointer}, and {@code P} (const) and {@code info} as
 * {@link IntPointer}; the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to each pointer, so this
 * presumably addresses a single matrix pair; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Const @ByPtrPtr FloatPointer A,
                                                  int lda,
                                                  @Const IntPointer P,
                                                  @ByPtrPtr FloatPointer C,
                                                  int ldc,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasSgetriBatched} overload taking {@code A} (const) and {@code C} as
 * {@code @ByPtrPtr} {@link FloatBuffer}, and {@code P} (const) and {@code info} as
 * {@link IntBuffer}; the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Const @ByPtrPtr FloatBuffer A,
                                                  int lda,
                                                  @Const IntBuffer P,
                                                  @ByPtrPtr FloatBuffer C,
                                                  int ldc,
                                                  IntBuffer info,
                                                  int batchSize);
/**
 * {@code cublasSgetriBatched} overload taking {@code A} (const) and {@code C} as
 * {@code @ByPtrPtr float[]}, and {@code P} (const) and {@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Const @ByPtrPtr float[] A,
                                                  int lda,
                                                  @Const int[] P,
                                                  @ByPtrPtr float[] C,
                                                  int ldc,
                                                  int[] info,
                                                  int batchSize);

/**
 * Native binding for {@code cublasDgetriBatched}; this file groups it under
 * "Batched inversion based on LU factorization from getrf".
 * This overload passes {@code A} and {@code C} as {@link PointerPointer}s.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param n         NOTE(review): presumably the order of each matrix; confirm against the cuBLAS reference
 * @param A         const array of matrix pointers, cast to {@code const double*const*};
 *                  NOTE(review): presumably the LU factors produced by getrf
 * @param lda       NOTE(review): presumably the leading dimension of each matrix in A; confirm
 * @param P         passed as const; NOTE(review): presumably the pivot indices produced by getrf
 * @param C         non-const array of matrix pointers, cast to {@code double*const*};
 *                  NOTE(review): presumably receives the inverses
 * @param ldc       NOTE(review): presumably the leading dimension of each matrix in C; confirm
 * @param info      NOTE(review): presumably receives per-matrix status; confirm
 * @param batchSize NOTE(review): presumably the number of matrices in the batch; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const double*const*") PointerPointer A,
                                                  int lda,
                                                  @Const IntPointer P,
                                                  @Cast("double*const*") PointerPointer C,
                                                  int ldc,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasDgetriBatched} overload taking {@code A} (const) and {@code C} as
 * {@code @ByPtrPtr} {@link DoublePointer}, and {@code P} (const) and {@code info} as
 * {@link IntPointer}; the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to each pointer, so this
 * presumably addresses a single matrix pair; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Const @ByPtrPtr DoublePointer A,
                                                  int lda,
                                                  @Const IntPointer P,
                                                  @ByPtrPtr DoublePointer C,
                                                  int ldc,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasDgetriBatched} overload taking {@code A} (const) and {@code C} as
 * {@code @ByPtrPtr} {@link DoubleBuffer}, and {@code P} (const) and {@code info} as
 * {@link IntBuffer}; the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Const @ByPtrPtr DoubleBuffer A,
                                                  int lda,
                                                  @Const IntBuffer P,
                                                  @ByPtrPtr DoubleBuffer C,
                                                  int ldc,
                                                  IntBuffer info,
                                                  int batchSize);
/**
 * {@code cublasDgetriBatched} overload taking {@code A} (const) and {@code C} as
 * {@code @ByPtrPtr double[]}, and {@code P} (const) and {@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Const @ByPtrPtr double[] A,
                                                  int lda,
                                                  @Const int[] P,
                                                  @ByPtrPtr double[] C,
                                                  int ldc,
                                                  int[] info,
                                                  int batchSize);

/**
 * Native binding for {@code cublasCgetriBatched}; this file groups it under
 * "Batched inversion based on LU factorization from getrf".
 * This overload passes {@code A} and {@code C} as {@link PointerPointer}s.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param n         NOTE(review): presumably the order of each matrix; confirm against the cuBLAS reference
 * @param A         const array of matrix pointers, cast to {@code const cuComplex*const*};
 *                  NOTE(review): presumably the LU factors produced by getrf
 * @param lda       NOTE(review): presumably the leading dimension of each matrix in A; confirm
 * @param P         passed as const; NOTE(review): presumably the pivot indices produced by getrf
 * @param C         non-const array of matrix pointers, cast to {@code cuComplex*const*};
 *                  NOTE(review): presumably receives the inverses
 * @param ldc       NOTE(review): presumably the leading dimension of each matrix in C; confirm
 * @param info      NOTE(review): presumably receives per-matrix status; confirm
 * @param batchSize NOTE(review): presumably the number of matrices in the batch; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const cuComplex*const*") PointerPointer A,
                                                  int lda,
                                                  @Const IntPointer P,
                                                  @Cast("cuComplex*const*") PointerPointer C,
                                                  int ldc,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasCgetriBatched} overload taking {@code A} (const cast) and {@code C} as
 * {@code @ByPtrPtr float2}, and {@code P} (const) and {@code info} as {@link IntPointer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to each pointer, so this
 * presumably addresses a single matrix pair; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const cuComplex*const*") @ByPtrPtr float2 A,
                                                  int lda,
                                                  @Const IntPointer P,
                                                  @Cast("cuComplex*const*") @ByPtrPtr float2 C,
                                                  int ldc,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasCgetriBatched} overload taking {@code A} (const cast) and {@code C} as
 * {@code @ByPtrPtr float2}, and {@code P} (const) and {@code info} as {@link IntBuffer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const cuComplex*const*") @ByPtrPtr float2 A,
                                                  int lda,
                                                  @Const IntBuffer P,
                                                  @Cast("cuComplex*const*") @ByPtrPtr float2 C,
                                                  int ldc,
                                                  IntBuffer info,
                                                  int batchSize);
/**
 * {@code cublasCgetriBatched} overload taking {@code A} (const cast) and {@code C} as
 * {@code @ByPtrPtr float2}, and {@code P} (const) and {@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const cuComplex*const*") @ByPtrPtr float2 A,
                                                  int lda,
                                                  @Const int[] P,
                                                  @Cast("cuComplex*const*") @ByPtrPtr float2 C,
                                                  int ldc,
                                                  int[] info,
                                                  int batchSize);

/**
 * Native binding for {@code cublasZgetriBatched}; this file groups it under
 * "Batched inversion based on LU factorization from getrf".
 * This overload passes {@code A} and {@code C} as {@link PointerPointer}s.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param n         NOTE(review): presumably the order of each matrix; confirm against the cuBLAS reference
 * @param A         const array of matrix pointers, cast to {@code const cuDoubleComplex*const*};
 *                  NOTE(review): presumably the LU factors produced by getrf
 * @param lda       NOTE(review): presumably the leading dimension of each matrix in A; confirm
 * @param P         passed as const; NOTE(review): presumably the pivot indices produced by getrf
 * @param C         non-const array of matrix pointers, cast to {@code cuDoubleComplex*const*};
 *                  NOTE(review): presumably receives the inverses
 * @param ldc       NOTE(review): presumably the leading dimension of each matrix in C; confirm
 * @param info      NOTE(review): presumably receives per-matrix status; confirm
 * @param batchSize NOTE(review): presumably the number of matrices in the batch; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const cuDoubleComplex*const*") PointerPointer A,
                                                  int lda,
                                                  @Const IntPointer P,
                                                  @Cast("cuDoubleComplex*const*") PointerPointer C,
                                                  int ldc,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasZgetriBatched} overload taking {@code A} (const cast) and {@code C} as
 * {@code @ByPtrPtr double2}, and {@code P} (const) and {@code info} as {@link IntPointer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to each pointer, so this
 * presumably addresses a single matrix pair; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                  int lda,
                                                  @Const IntPointer P,
                                                  @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 C,
                                                  int ldc,
                                                  IntPointer info,
                                                  int batchSize);
/**
 * {@code cublasZgetriBatched} overload taking {@code A} (const cast) and {@code C} as
 * {@code @ByPtrPtr double2}, and {@code P} (const) and {@code info} as {@link IntBuffer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                  int lda,
                                                  @Const IntBuffer P,
                                                  @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 C,
                                                  int ldc,
                                                  IntBuffer info,
                                                  int batchSize);
/**
 * {@code cublasZgetriBatched} overload taking {@code A} (const cast) and {@code C} as
 * {@code @ByPtrPtr double2}, and {@code P} (const) and {@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetriBatched(cublasContext handle,
                                                  int n,
                                                  @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                  int lda,
                                                  @Const int[] P,
                                                  @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 C,
                                                  int ldc,
                                                  int[] info,
                                                  int batchSize);

/* Batched solver based on LU factorization from getrf */

/**
 * Native binding for {@code cublasSgetrsBatched}; this file groups it under
 * "Batched solver based on LU factorization from getrf".
 * This overload passes {@code Aarray} and {@code Barray} as {@link PointerPointer}s.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param trans     operation selector, cast to {@code cublasOperation_t}
 * @param n         NOTE(review): presumably the order of each matrix in Aarray; confirm against the cuBLAS reference
 * @param nrhs      NOTE(review): presumably the number of right-hand-side columns; confirm
 * @param Aarray    const array of matrix pointers, cast to {@code const float*const*};
 *                  NOTE(review): presumably the LU factors produced by getrf
 * @param lda       NOTE(review): presumably the leading dimension of each matrix in Aarray; confirm
 * @param devIpiv   passed as const; NOTE(review): presumably the pivot indices produced by getrf
 * @param Barray    non-const array of matrix pointers, cast to {@code float*const*};
 *                  NOTE(review): presumably right-hand sides on entry and solutions on return
 * @param ldb       NOTE(review): presumably the leading dimension of each matrix in Barray; confirm
 * @param info      NOTE(review): presumably receives status information; confirm
 * @param batchSize NOTE(review): presumably the number of systems in the batch; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const float*const*") PointerPointer Aarray, 
                                                            int lda, 
                                                            @Const IntPointer devIpiv, 
                                                            @Cast("float*const*") PointerPointer Barray, 
                                                            int ldb, 
                                                            IntPointer info,
                                                            int batchSize);
/**
 * {@code cublasSgetrsBatched} overload taking {@code Aarray} (const) and {@code Barray} as
 * {@code @ByPtrPtr} {@link FloatPointer}, and {@code devIpiv} (const) and {@code info} as
 * {@link IntPointer}; the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to each pointer, so this
 * presumably addresses a single system; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Const @ByPtrPtr FloatPointer Aarray, 
                                                            int lda, 
                                                            @Const IntPointer devIpiv, 
                                                            @ByPtrPtr FloatPointer Barray, 
                                                            int ldb, 
                                                            IntPointer info,
                                                            int batchSize);
/**
 * {@code cublasSgetrsBatched} overload taking {@code Aarray} (const) and {@code Barray} as
 * {@code @ByPtrPtr} {@link FloatBuffer}, and {@code devIpiv} (const) and {@code info} as
 * {@link IntBuffer}; the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Const @ByPtrPtr FloatBuffer Aarray, 
                                                            int lda, 
                                                            @Const IntBuffer devIpiv, 
                                                            @ByPtrPtr FloatBuffer Barray, 
                                                            int ldb, 
                                                            IntBuffer info,
                                                            int batchSize);
/**
 * {@code cublasSgetrsBatched} overload taking {@code Aarray} (const) and {@code Barray} as
 * {@code @ByPtrPtr float[]}, and {@code devIpiv} (const) and {@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Const @ByPtrPtr float[] Aarray, 
                                                            int lda, 
                                                            @Const int[] devIpiv, 
                                                            @ByPtrPtr float[] Barray, 
                                                            int ldb, 
                                                            int[] info,
                                                            int batchSize);

/**
 * Native binding for {@code cublasDgetrsBatched}; this file groups it under
 * "Batched solver based on LU factorization from getrf".
 * This overload passes {@code Aarray} and {@code Barray} as {@link PointerPointer}s.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param trans     operation selector, cast to {@code cublasOperation_t}
 * @param n         NOTE(review): presumably the order of each matrix in Aarray; confirm against the cuBLAS reference
 * @param nrhs      NOTE(review): presumably the number of right-hand-side columns; confirm
 * @param Aarray    const array of matrix pointers, cast to {@code const double*const*};
 *                  NOTE(review): presumably the LU factors produced by getrf
 * @param lda       NOTE(review): presumably the leading dimension of each matrix in Aarray; confirm
 * @param devIpiv   passed as const; NOTE(review): presumably the pivot indices produced by getrf
 * @param Barray    non-const array of matrix pointers, cast to {@code double*const*};
 *                  NOTE(review): presumably right-hand sides on entry and solutions on return
 * @param ldb       NOTE(review): presumably the leading dimension of each matrix in Barray; confirm
 * @param info      NOTE(review): presumably receives status information; confirm
 * @param batchSize NOTE(review): presumably the number of systems in the batch; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetrsBatched( cublasContext handle, 
                                                           @Cast("cublasOperation_t") int trans, 
                                                           int n, 
                                                           int nrhs, 
                                                           @Cast("const double*const*") PointerPointer Aarray, 
                                                           int lda, 
                                                           @Const IntPointer devIpiv, 
                                                           @Cast("double*const*") PointerPointer Barray, 
                                                           int ldb, 
                                                           IntPointer info,
                                                           int batchSize);
/**
 * {@code cublasDgetrsBatched} overload taking {@code Aarray} (const) and {@code Barray} as
 * {@code @ByPtrPtr} {@link DoublePointer}, and {@code devIpiv} (const) and {@code info} as
 * {@link IntPointer}; the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to each pointer, so this
 * presumably addresses a single system; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetrsBatched( cublasContext handle, 
                                                           @Cast("cublasOperation_t") int trans, 
                                                           int n, 
                                                           int nrhs, 
                                                           @Const @ByPtrPtr DoublePointer Aarray, 
                                                           int lda, 
                                                           @Const IntPointer devIpiv, 
                                                           @ByPtrPtr DoublePointer Barray, 
                                                           int ldb, 
                                                           IntPointer info,
                                                           int batchSize);
/**
 * {@code cublasDgetrsBatched} overload taking {@code Aarray} (const) and {@code Barray} as
 * {@code @ByPtrPtr} {@link DoubleBuffer}, and {@code devIpiv} (const) and {@code info} as
 * {@link IntBuffer}; the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetrsBatched( cublasContext handle, 
                                                           @Cast("cublasOperation_t") int trans, 
                                                           int n, 
                                                           int nrhs, 
                                                           @Const @ByPtrPtr DoubleBuffer Aarray, 
                                                           int lda, 
                                                           @Const IntBuffer devIpiv, 
                                                           @ByPtrPtr DoubleBuffer Barray, 
                                                           int ldb, 
                                                           IntBuffer info,
                                                           int batchSize);
/**
 * {@code cublasDgetrsBatched} overload taking {@code Aarray} (const) and {@code Barray} as
 * {@code @ByPtrPtr double[]}, and {@code devIpiv} (const) and {@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgetrsBatched( cublasContext handle, 
                                                           @Cast("cublasOperation_t") int trans, 
                                                           int n, 
                                                           int nrhs, 
                                                           @Const @ByPtrPtr double[] Aarray, 
                                                           int lda, 
                                                           @Const int[] devIpiv, 
                                                           @ByPtrPtr double[] Barray, 
                                                           int ldb, 
                                                           int[] info,
                                                           int batchSize);

/**
 * Native binding for {@code cublasCgetrsBatched}; this file groups it under
 * "Batched solver based on LU factorization from getrf".
 * This overload passes {@code Aarray} and {@code Barray} as {@link PointerPointer}s.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param trans     operation selector, cast to {@code cublasOperation_t}
 * @param n         NOTE(review): presumably the order of each matrix in Aarray; confirm against the cuBLAS reference
 * @param nrhs      NOTE(review): presumably the number of right-hand-side columns; confirm
 * @param Aarray    const array of matrix pointers, cast to {@code const cuComplex*const*};
 *                  NOTE(review): presumably the LU factors produced by getrf
 * @param lda       NOTE(review): presumably the leading dimension of each matrix in Aarray; confirm
 * @param devIpiv   passed as const; NOTE(review): presumably the pivot indices produced by getrf
 * @param Barray    non-const array of matrix pointers, cast to {@code cuComplex*const*};
 *                  NOTE(review): presumably right-hand sides on entry and solutions on return
 * @param ldb       NOTE(review): presumably the leading dimension of each matrix in Barray; confirm
 * @param info      NOTE(review): presumably receives status information; confirm
 * @param batchSize NOTE(review): presumably the number of systems in the batch; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const cuComplex*const*") PointerPointer Aarray, 
                                                            int lda, 
                                                            @Const IntPointer devIpiv, 
                                                            @Cast("cuComplex*const*") PointerPointer Barray, 
                                                            int ldb, 
                                                            IntPointer info,
                                                            int batchSize);
/**
 * {@code cublasCgetrsBatched} overload taking {@code Aarray} (const cast) and {@code Barray} as
 * {@code @ByPtrPtr float2}, and {@code devIpiv} (const) and {@code info} as {@link IntPointer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to each pointer, so this
 * presumably addresses a single system; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const cuComplex*const*") @ByPtrPtr float2 Aarray, 
                                                            int lda, 
                                                            @Const IntPointer devIpiv, 
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 Barray, 
                                                            int ldb, 
                                                            IntPointer info,
                                                            int batchSize);
/**
 * {@code cublasCgetrsBatched} overload taking {@code Aarray} (const cast) and {@code Barray} as
 * {@code @ByPtrPtr float2}, and {@code devIpiv} (const) and {@code info} as {@link IntBuffer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const cuComplex*const*") @ByPtrPtr float2 Aarray, 
                                                            int lda, 
                                                            @Const IntBuffer devIpiv, 
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 Barray, 
                                                            int ldb, 
                                                            IntBuffer info,
                                                            int batchSize);
/**
 * {@code cublasCgetrsBatched} overload taking {@code Aarray} (const cast) and {@code Barray} as
 * {@code @ByPtrPtr float2}, and {@code devIpiv} (const) and {@code info} as {@code int[]};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const cuComplex*const*") @ByPtrPtr float2 Aarray, 
                                                            int lda, 
                                                            @Const int[] devIpiv, 
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 Barray, 
                                                            int ldb, 
                                                            int[] info,
                                                            int batchSize);


/**
 * Native binding for {@code cublasZgetrsBatched}; this file groups it under
 * "Batched solver based on LU factorization from getrf".
 * This overload passes {@code Aarray} and {@code Barray} as {@link PointerPointer}s.
 *
 * @param handle    cuBLAS context handed to the native call
 * @param trans     operation selector, cast to {@code cublasOperation_t}
 * @param n         NOTE(review): presumably the order of each matrix in Aarray; confirm against the cuBLAS reference
 * @param nrhs      NOTE(review): presumably the number of right-hand-side columns; confirm
 * @param Aarray    const array of matrix pointers, cast to {@code const cuDoubleComplex*const*};
 *                  NOTE(review): presumably the LU factors produced by getrf
 * @param lda       NOTE(review): presumably the leading dimension of each matrix in Aarray; confirm
 * @param devIpiv   passed as const; NOTE(review): presumably the pivot indices produced by getrf
 * @param Barray    non-const array of matrix pointers, cast to {@code cuDoubleComplex*const*};
 *                  NOTE(review): presumably right-hand sides on entry and solutions on return
 * @param ldb       NOTE(review): presumably the leading dimension of each matrix in Barray; confirm
 * @param info      NOTE(review): presumably receives status information; confirm
 * @param batchSize NOTE(review): presumably the number of systems in the batch; confirm
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const cuDoubleComplex*const*") PointerPointer Aarray, 
                                                            int lda, 
                                                            @Const IntPointer devIpiv, 
                                                            @Cast("cuDoubleComplex*const*") PointerPointer Barray, 
                                                            int ldb, 
                                                            IntPointer info,
                                                            int batchSize);
/**
 * {@code cublasZgetrsBatched} overload taking {@code Aarray} (const cast) and {@code Barray} as
 * {@code @ByPtrPtr double2}, and {@code devIpiv} (const) and {@code info} as {@link IntPointer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 * NOTE(review): {@code @ByPtrPtr} hands the native side a pointer to each pointer, so this
 * presumably addresses a single system; confirm against the JavaCPP annotation docs.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 Aarray, 
                                                            int lda, 
                                                            @Const IntPointer devIpiv, 
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Barray, 
                                                            int ldb, 
                                                            IntPointer info,
                                                            int batchSize);
/**
 * {@code cublasZgetrsBatched} overload taking {@code Aarray} (const cast) and {@code Barray} as
 * {@code @ByPtrPtr double2}, and {@code devIpiv} (const) and {@code info} as {@link IntBuffer};
 * the parameter list otherwise matches the {@link PointerPointer} overload.
 *
 * @return the native {@code cublasStatus_t}, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 Aarray, 
                                                            int lda, 
                                                            @Const IntBuffer devIpiv, 
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Barray, 
                                                            int ldb, 
                                                            IntBuffer info,
                                                            int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasZgetrsBatched}; this overload takes
 * {@code devIpiv} and {@code info} as Java {@code int[]} arrays.
 * <p>
 * The matrix arguments are identical to the other overloads: {@code double2} passed through
 * {@code @ByPtrPtr} and cast to {@code cuDoubleComplex} pointer arrays.
 * NOTE(review): whether a Java heap array is acceptable for {@code devIpiv} depends on where the
 * native routine expects that memory to live -- verify against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgetrsBatched( cublasContext handle, 
                                                            @Cast("cublasOperation_t") int trans, 
                                                            int n, 
                                                            int nrhs, 
                                                            @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 Aarray, 
                                                            int lda, 
                                                            @Const int[] devIpiv, 
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Barray, 
                                                            int ldb, 
                                                            int[] info,
                                                            int batchSize);



/* TRSM - Batched Triangular Solver */
/**
 * Java binding for the native cuBLAS routine {@code cublasStrsmBatched} (single-precision entry
 * of the batched triangular solver group); this overload takes the matrix arrays {@code A}
 * (const) and {@code B} as {@code PointerPointer}, cast to {@code float} pointer arrays.
 * <p>
 * {@code side}, {@code uplo}, {@code trans} and {@code diag} are plain ints cast to the
 * corresponding cuBLAS enum types on the native side.
 * NOTE(review): host/device placement of {@code alpha} and of the pointer arrays is not visible
 * here -- confirm against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Const FloatPointer alpha,
                                                          @Cast("const float*const*") PointerPointer A, 
                                                          int lda,
                                                          @Cast("float*const*") PointerPointer B, 
                                                          int ldb,
                                                          int batchCount);
/**
 * Java binding for the native cuBLAS routine {@code cublasStrsmBatched} (batched triangular
 * solver group); this overload takes {@code alpha}, {@code A} and {@code B} as
 * {@code FloatPointer}, with {@code A} (const) and {@code B} passed by pointer-to-pointer
 * via {@code @ByPtrPtr}.
 * NOTE(review): a single {@code FloatPointer} stands in for an array of matrix pointers here;
 * how that maps onto {@code batchCount} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Const FloatPointer alpha,
                                                          @Const @ByPtrPtr FloatPointer A, 
                                                          int lda,
                                                          @ByPtrPtr FloatPointer B, 
                                                          int ldb,
                                                          int batchCount);
/**
 * Java binding for the native cuBLAS routine {@code cublasStrsmBatched} (batched triangular
 * solver group); this overload takes {@code alpha}, {@code A} and {@code B} as
 * {@code java.nio.FloatBuffer}, with {@code A} (const) and {@code B} passed by
 * pointer-to-pointer via {@code @ByPtrPtr}.
 * NOTE(review): it is unclear from this file how a single NIO buffer maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Const FloatBuffer alpha,
                                                          @Const @ByPtrPtr FloatBuffer A, 
                                                          int lda,
                                                          @ByPtrPtr FloatBuffer B, 
                                                          int ldb,
                                                          int batchCount);
/**
 * Java binding for the native cuBLAS routine {@code cublasStrsmBatched} (batched triangular
 * solver group); this overload takes {@code alpha}, {@code A} and {@code B} as Java
 * {@code float[]} arrays, with {@code A} (const) and {@code B} passed by pointer-to-pointer
 * via {@code @ByPtrPtr}.
 * NOTE(review): it is unclear from this file how a single Java array maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Const float[] alpha,
                                                          @Const @ByPtrPtr float[] A, 
                                                          int lda,
                                                          @ByPtrPtr float[] B, 
                                                          int ldb,
                                                          int batchCount);

/**
 * Java binding for the native cuBLAS routine {@code cublasDtrsmBatched} (double-precision entry
 * of the batched triangular solver group); this overload takes the matrix arrays {@code A}
 * (const) and {@code B} as {@code PointerPointer}, cast to {@code double} pointer arrays.
 * <p>
 * {@code side}, {@code uplo}, {@code trans} and {@code diag} are plain ints cast to the
 * corresponding cuBLAS enum types on the native side.
 * NOTE(review): host/device placement of {@code alpha} and of the pointer arrays is not visible
 * here -- confirm against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Const DoublePointer alpha,
                                                          @Cast("const double*const*") PointerPointer A, 
                                                          int lda,
                                                          @Cast("double*const*") PointerPointer B, 
                                                          int ldb,
                                                          int batchCount);
/**
 * Java binding for the native cuBLAS routine {@code cublasDtrsmBatched} (batched triangular
 * solver group); this overload takes {@code alpha}, {@code A} and {@code B} as
 * {@code DoublePointer}, with {@code A} (const) and {@code B} passed by pointer-to-pointer
 * via {@code @ByPtrPtr}.
 * NOTE(review): a single {@code DoublePointer} stands in for an array of matrix pointers here;
 * how that maps onto {@code batchCount} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Const DoublePointer alpha,
                                                          @Const @ByPtrPtr DoublePointer A, 
                                                          int lda,
                                                          @ByPtrPtr DoublePointer B, 
                                                          int ldb,
                                                          int batchCount);
/**
 * Java binding for the native cuBLAS routine {@code cublasDtrsmBatched} (batched triangular
 * solver group); this overload takes {@code alpha}, {@code A} and {@code B} as
 * {@code java.nio.DoubleBuffer}, with {@code A} (const) and {@code B} passed by
 * pointer-to-pointer via {@code @ByPtrPtr}.
 * NOTE(review): it is unclear from this file how a single NIO buffer maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Const DoubleBuffer alpha,
                                                          @Const @ByPtrPtr DoubleBuffer A, 
                                                          int lda,
                                                          @ByPtrPtr DoubleBuffer B, 
                                                          int ldb,
                                                          int batchCount);
/**
 * Java binding for the native cuBLAS routine {@code cublasDtrsmBatched} (batched triangular
 * solver group); this overload takes {@code alpha}, {@code A} and {@code B} as Java
 * {@code double[]} arrays, with {@code A} (const) and {@code B} passed by pointer-to-pointer
 * via {@code @ByPtrPtr}.
 * NOTE(review): it is unclear from this file how a single Java array maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Const double[] alpha,
                                                          @Const @ByPtrPtr double[] A, 
                                                          int lda,
                                                          @ByPtrPtr double[] B, 
                                                          int ldb,
                                                          int batchCount);

/**
 * Java binding for the native cuBLAS routine {@code cublasCtrsmBatched} (single-precision
 * complex entry of the batched triangular solver group); this overload takes the matrix arrays
 * {@code A} (const) and {@code B} as {@code PointerPointer}, cast to {@code cuComplex}
 * pointer arrays.
 * <p>
 * {@code alpha} is a {@code float2} cast to {@code const cuComplex*}; {@code side},
 * {@code uplo}, {@code trans} and {@code diag} are ints cast to the cuBLAS enum types.
 * NOTE(review): host/device placement of the arguments is not visible here -- confirm against
 * the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Cast("const cuComplex*") float2 alpha,
                                                          @Cast("const cuComplex*const*") PointerPointer A, 
                                                          int lda,
                                                          @Cast("cuComplex*const*") PointerPointer B, 
                                                          int ldb,
                                                          int batchCount);
/**
 * Java binding for the native cuBLAS routine {@code cublasCtrsmBatched} (batched triangular
 * solver group); this overload takes {@code A} (const) and {@code B} as {@code float2}
 * passed by pointer-to-pointer via {@code @ByPtrPtr} and cast to {@code cuComplex} pointer
 * arrays. {@code alpha} is a {@code float2} cast to {@code const cuComplex*}.
 * NOTE(review): a single {@code float2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchCount} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Cast("const cuComplex*") float2 alpha,
                                                          @Cast("const cuComplex*const*") @ByPtrPtr float2 A, 
                                                          int lda,
                                                          @Cast("cuComplex*const*") @ByPtrPtr float2 B, 
                                                          int ldb,
                                                          int batchCount);

/**
 * Java binding for the native cuBLAS routine {@code cublasZtrsmBatched} (double-precision
 * complex entry of the batched triangular solver group); this overload takes the matrix arrays
 * {@code A} (const) and {@code B} as {@code PointerPointer}, cast to {@code cuDoubleComplex}
 * pointer arrays.
 * <p>
 * {@code alpha} is a {@code double2} cast to {@code const cuDoubleComplex*}; {@code side},
 * {@code uplo}, {@code trans} and {@code diag} are ints cast to the cuBLAS enum types.
 * NOTE(review): host/device placement of the arguments is not visible here -- confirm against
 * the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Cast("const cuDoubleComplex*") double2 alpha,
                                                          @Cast("const cuDoubleComplex*const*") PointerPointer A, 
                                                          int lda,
                                                          @Cast("cuDoubleComplex*const*") PointerPointer B, 
                                                          int ldb,
                                                          int batchCount);
/**
 * Java binding for the native cuBLAS routine {@code cublasZtrsmBatched} (batched triangular
 * solver group); this overload takes {@code A} (const) and {@code B} as {@code double2}
 * passed by pointer-to-pointer via {@code @ByPtrPtr} and cast to {@code cuDoubleComplex}
 * pointer arrays. {@code alpha} is a {@code double2} cast to {@code const cuDoubleComplex*}.
 * NOTE(review): a single {@code double2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchCount} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtrsmBatched( cublasContext handle, 
                                                          @Cast("cublasSideMode_t") int side, 
                                                          @Cast("cublasFillMode_t") int uplo,
                                                          @Cast("cublasOperation_t") int trans, 
                                                          @Cast("cublasDiagType_t") int diag,
                                                          int m, 
                                                          int n, 
                                                          @Cast("const cuDoubleComplex*") double2 alpha,
                                                          @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 A, 
                                                          int lda,
                                                          @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 B, 
                                                          int ldb,
                                                          int batchCount);

/* Batched - MATINV */
/**
 * Java binding for the native cuBLAS routine {@code cublasSmatinvBatched} (single-precision
 * entry of the batched MATINV group); this overload takes {@code A} (const) and {@code Ainv}
 * as {@code PointerPointer}, cast to {@code float} pointer arrays, and {@code info} as
 * {@code IntPointer}.
 * NOTE(review): presumably {@code Ainv} receives the inverses of the matrices referenced by
 * {@code A}, with {@code lda}/{@code lda_inv} as their leading dimensions -- confirm against
 * the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const float*const*") PointerPointer A,
                                                          int lda, 
                                                          @Cast("float*const*") PointerPointer Ainv,
                                                          int lda_inv, 
                                                          IntPointer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasSmatinvBatched} (batched MATINV
 * group); this overload takes {@code A} (const) and {@code Ainv} as {@code FloatPointer}
 * passed by pointer-to-pointer via {@code @ByPtrPtr}, and {@code info} as {@code IntPointer}.
 * NOTE(review): a single {@code FloatPointer} stands in for an array of matrix pointers here;
 * how that maps onto {@code batchSize} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Const @ByPtrPtr FloatPointer A,
                                                          int lda, 
                                                          @ByPtrPtr FloatPointer Ainv,
                                                          int lda_inv, 
                                                          IntPointer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasSmatinvBatched} (batched MATINV
 * group); this overload takes {@code A} (const) and {@code Ainv} as
 * {@code java.nio.FloatBuffer} passed via {@code @ByPtrPtr}, and {@code info} as
 * {@code java.nio.IntBuffer}.
 * NOTE(review): it is unclear from this file how a single NIO buffer maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Const @ByPtrPtr FloatBuffer A,
                                                          int lda, 
                                                          @ByPtrPtr FloatBuffer Ainv,
                                                          int lda_inv, 
                                                          IntBuffer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasSmatinvBatched} (batched MATINV
 * group); this overload takes {@code A} (const) and {@code Ainv} as Java {@code float[]}
 * arrays passed via {@code @ByPtrPtr}, and {@code info} as {@code int[]}.
 * NOTE(review): it is unclear from this file how a single Java array maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Const @ByPtrPtr float[] A,
                                                          int lda, 
                                                          @ByPtrPtr float[] Ainv,
                                                          int lda_inv, 
                                                          int[] info,
                                                          int batchSize);

/**
 * Java binding for the native cuBLAS routine {@code cublasDmatinvBatched} (double-precision
 * entry of the batched MATINV group); this overload takes {@code A} (const) and {@code Ainv}
 * as {@code PointerPointer}, cast to {@code double} pointer arrays, and {@code info} as
 * {@code IntPointer}.
 * NOTE(review): presumably {@code Ainv} receives the inverses of the matrices referenced by
 * {@code A}, with {@code lda}/{@code lda_inv} as their leading dimensions -- confirm against
 * the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const double*const*") PointerPointer A,
                                                          int lda, 
                                                          @Cast("double*const*") PointerPointer Ainv,
                                                          int lda_inv, 
                                                          IntPointer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasDmatinvBatched} (batched MATINV
 * group); this overload takes {@code A} (const) and {@code Ainv} as {@code DoublePointer}
 * passed by pointer-to-pointer via {@code @ByPtrPtr}, and {@code info} as {@code IntPointer}.
 * NOTE(review): a single {@code DoublePointer} stands in for an array of matrix pointers here;
 * how that maps onto {@code batchSize} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Const @ByPtrPtr DoublePointer A,
                                                          int lda, 
                                                          @ByPtrPtr DoublePointer Ainv,
                                                          int lda_inv, 
                                                          IntPointer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasDmatinvBatched} (batched MATINV
 * group); this overload takes {@code A} (const) and {@code Ainv} as
 * {@code java.nio.DoubleBuffer} passed via {@code @ByPtrPtr}, and {@code info} as
 * {@code java.nio.IntBuffer}.
 * NOTE(review): it is unclear from this file how a single NIO buffer maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Const @ByPtrPtr DoubleBuffer A,
                                                          int lda, 
                                                          @ByPtrPtr DoubleBuffer Ainv,
                                                          int lda_inv, 
                                                          IntBuffer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasDmatinvBatched} (batched MATINV
 * group); this overload takes {@code A} (const) and {@code Ainv} as Java {@code double[]}
 * arrays passed via {@code @ByPtrPtr}, and {@code info} as {@code int[]}.
 * NOTE(review): it is unclear from this file how a single Java array maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Const @ByPtrPtr double[] A,
                                                          int lda, 
                                                          @ByPtrPtr double[] Ainv,
                                                          int lda_inv, 
                                                          int[] info,
                                                          int batchSize);

/**
 * Java binding for the native cuBLAS routine {@code cublasCmatinvBatched} (single-precision
 * complex entry of the batched MATINV group); this overload takes {@code A} (const) and
 * {@code Ainv} as {@code PointerPointer}, cast to {@code cuComplex} pointer arrays, and
 * {@code info} as {@code IntPointer}.
 * NOTE(review): presumably {@code Ainv} receives the inverses of the matrices referenced by
 * {@code A} -- confirm semantics and memory-space requirements in the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const cuComplex*const*") PointerPointer A,
                                                          int lda, 
                                                          @Cast("cuComplex*const*") PointerPointer Ainv,
                                                          int lda_inv, 
                                                          IntPointer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasCmatinvBatched} (batched MATINV
 * group); this overload takes {@code A} (const) and {@code Ainv} as {@code float2} passed via
 * {@code @ByPtrPtr} and cast to {@code cuComplex} pointer arrays, and {@code info} as
 * {@code IntPointer}.
 * NOTE(review): a single {@code float2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchSize} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const cuComplex*const*") @ByPtrPtr float2 A,
                                                          int lda, 
                                                          @Cast("cuComplex*const*") @ByPtrPtr float2 Ainv,
                                                          int lda_inv, 
                                                          IntPointer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasCmatinvBatched} (batched MATINV
 * group); same {@code float2} / {@code @ByPtrPtr} matrix arguments as the {@code IntPointer}
 * overload, but {@code info} is a {@code java.nio.IntBuffer}.
 * NOTE(review): a single {@code float2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchSize} matrices is not visible in this file -- verify before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const cuComplex*const*") @ByPtrPtr float2 A,
                                                          int lda, 
                                                          @Cast("cuComplex*const*") @ByPtrPtr float2 Ainv,
                                                          int lda_inv, 
                                                          IntBuffer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasCmatinvBatched} (batched MATINV
 * group); same {@code float2} / {@code @ByPtrPtr} matrix arguments as the {@code IntPointer}
 * overload, but {@code info} is a Java {@code int[]}.
 * NOTE(review): a single {@code float2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchSize} matrices is not visible in this file -- verify before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const cuComplex*const*") @ByPtrPtr float2 A,
                                                          int lda, 
                                                          @Cast("cuComplex*const*") @ByPtrPtr float2 Ainv,
                                                          int lda_inv, 
                                                          int[] info,
                                                          int batchSize);

/**
 * Java binding for the native cuBLAS routine {@code cublasZmatinvBatched} (double-precision
 * complex entry of the batched MATINV group); this overload takes {@code A} (const) and
 * {@code Ainv} as {@code PointerPointer}, cast to {@code cuDoubleComplex} pointer arrays, and
 * {@code info} as {@code IntPointer}.
 * NOTE(review): presumably {@code Ainv} receives the inverses of the matrices referenced by
 * {@code A} -- confirm semantics and memory-space requirements in the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const cuDoubleComplex*const*") PointerPointer A,
                                                          int lda, 
                                                          @Cast("cuDoubleComplex*const*") PointerPointer Ainv,
                                                          int lda_inv, 
                                                          IntPointer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasZmatinvBatched} (batched MATINV
 * group); this overload takes {@code A} (const) and {@code Ainv} as {@code double2} passed via
 * {@code @ByPtrPtr} and cast to {@code cuDoubleComplex} pointer arrays, and {@code info} as
 * {@code IntPointer}.
 * NOTE(review): a single {@code double2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchSize} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                          int lda, 
                                                          @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Ainv,
                                                          int lda_inv, 
                                                          IntPointer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasZmatinvBatched} (batched MATINV
 * group); same {@code double2} / {@code @ByPtrPtr} matrix arguments as the {@code IntPointer}
 * overload, but {@code info} is a {@code java.nio.IntBuffer}.
 * NOTE(review): a single {@code double2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchSize} matrices is not visible in this file -- verify before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                          int lda, 
                                                          @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Ainv,
                                                          int lda_inv, 
                                                          IntBuffer info,
                                                          int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasZmatinvBatched} (batched MATINV
 * group); same {@code double2} / {@code @ByPtrPtr} matrix arguments as the {@code IntPointer}
 * overload, but {@code info} is a Java {@code int[]}.
 * NOTE(review): a single {@code double2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchSize} matrices is not visible in this file -- verify before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZmatinvBatched(cublasContext handle,
                                                          int n, 
                                                          @Cast("const cuDoubleComplex*const*") @ByPtrPtr double2 A,
                                                          int lda, 
                                                          @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Ainv,
                                                          int lda_inv, 
                                                          int[] info,
                                                          int batchSize);

/* Batch QR Factorization */
/**
 * Java binding for the native cuBLAS routine {@code cublasSgeqrfBatched} (single-precision
 * entry of the batch QR factorization group); this overload takes {@code Aarray} and
 * {@code TauArray} as {@code PointerPointer}, both cast to non-const {@code float} pointer
 * arrays, and {@code info} as {@code IntPointer}.
 * NOTE(review): the contents written to {@code Aarray}, {@code TauArray} and {@code info},
 * and their required memory space, are defined by cuBLAS -- confirm in its documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgeqrfBatched( cublasContext handle, 
                                                           int m, 
                                                           int n,
                                                           @Cast("float*const*") PointerPointer Aarray,
                                                           int lda,
                                                           @Cast("float*const*") PointerPointer TauArray,                                                           
                                                           IntPointer info,
                                                           int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasSgeqrfBatched} (batch QR
 * factorization group); this overload takes {@code Aarray} and {@code TauArray} as
 * {@code FloatPointer} passed by pointer-to-pointer via {@code @ByPtrPtr}, and {@code info}
 * as {@code IntPointer}.
 * NOTE(review): a single {@code FloatPointer} stands in for an array of matrix pointers here;
 * how that maps onto {@code batchSize} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgeqrfBatched( cublasContext handle, 
                                                           int m, 
                                                           int n,
                                                           @ByPtrPtr FloatPointer Aarray,
                                                           int lda,
                                                           @ByPtrPtr FloatPointer TauArray,                                                           
                                                           IntPointer info,
                                                           int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasSgeqrfBatched} (batch QR
 * factorization group); this overload takes {@code Aarray} and {@code TauArray} as
 * {@code java.nio.FloatBuffer} passed via {@code @ByPtrPtr}, and {@code info} as
 * {@code java.nio.IntBuffer}.
 * NOTE(review): it is unclear from this file how a single NIO buffer maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgeqrfBatched( cublasContext handle, 
                                                           int m, 
                                                           int n,
                                                           @ByPtrPtr FloatBuffer Aarray,
                                                           int lda,
                                                           @ByPtrPtr FloatBuffer TauArray,                                                           
                                                           IntBuffer info,
                                                           int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasSgeqrfBatched} (batch QR
 * factorization group); this overload takes {@code Aarray} and {@code TauArray} as Java
 * {@code float[]} arrays passed via {@code @ByPtrPtr}, and {@code info} as {@code int[]}.
 * NOTE(review): it is unclear from this file how a single Java array maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgeqrfBatched( cublasContext handle, 
                                                           int m, 
                                                           int n,
                                                           @ByPtrPtr float[] Aarray,
                                                           int lda,
                                                           @ByPtrPtr float[] TauArray,                                                           
                                                           int[] info,
                                                           int batchSize);

/**
 * Java binding for the native cuBLAS routine {@code cublasDgeqrfBatched} (double-precision
 * entry of the batch QR factorization group); this overload takes {@code Aarray} and
 * {@code TauArray} as {@code PointerPointer}, both cast to non-const {@code double} pointer
 * arrays, and {@code info} as {@code IntPointer}.
 * NOTE(review): the contents written to {@code Aarray}, {@code TauArray} and {@code info},
 * and their required memory space, are defined by cuBLAS -- confirm in its documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("double*const*") PointerPointer Aarray,
                                                            int lda, 
                                                            @Cast("double*const*") PointerPointer TauArray,                                                            
                                                            IntPointer info,
                                                            int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasDgeqrfBatched} (batch QR
 * factorization group); this overload takes {@code Aarray} and {@code TauArray} as
 * {@code DoublePointer} passed by pointer-to-pointer via {@code @ByPtrPtr}, and {@code info}
 * as {@code IntPointer}.
 * NOTE(review): a single {@code DoublePointer} stands in for an array of matrix pointers here;
 * how that maps onto {@code batchSize} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @ByPtrPtr DoublePointer Aarray,
                                                            int lda, 
                                                            @ByPtrPtr DoublePointer TauArray,                                                            
                                                            IntPointer info,
                                                            int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasDgeqrfBatched} (batch QR
 * factorization group); this overload takes {@code Aarray} and {@code TauArray} as
 * {@code java.nio.DoubleBuffer} passed via {@code @ByPtrPtr}, and {@code info} as
 * {@code java.nio.IntBuffer}.
 * NOTE(review): it is unclear from this file how a single NIO buffer maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @ByPtrPtr DoubleBuffer Aarray,
                                                            int lda, 
                                                            @ByPtrPtr DoubleBuffer TauArray,                                                            
                                                            IntBuffer info,
                                                            int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasDgeqrfBatched} (batch QR
 * factorization group); this overload takes {@code Aarray} and {@code TauArray} as Java
 * {@code double[]} arrays passed via {@code @ByPtrPtr}, and {@code info} as {@code int[]}.
 * NOTE(review): it is unclear from this file how a single Java array maps onto an array of
 * per-matrix pointers, or whether host memory is acceptable to the native routine -- verify
 * before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @ByPtrPtr double[] Aarray,
                                                            int lda, 
                                                            @ByPtrPtr double[] TauArray,                                                            
                                                            int[] info,
                                                            int batchSize);

/**
 * Java binding for the native cuBLAS routine {@code cublasCgeqrfBatched} (single-precision
 * complex entry of the batch QR factorization group); this overload takes {@code Aarray} and
 * {@code TauArray} as {@code PointerPointer}, both cast to non-const {@code cuComplex}
 * pointer arrays, and {@code info} as {@code IntPointer}.
 * NOTE(review): the contents written to {@code Aarray}, {@code TauArray} and {@code info},
 * and their required memory space, are defined by cuBLAS -- confirm in its documentation.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("cuComplex*const*") PointerPointer Aarray,
                                                            int lda, 
                                                            @Cast("cuComplex*const*") PointerPointer TauArray,                                                            
                                                            IntPointer info,
                                                            int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasCgeqrfBatched} (batch QR
 * factorization group); this overload takes {@code Aarray} and {@code TauArray} as
 * {@code float2} passed via {@code @ByPtrPtr} and cast to {@code cuComplex} pointer arrays,
 * and {@code info} as {@code IntPointer}.
 * NOTE(review): a single {@code float2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchSize} matrices is not visible in this file -- prefer the
 * {@code PointerPointer} overload unless verified.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 Aarray,
                                                            int lda, 
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 TauArray,                                                            
                                                            IntPointer info,
                                                            int batchSize);
/**
 * Java binding for the native cuBLAS routine {@code cublasCgeqrfBatched} (batch QR
 * factorization group); same {@code float2} / {@code @ByPtrPtr} matrix arguments as the
 * {@code IntPointer} overload, but {@code info} is a {@code java.nio.IntBuffer}.
 * NOTE(review): a single {@code float2} stands in for an array of matrix pointers here; how
 * that maps onto {@code batchSize} matrices is not visible in this file -- verify before use.
 *
 * @return the native {@code cublasStatus_t} result, surfaced as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 Aarray,
                                                            int lda, 
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 TauArray,                                                            
                                                            IntBuffer info,
                                                            int batchSize);
/**
 * Native declaration of {@code cublasCgeqrfBatched}. Same parameter list as the
 * {@code float2}/{@code IntPointer} overload, except that {@code info} is passed as an
 * {@code int[]}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 Aarray,
                                                            int lda, 
                                                            @Cast("cuComplex*const*") @ByPtrPtr float2 TauArray,                                                            
                                                            int[] info,
                                                            int batchSize);
                                                            
/**
 * Native declaration of {@code cublasZgeqrfBatched}. This overload passes {@code Aarray} and
 * {@code TauArray} as {@code PointerPointer} (cast to {@code cuDoubleComplex*const*}) and
 * {@code info} as an {@code IntPointer}.
 * NOTE(review): the name suggests a batched double-precision complex QR factorization; confirm
 * the numerical contract against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("cuDoubleComplex*const*") PointerPointer Aarray,
                                                            int lda,
                                                            @Cast("cuDoubleComplex*const*") PointerPointer TauArray,
                                                            IntPointer info,
                                                            int batchSize);
/**
 * Native declaration of {@code cublasZgeqrfBatched}. This overload passes {@code Aarray} and
 * {@code TauArray} as {@code double2} by pointer-to-pointer (cast to
 * {@code cuDoubleComplex*const*}) and {@code info} as an {@code IntPointer}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Aarray,
                                                            int lda,
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 TauArray,
                                                            IntPointer info,
                                                            int batchSize);
/**
 * Native declaration of {@code cublasZgeqrfBatched}. Same parameter list as the
 * {@code double2}/{@code IntPointer} overload, except that {@code info} is passed as a
 * {@code java.nio.IntBuffer}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Aarray,
                                                            int lda,
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 TauArray,
                                                            IntBuffer info,
                                                            int batchSize);
/**
 * Native declaration of {@code cublasZgeqrfBatched}. Same parameter list as the
 * {@code double2}/{@code IntPointer} overload, except that {@code info} is passed as an
 * {@code int[]}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgeqrfBatched( cublasContext handle, 
                                                            int m, 
                                                            int n,
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Aarray,
                                                            int lda,
                                                            @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 TauArray,
                                                            int[] info,
                                                            int batchSize);
/* Least-squares minimization: only m >= n and the non-transposed operation are supported */
/**
 * Native declaration of {@code cublasSgelsBatched}. This overload passes {@code Aarray} and
 * {@code Carray} as {@code PointerPointer} (cast to {@code float*const*}) and both
 * {@code info} and {@code devInfoArray} as {@code IntPointer}. {@code trans} is an
 * {@code int} cast to {@code cublasOperation_t}.
 * NOTE(review): the numerical contract is not visible in this binding; confirm it against the
 * cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgelsBatched( cublasContext handle, 
                                                           @Cast("cublasOperation_t") int trans, 
                                                           int m,  
                                                           int n,
                                                           int nrhs,
                                                           @Cast("float*const*") PointerPointer Aarray,
                                                           int lda, 
                                                           @Cast("float*const*") PointerPointer Carray,
                                                           int ldc,
                                                           IntPointer info, 
                                                           IntPointer devInfoArray,
                                                           int batchSize );
/**
 * Native declaration of {@code cublasSgelsBatched}. This overload passes {@code Aarray} and
 * {@code Carray} as {@code FloatPointer} by pointer-to-pointer, and {@code info} and
 * {@code devInfoArray} as {@code IntPointer}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgelsBatched( cublasContext handle, 
                                                           @Cast("cublasOperation_t") int trans, 
                                                           int m,  
                                                           int n,
                                                           int nrhs,
                                                           @ByPtrPtr FloatPointer Aarray,
                                                           int lda, 
                                                           @ByPtrPtr FloatPointer Carray,
                                                           int ldc,
                                                           IntPointer info, 
                                                           IntPointer devInfoArray,
                                                           int batchSize );
/**
 * Native declaration of {@code cublasSgelsBatched}. This overload uses {@code java.nio}
 * buffers: {@code FloatBuffer} (by pointer-to-pointer) for {@code Aarray} and {@code Carray},
 * and {@code IntBuffer} for {@code info} and {@code devInfoArray}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgelsBatched( cublasContext handle, 
                                                           @Cast("cublasOperation_t") int trans, 
                                                           int m,  
                                                           int n,
                                                           int nrhs,
                                                           @ByPtrPtr FloatBuffer Aarray,
                                                           int lda, 
                                                           @ByPtrPtr FloatBuffer Carray,
                                                           int ldc,
                                                           IntBuffer info, 
                                                           IntBuffer devInfoArray,
                                                           int batchSize );
/**
 * Native declaration of {@code cublasSgelsBatched}. This overload uses Java arrays:
 * {@code float[]} (by pointer-to-pointer) for {@code Aarray} and {@code Carray}, and
 * {@code int[]} for {@code info} and {@code devInfoArray}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSgelsBatched( cublasContext handle, 
                                                           @Cast("cublasOperation_t") int trans, 
                                                           int m,  
                                                           int n,
                                                           int nrhs,
                                                           @ByPtrPtr float[] Aarray,
                                                           int lda, 
                                                           @ByPtrPtr float[] Carray,
                                                           int ldc,
                                                           int[] info, 
                                                           int[] devInfoArray,
                                                           int batchSize );
                                                                
/**
 * Native declaration of {@code cublasDgelsBatched}. This overload passes {@code Aarray} and
 * {@code Carray} as {@code PointerPointer} (cast to {@code double*const*}) and both
 * {@code info} and {@code devInfoArray} as {@code IntPointer}. {@code trans} is an
 * {@code int} cast to {@code cublasOperation_t}.
 * NOTE(review): presumably the double-precision counterpart of {@code cublasSgelsBatched};
 * confirm the numerical contract against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("double*const*") PointerPointer Aarray,
                                                           int lda, 
                                                           @Cast("double*const*") PointerPointer Carray,
                                                           int ldc,
                                                           IntPointer info, 
                                                           IntPointer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasDgelsBatched}. This overload passes {@code Aarray} and
 * {@code Carray} as {@code DoublePointer} by pointer-to-pointer, and {@code info} and
 * {@code devInfoArray} as {@code IntPointer}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @ByPtrPtr DoublePointer Aarray,
                                                           int lda, 
                                                           @ByPtrPtr DoublePointer Carray,
                                                           int ldc,
                                                           IntPointer info, 
                                                           IntPointer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasDgelsBatched}. This overload uses {@code java.nio}
 * buffers: {@code DoubleBuffer} (by pointer-to-pointer) for {@code Aarray} and
 * {@code Carray}, and {@code IntBuffer} for {@code info} and {@code devInfoArray}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @ByPtrPtr DoubleBuffer Aarray,
                                                           int lda, 
                                                           @ByPtrPtr DoubleBuffer Carray,
                                                           int ldc,
                                                           IntBuffer info, 
                                                           IntBuffer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasDgelsBatched}. This overload uses Java arrays:
 * {@code double[]} (by pointer-to-pointer) for {@code Aarray} and {@code Carray}, and
 * {@code int[]} for {@code info} and {@code devInfoArray}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @ByPtrPtr double[] Aarray,
                                                           int lda, 
                                                           @ByPtrPtr double[] Carray,
                                                           int ldc,
                                                           int[] info, 
                                                           int[] devInfoArray,
                                                           int batchSize);
                                                                
/**
 * Native declaration of {@code cublasCgelsBatched}. This overload passes {@code Aarray} and
 * {@code Carray} as {@code PointerPointer} (cast to {@code cuComplex*const*}) and both
 * {@code info} and {@code devInfoArray} as {@code IntPointer}. {@code trans} is an
 * {@code int} cast to {@code cublasOperation_t}.
 * NOTE(review): presumably the single-precision complex counterpart of
 * {@code cublasSgelsBatched}; confirm the numerical contract against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("cuComplex*const*") PointerPointer Aarray,
                                                           int lda,
                                                           @Cast("cuComplex*const*") PointerPointer Carray,
                                                           int ldc,
                                                           IntPointer info,
                                                           IntPointer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasCgelsBatched}. This overload passes {@code Aarray} and
 * {@code Carray} as {@code float2} by pointer-to-pointer (cast to {@code cuComplex*const*}),
 * and {@code info} and {@code devInfoArray} as {@code IntPointer}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("cuComplex*const*") @ByPtrPtr float2 Aarray,
                                                           int lda,
                                                           @Cast("cuComplex*const*") @ByPtrPtr float2 Carray,
                                                           int ldc,
                                                           IntPointer info,
                                                           IntPointer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasCgelsBatched}. Same matrix parameters as the
 * {@code float2}/{@code IntPointer} overload, except that {@code info} and
 * {@code devInfoArray} are passed as {@code java.nio.IntBuffer}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("cuComplex*const*") @ByPtrPtr float2 Aarray,
                                                           int lda,
                                                           @Cast("cuComplex*const*") @ByPtrPtr float2 Carray,
                                                           int ldc,
                                                           IntBuffer info,
                                                           IntBuffer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasCgelsBatched}. Same matrix parameters as the
 * {@code float2}/{@code IntPointer} overload, except that {@code info} and
 * {@code devInfoArray} are passed as {@code int[]}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("cuComplex*const*") @ByPtrPtr float2 Aarray,
                                                           int lda,
                                                           @Cast("cuComplex*const*") @ByPtrPtr float2 Carray,
                                                           int ldc,
                                                           int[] info,
                                                           int[] devInfoArray,
                                                           int batchSize);
                                                                
/**
 * Native declaration of {@code cublasZgelsBatched}. This overload passes {@code Aarray} and
 * {@code Carray} as {@code PointerPointer} (cast to {@code cuDoubleComplex*const*}) and both
 * {@code info} and {@code devInfoArray} as {@code IntPointer}. {@code trans} is an
 * {@code int} cast to {@code cublasOperation_t}.
 * NOTE(review): presumably the double-precision complex counterpart of
 * {@code cublasSgelsBatched}; confirm the numerical contract against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("cuDoubleComplex*const*") PointerPointer Aarray,
                                                           int lda,
                                                           @Cast("cuDoubleComplex*const*") PointerPointer Carray,
                                                           int ldc,
                                                           IntPointer info,
                                                           IntPointer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasZgelsBatched}. This overload passes {@code Aarray} and
 * {@code Carray} as {@code double2} by pointer-to-pointer (cast to
 * {@code cuDoubleComplex*const*}), and {@code info} and {@code devInfoArray} as
 * {@code IntPointer}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Aarray,
                                                           int lda,
                                                           @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Carray,
                                                           int ldc,
                                                           IntPointer info,
                                                           IntPointer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasZgelsBatched}. Same matrix parameters as the
 * {@code double2}/{@code IntPointer} overload, except that {@code info} and
 * {@code devInfoArray} are passed as {@code java.nio.IntBuffer}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Aarray,
                                                           int lda,
                                                           @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Carray,
                                                           int ldc,
                                                           IntBuffer info,
                                                           IntBuffer devInfoArray,
                                                           int batchSize);
/**
 * Native declaration of {@code cublasZgelsBatched}. Same matrix parameters as the
 * {@code double2}/{@code IntPointer} overload, except that {@code info} and
 * {@code devInfoArray} are passed as {@code int[]}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZgelsBatched( cublasContext handle,
                                                           @Cast("cublasOperation_t") int trans,
                                                           int m,
                                                           int n,
                                                           int nrhs,
                                                           @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Aarray,
                                                           int lda,
                                                           @Cast("cuDoubleComplex*const*") @ByPtrPtr double2 Carray,
                                                           int ldc,
                                                           int[] info,
                                                           int[] devInfoArray,
                                                           int batchSize);
/* DGMM */
/**
 * Native declaration of {@code cublasSdgmm}. This overload takes {@code FloatPointer}
 * arguments: {@code A} and {@code x} are declared const, {@code C} is not. {@code mode} is an
 * {@code int} cast to {@code cublasSideMode_t}.
 * NOTE(review): presumably a diagonal-matrix/matrix multiply whose side is selected by
 * {@code mode}; confirm against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSdgmm(cublasContext handle,
                                                  @Cast("cublasSideMode_t") int mode, 
                                                  int m, 
                                                  int n,
                                                  @Const FloatPointer A, 
                                                  int lda,
                                                  @Const FloatPointer x, 
                                                  int incx,
                                                  FloatPointer C, 
                                                  int ldc);
/**
 * Native declaration of {@code cublasSdgmm}. This overload takes {@code java.nio.FloatBuffer}
 * arguments: {@code A} and {@code x} are declared const, {@code C} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSdgmm(cublasContext handle,
                                                  @Cast("cublasSideMode_t") int mode, 
                                                  int m, 
                                                  int n,
                                                  @Const FloatBuffer A, 
                                                  int lda,
                                                  @Const FloatBuffer x, 
                                                  int incx,
                                                  FloatBuffer C, 
                                                  int ldc);
/**
 * Native declaration of {@code cublasSdgmm}. This overload takes {@code float[]} arguments:
 * {@code A} and {@code x} are declared const, {@code C} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasSdgmm(cublasContext handle,
                                                  @Cast("cublasSideMode_t") int mode, 
                                                  int m, 
                                                  int n,
                                                  @Const float[] A, 
                                                  int lda,
                                                  @Const float[] x, 
                                                  int incx,
                                                  float[] C, 
                                                  int ldc);
    
/**
 * Native declaration of {@code cublasDdgmm}. This overload takes {@code DoublePointer}
 * arguments: {@code A} and {@code x} are declared const, {@code C} is not. {@code mode} is an
 * {@code int} cast to {@code cublasSideMode_t}.
 * NOTE(review): presumably the double-precision counterpart of {@code cublasSdgmm}; confirm
 * against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDdgmm(cublasContext handle,
                                                  @Cast("cublasSideMode_t") int mode, 
                                                  int m, 
                                                  int n,
                                                  @Const DoublePointer A, 
                                                  int lda,
                                                  @Const DoublePointer x, 
                                                  int incx,
                                                  DoublePointer C, 
                                                  int ldc);
/**
 * Native declaration of {@code cublasDdgmm}. This overload takes
 * {@code java.nio.DoubleBuffer} arguments: {@code A} and {@code x} are declared const,
 * {@code C} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDdgmm(cublasContext handle,
                                                  @Cast("cublasSideMode_t") int mode, 
                                                  int m, 
                                                  int n,
                                                  @Const DoubleBuffer A, 
                                                  int lda,
                                                  @Const DoubleBuffer x, 
                                                  int incx,
                                                  DoubleBuffer C, 
                                                  int ldc);
/**
 * Native declaration of {@code cublasDdgmm}. This overload takes {@code double[]} arguments:
 * {@code A} and {@code x} are declared const, {@code C} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDdgmm(cublasContext handle,
                                                  @Cast("cublasSideMode_t") int mode, 
                                                  int m, 
                                                  int n,
                                                  @Const double[] A, 
                                                  int lda,
                                                  @Const double[] x, 
                                                  int incx,
                                                  double[] C, 
                                                  int ldc);

/**
 * Native declaration of {@code cublasCdgmm}. {@code A} and {@code x} are {@code float2}
 * values cast to {@code const cuComplex*}; {@code C} is a {@code float2} cast to the
 * non-const {@code cuComplex*}. {@code mode} is an {@code int} cast to
 * {@code cublasSideMode_t}.
 * NOTE(review): presumably the single-precision complex counterpart of {@code cublasSdgmm};
 * confirm against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCdgmm(cublasContext handle,
                                                  @Cast("cublasSideMode_t") int mode, 
                                                  int m, 
                                                  int n,
                                                  @Cast("const cuComplex*") float2 A, 
                                                  int lda,
                                                  @Cast("const cuComplex*") float2 x, 
                                                  int incx,
                                                  @Cast("cuComplex*") float2 C, 
                                                  int ldc);
    
/**
 * Native declaration of {@code cublasZdgmm}. {@code A} and {@code x} are {@code double2}
 * values cast to {@code const cuDoubleComplex*}; {@code C} is a {@code double2} cast to the
 * non-const {@code cuDoubleComplex*}. {@code mode} is an {@code int} cast to
 * {@code cublasSideMode_t}.
 * NOTE(review): presumably the double-precision complex counterpart of {@code cublasSdgmm};
 * confirm against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZdgmm(cublasContext handle,
                                                  @Cast("cublasSideMode_t") int mode, 
                                                  int m, 
                                                  int n,
                                                  @Cast("const cuDoubleComplex*") double2 A, 
                                                  int lda,
                                                  @Cast("const cuDoubleComplex*") double2 x, 
                                                  int incx,
                                                  @Cast("cuDoubleComplex*") double2 C, 
                                                  int ldc);

/* TPTTR : Triangular Pack format to Triangular format */
/**
 * Native declaration of {@code cublasStpttr} (per the header: triangular pack format to
 * triangular format). This overload takes {@code FloatPointer} arguments: {@code AP} is
 * declared const, {@code A} is not. {@code uplo} is an {@code int} cast to
 * {@code cublasFillMode_t}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStpttr( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const FloatPointer AP,
                                                     FloatPointer A,  
                                                     int lda );
/**
 * Native declaration of {@code cublasStpttr}. This overload takes {@code java.nio.FloatBuffer}
 * arguments: {@code AP} is declared const, {@code A} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStpttr( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const FloatBuffer AP,
                                                     FloatBuffer A,  
                                                     int lda );
/**
 * Native declaration of {@code cublasStpttr}. This overload takes {@code float[]} arguments:
 * {@code AP} is declared const, {@code A} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStpttr( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const float[] AP,
                                                     float[] A,  
                                                     int lda );
                                       
/**
 * Native declaration of {@code cublasDtpttr}. This overload takes {@code DoublePointer}
 * arguments: {@code AP} is declared const, {@code A} is not. {@code uplo} is an {@code int}
 * cast to {@code cublasFillMode_t}.
 * NOTE(review): presumably the double-precision packed-to-triangular conversion; confirm
 * against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtpttr( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const DoublePointer AP,
                                                     DoublePointer A,  
                                                     int lda );
/**
 * Native declaration of {@code cublasDtpttr}. This overload takes
 * {@code java.nio.DoubleBuffer} arguments: {@code AP} is declared const, {@code A} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtpttr( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const DoubleBuffer AP,
                                                     DoubleBuffer A,  
                                                     int lda );
/**
 * Native declaration of {@code cublasDtpttr}. This overload takes {@code double[]} arguments:
 * {@code AP} is declared const, {@code A} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtpttr( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const double[] AP,
                                                     double[] A,  
                                                     int lda );
                                      
/**
 * Native declaration of {@code cublasCtpttr}. {@code AP} is a {@code float2} cast to
 * {@code const cuComplex*}; {@code A} is a {@code float2} cast to the non-const
 * {@code cuComplex*}. {@code uplo} is an {@code int} cast to {@code cublasFillMode_t}.
 * NOTE(review): presumably the single-precision complex packed-to-triangular conversion;
 * confirm against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasCtpttr( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Cast("const cuComplex*") float2 AP,
                                                     @Cast("cuComplex*") float2 A,  
                                                     int lda );
                                                    
/**
 * Native declaration of {@code cublasZtpttr}. {@code AP} is a {@code double2} cast to
 * {@code const cuDoubleComplex*}; {@code A} is a {@code double2} cast to the non-const
 * {@code cuDoubleComplex*}. {@code uplo} is an {@code int} cast to {@code cublasFillMode_t}.
 * NOTE(review): presumably the double-precision complex packed-to-triangular conversion;
 * confirm against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasZtpttr( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Cast("const cuDoubleComplex*") double2 AP,
                                                     @Cast("cuDoubleComplex*") double2 A,  
                                                     int lda );
 /* TRTTP : Triangular format to Triangular Pack format */                                      
/**
 * Native declaration of {@code cublasStrttp} (per the header: triangular format to triangular
 * pack format). This overload takes {@code FloatPointer} arguments: {@code A} is declared
 * const, {@code AP} is not. {@code uplo} is an {@code int} cast to {@code cublasFillMode_t}.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrttp( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const FloatPointer A,
                                                     int lda,
                                                     FloatPointer AP );
/**
 * Native declaration of {@code cublasStrttp}. This overload takes {@code java.nio.FloatBuffer}
 * arguments: {@code A} is declared const, {@code AP} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrttp( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const FloatBuffer A,
                                                     int lda,
                                                     FloatBuffer AP );
/**
 * Native declaration of {@code cublasStrttp}. This overload takes {@code float[]} arguments:
 * {@code A} is declared const, {@code AP} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasStrttp( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const float[] A,
                                                     int lda,
                                                     float[] AP );
                                      
/**
 * Native declaration of {@code cublasDtrttp}. This overload takes {@code DoublePointer}
 * arguments: {@code A} is declared const, {@code AP} is not. {@code uplo} is an {@code int}
 * cast to {@code cublasFillMode_t}.
 * NOTE(review): presumably the double-precision triangular-to-packed conversion; confirm
 * against the cuBLAS documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrttp( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const DoublePointer A,
                                                     int lda,
                                                     DoublePointer AP );
/**
 * Native declaration of {@code cublasDtrttp}. This overload takes
 * {@code java.nio.DoubleBuffer} arguments: {@code A} is declared const, {@code AP} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrttp( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const DoubleBuffer A,
                                                     int lda,
                                                     DoubleBuffer AP );
/**
 * Native declaration of {@code cublasDtrttp}. This overload takes {@code double[]} arguments:
 * {@code A} is declared const, {@code AP} is not.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasDtrttp( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Const double[] A,
                                                     int lda,
                                                     double[] AP );
                                      
/**
 * Binding for the native {@code cublasCtrttp}. Both matrix arguments are
 * {@code float2} objects cast to {@code cuComplex} pointers in the native call.
 *
 * @param handle cuBLAS context the call is issued on
 * @param uplo   a native {@code cublasFillMode_t} value, passed as an int
 * @param n      int size argument (NOTE(review): presumably the matrix order)
 * @param A      input, cast to a pointer to const {@code cuComplex}
 * @param lda    int argument paired with {@code A} (NOTE(review): presumably its leading dimension)
 * @param AP     cast to a non-const {@code cuComplex} pointer (NOTE(review): presumably the output)
 * @return the native {@code cublasStatus_t} value, as an int
 */
public static native @Cast("cublasStatus_t") int cublasCtrttp( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Cast("const cuComplex*") float2 A,
                                                     int lda,
                                                     @Cast("cuComplex*") float2 AP );
                                                     
/**
 * Binding for the native {@code cublasZtrttp}. Both matrix arguments are
 * {@code double2} objects cast to {@code cuDoubleComplex} pointers in the
 * native call.
 *
 * @param handle cuBLAS context the call is issued on
 * @param uplo   a native {@code cublasFillMode_t} value, passed as an int
 * @param n      int size argument (NOTE(review): presumably the matrix order)
 * @param A      input, cast to a pointer to const {@code cuDoubleComplex}
 * @param lda    int argument paired with {@code A} (NOTE(review): presumably its leading dimension)
 * @param AP     cast to a non-const {@code cuDoubleComplex} pointer (NOTE(review): presumably the output)
 * @return the native {@code cublasStatus_t} value, as an int
 */
public static native @Cast("cublasStatus_t") int cublasZtrttp( cublasContext handle, 
                                                     @Cast("cublasFillMode_t") int uplo, 
                                                     int n,                                     
                                                     @Cast("const cuDoubleComplex*") double2 A,
                                                     int lda,
                                                     @Cast("cuDoubleComplex*") double2 AP );                                        


// #if defined(__cplusplus)

/**
 * Binding for the native {@code cublasMigrateComputeType}. The three overloads
 * differ only in the Java type carrying {@code computeType}:
 * {@code IntPointer}, {@code IntBuffer} or {@code int[]}.
 *
 * @param handle      cuBLAS context the call is issued on
 * @param dataType    a native {@code cudaDataType_t} value, passed as an int
 * @param computeType cast to a non-const {@code cublasComputeType_t} pointer
 *                    (NOTE(review): presumably receives the migrated compute type; confirm in cuBLAS docs)
 * @return the native {@code cublasStatus_t} value, as an int
 */
public static native @Cast("cublasStatus_t") int cublasMigrateComputeType(cublasContext handle,
                                                      @Cast("cudaDataType_t") int dataType,
                                                      @Cast("cublasComputeType_t*") IntPointer computeType);
// Same native function; computeType carried as java.nio.IntBuffer.
public static native @Cast("cublasStatus_t") int cublasMigrateComputeType(cublasContext handle,
                                                      @Cast("cudaDataType_t") int dataType,
                                                      @Cast("cublasComputeType_t*") IntBuffer computeType);
// Same native function; computeType carried as an int[] array.
public static native @Cast("cublasStatus_t") int cublasMigrateComputeType(cublasContext handle,
                                                      @Cast("cudaDataType_t") int dataType,
                                                      @Cast("cublasComputeType_t*") int[] computeType);
/* wrappers to accept old code with cudaDataType computeType when referenced from c++ code */
// #endif /* __cplusplus */

// #endif /* !defined(CUBLAS_API_H_) */


// Parsed from <cublas.h>

/*
 * Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */
 
/*
 * This is the public header file for the CUBLAS library, defining the API
 *
 * CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines) 
 * on top of the CUDA runtime. 
 */

// #if !defined(CUBLAS_H_)
// #define CUBLAS_H_

// #include <cuda_runtime.h>

// #ifndef CUBLASWINAPI
// #ifdef _WIN32
// #define CUBLASWINAPI __stdcall
// #else
// #define CUBLASWINAPI 
// #endif
// #endif

// #undef CUBLASAPI
// #ifdef __CUDACC__
// #define CUBLASAPI __host__
// #else
// #define CUBLASAPI
// #endif

// #include "cublas_api.h"

// #if defined(__cplusplus)
// #endif

/* CUBLAS data types */
// #define cublasStatus cublasStatus_t

// Legacy cublas.h entry points: none of them takes a cublasContext handle, and
// each returns the native cublasStatus_t value as an int.
// NOTE(review): by name these initialize the library, shut it down, and fetch
// the last recorded error; the exact contract is not visible here, so confirm
// against the legacy cuBLAS API documentation.
public static native @Cast("cublasStatus_t") int cublasInit();
public static native @Cast("cublasStatus_t") int cublasShutdown();
public static native @Cast("cublasStatus_t") int cublasGetError();

/**
 * Binding for the legacy, handle-free {@code cublasGetVersion}. The three
 * overloads differ only in the Java type carrying {@code version}:
 * {@code IntPointer}, {@code IntBuffer} or {@code int[]}.
 *
 * @param version non-const int pointer (NOTE(review): presumably receives the library version)
 * @return the native {@code cublasStatus_t} value, as an int
 */
public static native @Cast("cublasStatus_t") int cublasGetVersion(IntPointer version);
public static native @Cast("cublasStatus_t") int cublasGetVersion(IntBuffer version);
public static native @Cast("cublasStatus_t") int cublasGetVersion(int[] version);
/**
 * Binding for the legacy {@code cublasAlloc}. {@code devicePtr} is passed to
 * native code as a {@code void**}; the two overloads differ only in how that
 * pointer-to-pointer is expressed in Java ({@code PointerPointer}, or a
 * {@code Pointer} annotated {@code @ByPtrPtr}).
 *
 * @param n         int count argument (NOTE(review): presumably the number of elements)
 * @param elemSize  int size argument (NOTE(review): presumably bytes per element)
 * @param devicePtr {@code void**} argument (NOTE(review): presumably receives the allocated address)
 * @return the native {@code cublasStatus_t} value, as an int
 */
public static native @Cast("cublasStatus_t") int cublasAlloc(int n, int elemSize, @Cast("void**") PointerPointer devicePtr);
public static native @Cast("cublasStatus_t") int cublasAlloc(int n, int elemSize, @Cast("void**") @ByPtrPtr Pointer devicePtr);

/**
 * Binding for the legacy {@code cublasFree}.
 *
 * @param devicePtr pointer handed to the native call
 *                  (NOTE(review): presumably memory obtained from {@code cublasAlloc}; confirm)
 * @return the native {@code cublasStatus_t} value, as an int
 */
public static native @Cast("cublasStatus_t") int cublasFree(Pointer devicePtr);


/**
 * Binding for the legacy {@code cublasSetKernelStream}.
 *
 * @param stream a {@code CUstream_st} object passed to the native call
 *               (NOTE(review): presumably the stream later legacy calls run on; confirm)
 * @return the native {@code cublasStatus_t} value, as an int
 */
public static native @Cast("cublasStatus_t") int cublasSetKernelStream(CUstream_st stream);



/* ---------------- CUBLAS BLAS1 functions ---------------- */
/* NRM2 */
// Legacy NRM2 bindings. Each takes an element count n, a const vector x and an
// int incx, and returns the result directly (float for S/Sc, double for D/Dz)
// rather than a cublasStatus_t.
// The S and D variants come in three overloads that differ only in the Java
// carrier for x (Pointer, NIO Buffer, primitive array); the complex variants
// take float2/double2 cast to const cuComplex/cuDoubleComplex pointers.
// NOTE(review): presumably the Euclidean norm of x with stride incx, as in
// reference BLAS; confirm in the cuBLAS docs.
public static native float cublasSnrm2(int n, @Const FloatPointer x, int incx);
public static native float cublasSnrm2(int n, @Const FloatBuffer x, int incx);
public static native float cublasSnrm2(int n, @Const float[] x, int incx);
public static native double cublasDnrm2(int n, @Const DoublePointer x, int incx);
public static native double cublasDnrm2(int n, @Const DoubleBuffer x, int incx);
public static native double cublasDnrm2(int n, @Const double[] x, int incx);
public static native float cublasScnrm2(int n, @Cast("const cuComplex*") float2 x, int incx);
public static native double cublasDznrm2(int n, @Cast("const cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/
/* DOT */
// Legacy DOT bindings. Each takes two const vectors (x, incx) and (y, incy)
// plus an element count n, and returns the result directly.
// The real variants return float/double and come in three overloads differing
// only in the Java carrier for x and y (Pointer, NIO Buffer, primitive array).
// NOTE(review): presumably the dot product of x and y; confirm in cuBLAS docs.
public static native float cublasSdot(int n, @Const FloatPointer x, int incx, @Const FloatPointer y, 
                               int incy);
public static native float cublasSdot(int n, @Const FloatBuffer x, int incx, @Const FloatBuffer y, 
                               int incy);
public static native float cublasSdot(int n, @Const float[] x, int incx, @Const float[] y, 
                               int incy);
public static native double cublasDdot(int n, @Const DoublePointer x, int incx, @Const DoublePointer y, 
                               int incy);
public static native double cublasDdot(int n, @Const DoubleBuffer x, int incx, @Const DoubleBuffer y, 
                               int incy);
public static native double cublasDdot(int n, @Const double[] x, int incx, @Const double[] y, 
                               int incy);
// Complex variants: the result is returned by value (@ByVal) as a float2 or
// double2 object.
// NOTE(review): the "u"/"c" suffixes presumably mean unconjugated/conjugated,
// as in reference BLAS; confirm.
public static native @ByVal @Cast("cuComplex*") float2 cublasCdotu(int n, @Cast("const cuComplex*") float2 x, int incx, @Cast("const cuComplex*") float2 y, 
                               int incy);
public static native @ByVal @Cast("cuComplex*") float2 cublasCdotc(int n, @Cast("const cuComplex*") float2 x, int incx, @Cast("const cuComplex*") float2 y, 
                               int incy);
public static native @ByVal @Cast("cuDoubleComplex*") double2 cublasZdotu(int n, @Cast("const cuDoubleComplex*") double2 x, int incx, @Cast("const cuDoubleComplex*") double2 y, 
                               int incy);
public static native @ByVal @Cast("cuDoubleComplex*") double2 cublasZdotc(int n, @Cast("const cuDoubleComplex*") double2 x, int incx, @Cast("const cuDoubleComplex*") double2 y, 
                               int incy);
/*------------------------------------------------------------------------*/
/* SCAL */
// Legacy SCAL bindings. Each takes a scalar alpha and a non-const vector
// (x, incx) of n elements, and returns void.
// Real variants come in three overloads differing only in the Java carrier
// for x. Complex variants take alpha by value (@ByVal) as float2/double2.
// NOTE(review): presumably scales x in place by alpha; confirm in cuBLAS docs.
public static native void cublasSscal(int n, float alpha, FloatPointer x, int incx);
public static native void cublasSscal(int n, float alpha, FloatBuffer x, int incx);
public static native void cublasSscal(int n, float alpha, float[] x, int incx);
public static native void cublasDscal(int n, double alpha, DoublePointer x, int incx);
public static native void cublasDscal(int n, double alpha, DoubleBuffer x, int incx);
public static native void cublasDscal(int n, double alpha, double[] x, int incx);
public static native void cublasCscal(int n, @ByVal @Cast("cuComplex*") float2 alpha, @Cast("cuComplex*") float2 x, int incx);
public static native void cublasZscal(int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("cuDoubleComplex*") double2 x, int incx);

// Mixed variants: complex vector x with a real (float/double) alpha.
public static native void cublasCsscal(int n, float alpha, @Cast("cuComplex*") float2 x, int incx);
public static native void cublasZdscal(int n, double alpha, @Cast("cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/
/* AXPY */
// Legacy AXPY bindings. Each takes a scalar alpha, a const vector (x, incx)
// and a non-const vector (y, incy) of n elements, and returns void.
// Real variants come in three overloads differing only in the Java carrier
// for x and y; complex variants take alpha by value (@ByVal).
// NOTE(review): presumably y = alpha * x + y, as in reference BLAS; confirm.
public static native void cublasSaxpy(int n, float alpha, @Const FloatPointer x, int incx, 
                               FloatPointer y, int incy);
public static native void cublasSaxpy(int n, float alpha, @Const FloatBuffer x, int incx, 
                               FloatBuffer y, int incy);
public static native void cublasSaxpy(int n, float alpha, @Const float[] x, int incx, 
                               float[] y, int incy);
public static native void cublasDaxpy(int n, double alpha, @Const DoublePointer x, 
                               int incx, DoublePointer y, int incy);
public static native void cublasDaxpy(int n, double alpha, @Const DoubleBuffer x, 
                               int incx, DoubleBuffer y, int incy);
public static native void cublasDaxpy(int n, double alpha, @Const double[] x, 
                               int incx, double[] y, int incy);
public static native void cublasCaxpy(int n, @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 x, 
                               int incx, @Cast("cuComplex*") float2 y, int incy);
public static native void cublasZaxpy(int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("const cuDoubleComplex*") double2 x, 
                               int incx, @Cast("cuDoubleComplex*") double2 y, int incy);
/*------------------------------------------------------------------------*/
/* COPY */
// Legacy COPY bindings. Each takes a const source vector (x, incx) and a
// non-const destination vector (y, incy) of n elements, and returns void.
// Real variants come in three overloads differing only in the Java carrier
// for x and y.
// NOTE(review): presumably copies x into y; confirm in cuBLAS docs.
public static native void cublasScopy(int n, @Const FloatPointer x, int incx, FloatPointer y, 
                               int incy);
public static native void cublasScopy(int n, @Const FloatBuffer x, int incx, FloatBuffer y, 
                               int incy);
public static native void cublasScopy(int n, @Const float[] x, int incx, float[] y, 
                               int incy);
public static native void cublasDcopy(int n, @Const DoublePointer x, int incx, DoublePointer y, 
                               int incy);
public static native void cublasDcopy(int n, @Const DoubleBuffer x, int incx, DoubleBuffer y, 
                               int incy);
public static native void cublasDcopy(int n, @Const double[] x, int incx, double[] y, 
                               int incy);
public static native void cublasCcopy(int n, @Cast("const cuComplex*") float2 x, int incx, @Cast("cuComplex*") float2 y,
                               int incy);
public static native void cublasZcopy(int n, @Cast("const cuDoubleComplex*") double2 x, int incx, @Cast("cuDoubleComplex*") double2 y,
                               int incy);
/*------------------------------------------------------------------------*/
/* SWAP */
// Legacy SWAP bindings. Both vectors (x, incx) and (y, incy) are non-const,
// each call covers n elements, and all variants return void.
// Real variants come in three overloads differing only in the Java carrier
// for x and y.
// NOTE(review): presumably exchanges the contents of x and y; confirm.
public static native void cublasSswap(int n, FloatPointer x, int incx, FloatPointer y, int incy);
public static native void cublasSswap(int n, FloatBuffer x, int incx, FloatBuffer y, int incy);
public static native void cublasSswap(int n, float[] x, int incx, float[] y, int incy);
public static native void cublasDswap(int n, DoublePointer x, int incx, DoublePointer y, int incy);
public static native void cublasDswap(int n, DoubleBuffer x, int incx, DoubleBuffer y, int incy);
public static native void cublasDswap(int n, double[] x, int incx, double[] y, int incy);
public static native void cublasCswap(int n, @Cast("cuComplex*") float2 x, int incx, @Cast("cuComplex*") float2 y, int incy);
public static native void cublasZswap(int n, @Cast("cuDoubleComplex*") double2 x, int incx, @Cast("cuDoubleComplex*") double2 y, int incy);           
/*------------------------------------------------------------------------*/
/* AMAX */
// Legacy AMAX bindings. Each takes a const vector (x, incx) of n elements and
// returns an int directly.
// Real variants come in three overloads differing only in the Java carrier
// for x.
// NOTE(review): presumably the index of the element of largest magnitude.
// Whether that index is 0- or 1-based is not visible here; check the cuBLAS
// docs before using it to index a Java array.
public static native int cublasIsamax(int n, @Const FloatPointer x, int incx);
public static native int cublasIsamax(int n, @Const FloatBuffer x, int incx);
public static native int cublasIsamax(int n, @Const float[] x, int incx);
public static native int cublasIdamax(int n, @Const DoublePointer x, int incx);
public static native int cublasIdamax(int n, @Const DoubleBuffer x, int incx);
public static native int cublasIdamax(int n, @Const double[] x, int incx);
public static native int cublasIcamax(int n, @Cast("const cuComplex*") float2 x, int incx);
public static native int cublasIzamax(int n, @Cast("const cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/
/* AMIN */
// Legacy AMIN bindings. Each takes a const vector (x, incx) of n elements and
// returns an int directly.
// Real variants come in three overloads differing only in the Java carrier
// for x.
// NOTE(review): presumably the index of the element of smallest magnitude.
// Whether that index is 0- or 1-based is not visible here; check the cuBLAS
// docs before using it to index a Java array.
public static native int cublasIsamin(int n, @Const FloatPointer x, int incx);
public static native int cublasIsamin(int n, @Const FloatBuffer x, int incx);
public static native int cublasIsamin(int n, @Const float[] x, int incx);
public static native int cublasIdamin(int n, @Const DoublePointer x, int incx);
public static native int cublasIdamin(int n, @Const DoubleBuffer x, int incx);
public static native int cublasIdamin(int n, @Const double[] x, int incx);

// Complex variants: x is a float2/double2 cast to a const complex pointer.
public static native int cublasIcamin(int n, @Cast("const cuComplex*") float2 x, int incx);
public static native int cublasIzamin(int n, @Cast("const cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/
/* ASUM */
// Legacy ASUM bindings. Each takes a const vector (x, incx) of n elements and
// returns the result directly (float for S/Sc, double for D/Dz).
// Real variants come in three overloads differing only in the Java carrier
// for x.
// NOTE(review): presumably the sum of absolute values of x; confirm in the
// cuBLAS docs, in particular how the complex variants define it.
public static native float cublasSasum(int n, @Const FloatPointer x, int incx);
public static native float cublasSasum(int n, @Const FloatBuffer x, int incx);
public static native float cublasSasum(int n, @Const float[] x, int incx);
public static native double cublasDasum(int n, @Const DoublePointer x, int incx);
public static native double cublasDasum(int n, @Const DoubleBuffer x, int incx);
public static native double cublasDasum(int n, @Const double[] x, int incx);
public static native float cublasScasum(int n, @Cast("const cuComplex*") float2 x, int incx);
public static native double cublasDzasum(int n, @Cast("const cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/
/* ROT */
// Legacy ROT bindings. Both vectors (x, incx) and (y, incy) are non-const; the
// last two arguments are scalars passed by value; all variants return void.
// Real variants come in three overloads differing only in the Java carrier
// for x and y.
// NOTE(review): presumably applies a plane rotation to (x, y) in place, with
// the two scalars as its cosine/sine; confirm in the cuBLAS docs.
public static native void cublasSrot(int n, FloatPointer x, int incx, FloatPointer y, int incy, 
                              float sc, float ss);
public static native void cublasSrot(int n, FloatBuffer x, int incx, FloatBuffer y, int incy, 
                              float sc, float ss);
public static native void cublasSrot(int n, float[] x, int incx, float[] y, int incy, 
                              float sc, float ss);
public static native void cublasDrot(int n, DoublePointer x, int incx, DoublePointer y, int incy, 
                              double sc, double ss);
public static native void cublasDrot(int n, DoubleBuffer x, int incx, DoubleBuffer y, int incy, 
                              double sc, double ss);
public static native void cublasDrot(int n, double[] x, int incx, double[] y, int incy, 
                              double sc, double ss);
// Complex vectors; first scalar is real, second is a complex value passed
// by value (@ByVal).
public static native void cublasCrot(int n, @Cast("cuComplex*") float2 x, int incx, @Cast("cuComplex*") float2 y, 
                              int incy, float c, @ByVal @Cast("cuComplex*") float2 s);
public static native void cublasZrot(int n, @Cast("cuDoubleComplex*") double2 x, int incx, 
                              @Cast("cuDoubleComplex*") double2 y, int incy, double sc, 
                              @ByVal @Cast("cuDoubleComplex*") double2 cs);
// Complex vectors; both scalars are real.
public static native void cublasCsrot(int n, @Cast("cuComplex*") float2 x, int incx, @Cast("cuComplex*") float2 y,
                               int incy, float c, float s);
public static native void cublasZdrot(int n, @Cast("cuDoubleComplex*") double2 x, int incx, 
                               @Cast("cuDoubleComplex*") double2 y, int incy, double c, double s);
/*------------------------------------------------------------------------*/
/* ROTG */
// Legacy ROTG bindings. No element count or stride is taken, and all variants
// return void.
// In the real variants all four arguments are non-const pointers; the three
// overloads differ only in the Java carrier (Pointer, NIO Buffer, array).
// NOTE(review): presumably constructs a plane rotation from (sa, sb) and
// writes results back through the pointers; which arguments are inputs,
// outputs or both is not visible here. Confirm in the cuBLAS docs.
public static native void cublasSrotg(FloatPointer sa, FloatPointer sb, FloatPointer sc, FloatPointer ss);
public static native void cublasSrotg(FloatBuffer sa, FloatBuffer sb, FloatBuffer sc, FloatBuffer ss);
public static native void cublasSrotg(float[] sa, float[] sb, float[] sc, float[] ss);
public static native void cublasDrotg(DoublePointer sa, DoublePointer sb, DoublePointer sc, DoublePointer ss);
public static native void cublasDrotg(DoubleBuffer sa, DoubleBuffer sb, DoubleBuffer sc, DoubleBuffer ss);
public static native void cublasDrotg(double[] sa, double[] sb, double[] sc, double[] ss);
// Complex variants: ca and cs are pointers, cb is passed by value (@ByVal),
// and sc is a real pointer. Only the carrier type of sc varies across the
// three overloads.
public static native void cublasCrotg(@Cast("cuComplex*") float2 ca, @ByVal @Cast("cuComplex*") float2 cb, FloatPointer sc,
                               @Cast("cuComplex*") float2 cs);
public static native void cublasCrotg(@Cast("cuComplex*") float2 ca, @ByVal @Cast("cuComplex*") float2 cb, FloatBuffer sc,
                               @Cast("cuComplex*") float2 cs);
public static native void cublasCrotg(@Cast("cuComplex*") float2 ca, @ByVal @Cast("cuComplex*") float2 cb, float[] sc,
                               @Cast("cuComplex*") float2 cs);                                     
public static native void cublasZrotg(@Cast("cuDoubleComplex*") double2 ca, @ByVal @Cast("cuDoubleComplex*") double2 cb, DoublePointer sc,
                               @Cast("cuDoubleComplex*") double2 cs);
public static native void cublasZrotg(@Cast("cuDoubleComplex*") double2 ca, @ByVal @Cast("cuDoubleComplex*") double2 cb, DoubleBuffer sc,
                               @Cast("cuDoubleComplex*") double2 cs);
public static native void cublasZrotg(@Cast("cuDoubleComplex*") double2 ca, @ByVal @Cast("cuDoubleComplex*") double2 cb, double[] sc,
                               @Cast("cuDoubleComplex*") double2 cs);                                                               
/*------------------------------------------------------------------------*/
/* ROTM */
// Legacy ROTM bindings (real types only in this section). Vectors (x, incx)
// and (y, incy) are non-const; sparam is a const pointer; all return void.
// The three overloads per precision differ only in the Java carrier type.
// NOTE(review): presumably applies the modified rotation described by sparam
// to (x, y) in place. The length and layout of sparam are not visible here;
// confirm in the cuBLAS docs.
public static native void cublasSrotm(int n, FloatPointer x, int incx, FloatPointer y, int incy, 
                              @Const FloatPointer sparam);
public static native void cublasSrotm(int n, FloatBuffer x, int incx, FloatBuffer y, int incy, 
                              @Const FloatBuffer sparam);
public static native void cublasSrotm(int n, float[] x, int incx, float[] y, int incy, 
                              @Const float[] sparam);
public static native void cublasDrotm(int n, DoublePointer x, int incx, DoublePointer y, int incy, 
                              @Const DoublePointer sparam);
public static native void cublasDrotm(int n, DoubleBuffer x, int incx, DoubleBuffer y, int incy, 
                              @Const DoubleBuffer sparam);
public static native void cublasDrotm(int n, double[] x, int incx, double[] y, int incy, 
                              @Const double[] sparam);
/*------------------------------------------------------------------------*/
/* ROTMG */
// Legacy ROTMG bindings (real types only in this section). sd1, sd2, sx1 and
// sparam are non-const pointers; sy1 is the only const argument; all return
// void. The three overloads per precision differ only in the Java carrier.
// NOTE(review): presumably constructs the modified-rotation parameters and
// writes them to sparam, updating sd1/sd2/sx1. The length and layout of
// sparam are not visible here; confirm in the cuBLAS docs.
public static native void cublasSrotmg(FloatPointer sd1, FloatPointer sd2, FloatPointer sx1, 
                                @Const FloatPointer sy1, FloatPointer sparam);
public static native void cublasSrotmg(FloatBuffer sd1, FloatBuffer sd2, FloatBuffer sx1, 
                                @Const FloatBuffer sy1, FloatBuffer sparam);
public static native void cublasSrotmg(float[] sd1, float[] sd2, float[] sx1, 
                                @Const float[] sy1, float[] sparam);
public static native void cublasDrotmg(DoublePointer sd1, DoublePointer sd2, DoublePointer sx1, 
                                @Const DoublePointer sy1, DoublePointer sparam);
public static native void cublasDrotmg(DoubleBuffer sd1, DoubleBuffer sd2, DoubleBuffer sx1, 
                                @Const DoubleBuffer sy1, DoubleBuffer sparam);
public static native void cublasDrotmg(double[] sd1, double[] sd2, double[] sx1, 
                                @Const double[] sy1, double[] sparam);
                           
/* --------------- CUBLAS BLAS2 functions  ---------------- */
/* GEMV */
// Legacy GEMV bindings. trans is a Java byte passed as a native char; A (with
// lda) and x (with incx) are const inputs; y (with incy) is non-const; alpha
// and beta are scalars passed by value; all variants return void.
// Real variants come in three overloads differing only in the Java carrier
// for A, x and y.
// NOTE(review): presumably y = alpha * op(A) * x + beta * y with op selected
// by trans. The accepted trans characters are not visible here; confirm in
// the cuBLAS docs.
public static native void cublasSgemv(@Cast("char") byte trans, int m, int n, float alpha,
                               @Const FloatPointer A, int lda, @Const FloatPointer x, int incx,
                               float beta, FloatPointer y, int incy);
public static native void cublasSgemv(@Cast("char") byte trans, int m, int n, float alpha,
                               @Const FloatBuffer A, int lda, @Const FloatBuffer x, int incx,
                               float beta, FloatBuffer y, int incy);
public static native void cublasSgemv(@Cast("char") byte trans, int m, int n, float alpha,
                               @Const float[] A, int lda, @Const float[] x, int incx,
                               float beta, float[] y, int incy);
public static native void cublasDgemv(@Cast("char") byte trans, int m, int n, double alpha,
                               @Const DoublePointer A, int lda, @Const DoublePointer x, int incx,
                               double beta, DoublePointer y, int incy);
public static native void cublasDgemv(@Cast("char") byte trans, int m, int n, double alpha,
                               @Const DoubleBuffer A, int lda, @Const DoubleBuffer x, int incx,
                               double beta, DoubleBuffer y, int incy);
public static native void cublasDgemv(@Cast("char") byte trans, int m, int n, double alpha,
                               @Const double[] A, int lda, @Const double[] x, int incx,
                               double beta, double[] y, int incy);
// Complex variants: alpha and beta are float2/double2 passed by value (@ByVal).
public static native void cublasCgemv(@Cast("char") byte trans, int m, int n, @ByVal @Cast("cuComplex*") float2 alpha,
                               @Cast("const cuComplex*") float2 A, int lda, @Cast("const cuComplex*") float2 x, int incx,
                               @ByVal @Cast("cuComplex*") float2 beta, @Cast("cuComplex*") float2 y, int incy);
public static native void cublasZgemv(@Cast("char") byte trans, int m, int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha,
                               @Cast("const cuDoubleComplex*") double2 A, int lda, @Cast("const cuDoubleComplex*") double2 x, int incx,
                               @ByVal @Cast("cuDoubleComplex*") double2 beta, @Cast("cuDoubleComplex*") double2 y, int incy);
/*------------------------------------------------------------------------*/
/* GBMV */
// Legacy GBMV bindings. Same argument shape as the GEMV bindings plus two
// extra ints, kl and ku. trans is a Java byte passed as a native char; A and
// x are const inputs; y is non-const; alpha and beta are scalars passed by
// value; all variants return void.
// Real variants come in three overloads differing only in the Java carrier
// for A, x and y.
// NOTE(review): presumably a banded matrix-vector product with kl sub- and
// ku super-diagonals. The band storage layout of A is not visible here;
// confirm in the cuBLAS docs.
public static native void cublasSgbmv(@Cast("char") byte trans, int m, int n, int kl, int ku, 
                               float alpha, @Const FloatPointer A, int lda, 
                               @Const FloatPointer x, int incx, float beta, FloatPointer y, 
                               int incy);
public static native void cublasSgbmv(@Cast("char") byte trans, int m, int n, int kl, int ku, 
                               float alpha, @Const FloatBuffer A, int lda, 
                               @Const FloatBuffer x, int incx, float beta, FloatBuffer y, 
                               int incy);
public static native void cublasSgbmv(@Cast("char") byte trans, int m, int n, int kl, int ku, 
                               float alpha, @Const float[] A, int lda, 
                               @Const float[] x, int incx, float beta, float[] y, 
                               int incy);
public static native void cublasDgbmv(@Cast("char") byte trans, int m, int n, int kl, int ku, 
                               double alpha, @Const DoublePointer A, int lda, 
                               @Const DoublePointer x, int incx, double beta, DoublePointer y, 
                               int incy);
public static native void cublasDgbmv(@Cast("char") byte trans, int m, int n, int kl, int ku, 
                               double alpha, @Const DoubleBuffer A, int lda, 
                               @Const DoubleBuffer x, int incx, double beta, DoubleBuffer y, 
                               int incy);
public static native void cublasDgbmv(@Cast("char") byte trans, int m, int n, int kl, int ku, 
                               double alpha, @Const double[] A, int lda, 
                               @Const double[] x, int incx, double beta, double[] y, 
                               int incy);
// Complex variants: alpha and beta are float2/double2 passed by value (@ByVal).
public static native void cublasCgbmv(@Cast("char") byte trans, int m, int n, int kl, int ku, 
                               @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 A, int lda, 
                               @Cast("const cuComplex*") float2 x, int incx, @ByVal @Cast("cuComplex*") float2 beta, @Cast("cuComplex*") float2 y, 
                               int incy);
public static native void cublasZgbmv(@Cast("char") byte trans, int m, int n, int kl, int ku, 
                               @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("const cuDoubleComplex*") double2 A, int lda, 
                               @Cast("const cuDoubleComplex*") double2 x, int incx, @ByVal @Cast("cuDoubleComplex*") double2 beta, @Cast("cuDoubleComplex*") double2 y, 
                               int incy);                  
/*------------------------------------------------------------------------*/
/* TRMV */
// Legacy TRMV bindings. uplo, trans and diag are Java bytes passed as native
// chars; A (with lda) is a const input; x (with incx) is non-const; all
// variants return void.
// Real variants come in three overloads differing only in the Java carrier
// for A and x.
// NOTE(review): presumably a triangular matrix-vector product that overwrites
// x. The accepted uplo/trans/diag characters are not visible here; confirm
// in the cuBLAS docs.
public static native void cublasStrmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, 
                               @Const FloatPointer A, int lda, FloatPointer x, int incx);
public static native void cublasStrmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, 
                               @Const FloatBuffer A, int lda, FloatBuffer x, int incx);
public static native void cublasStrmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, 
                               @Const float[] A, int lda, float[] x, int incx);
public static native void cublasDtrmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, 
                               @Const DoublePointer A, int lda, DoublePointer x, int incx);
public static native void cublasDtrmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, 
                               @Const DoubleBuffer A, int lda, DoubleBuffer x, int incx);
public static native void cublasDtrmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, 
                               @Const double[] A, int lda, double[] x, int incx);
public static native void cublasCtrmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, 
                               @Cast("const cuComplex*") float2 A, int lda, @Cast("cuComplex*") float2 x, int incx);
public static native void cublasZtrmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, 
                               @Cast("const cuDoubleComplex*") double2 A, int lda, @Cast("cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/
/* TBMV */
// Legacy TBMV bindings. Same argument shape as the TRMV bindings plus one
// extra int, k. uplo, trans and diag are Java bytes passed as native chars;
// A (with lda) is a const input; x (with incx) is non-const; all return void.
// Real variants come in three overloads differing only in the Java carrier
// for A and x.
// NOTE(review): presumably a triangular banded matrix-vector product with k
// off-diagonals that overwrites x; confirm in the cuBLAS docs.
public static native void cublasStbmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, int k, 
                               @Const FloatPointer A, int lda, FloatPointer x, int incx);
public static native void cublasStbmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, int k, 
                               @Const FloatBuffer A, int lda, FloatBuffer x, int incx);
public static native void cublasStbmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, int k, 
                               @Const float[] A, int lda, float[] x, int incx);
public static native void cublasDtbmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, int k, 
                               @Const DoublePointer A, int lda, DoublePointer x, int incx);
public static native void cublasDtbmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, int k, 
                               @Const DoubleBuffer A, int lda, DoubleBuffer x, int incx);
public static native void cublasDtbmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, int k, 
                               @Const double[] A, int lda, double[] x, int incx);
public static native void cublasCtbmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, int k, 
                               @Cast("const cuComplex*") float2 A, int lda, @Cast("cuComplex*") float2 x, int incx);
public static native void cublasZtbmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, int k, 
                               @Cast("const cuDoubleComplex*") double2 A, int lda, @Cast("cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/
/* TPMV */                                                    
// Legacy TPMV bindings. uplo, trans and diag are Java bytes passed as native
// chars; AP is a const input taken without a leading-dimension argument; x
// (with incx) is non-const; all variants return void.
// Real variants come in three overloads differing only in the Java carrier
// for AP and x.
// NOTE(review): presumably a triangular matrix-vector product with the matrix
// in packed storage, overwriting x; confirm in the cuBLAS docs.
public static native void cublasStpmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const FloatPointer AP, FloatPointer x, int incx);
public static native void cublasStpmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const FloatBuffer AP, FloatBuffer x, int incx);
public static native void cublasStpmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const float[] AP, float[] x, int incx);

public static native void cublasDtpmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const DoublePointer AP, DoublePointer x, int incx);
public static native void cublasDtpmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const DoubleBuffer AP, DoubleBuffer x, int incx);
public static native void cublasDtpmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const double[] AP, double[] x, int incx);

public static native void cublasCtpmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Cast("const cuComplex*") float2 AP, @Cast("cuComplex*") float2 x, int incx);
                                         
public static native void cublasZtpmv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Cast("const cuDoubleComplex*") double2 AP, @Cast("cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/
/* TRSV */
/**
 * Native binding for {@code cublasStrsv} (single precision, listed under "TRSV").
 * {@code uplo}, {@code trans} and {@code diag} are C {@code char} flags carried as Java bytes.
 * {@code A} (with leading dimension {@code lda}) is declared const; {@code x} is non-const and is
 * presumably overwritten with the result.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably a triangular solve for one right-hand-side vector -- confirm.
 */
public static native void cublasStrsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const FloatPointer A, int lda, FloatPointer x, int incx);
public static native void cublasStrsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const FloatBuffer A, int lda, FloatBuffer x, int incx);
public static native void cublasStrsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const float[] A, int lda, float[] x, int incx);

/** Double-precision TRSV binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDtrsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const DoublePointer A, int lda, DoublePointer x, int incx);
public static native void cublasDtrsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const DoubleBuffer A, int lda, DoubleBuffer x, int incx);
public static native void cublasDtrsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const double[] A, int lda, double[] x, int incx);

/** Single-precision complex TRSV binding; the {@code float2} operands are cast to {@code cuComplex*}. */
public static native void cublasCtrsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Cast("const cuComplex*") float2 A, int lda, @Cast("cuComplex*") float2 x, int incx);

/** Double-precision complex TRSV binding; the {@code double2} operands are cast to {@code cuDoubleComplex*}. */
public static native void cublasZtrsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Cast("const cuDoubleComplex*") double2 A, int lda, 
                              @Cast("cuDoubleComplex*") double2 x, int incx);       
/*------------------------------------------------------------------------*/
/* TPSV */
/**
 * Native binding for {@code cublasStpsv} (single precision, listed under "TPSV").
 * {@code uplo}, {@code trans} and {@code diag} are C {@code char} flags carried as Java bytes.
 * {@code AP} is declared const and has no leading-dimension argument; {@code x} is non-const and
 * is presumably overwritten with the result.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably a triangular solve on a packed matrix -- confirm.
 */
public static native void cublasStpsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const FloatPointer AP, 
                              FloatPointer x, int incx);
public static native void cublasStpsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const FloatBuffer AP, 
                              FloatBuffer x, int incx);
public static native void cublasStpsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const float[] AP, 
                              float[] x, int incx);
                                                                                                            
/** Double-precision TPSV binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDtpsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const DoublePointer AP, DoublePointer x, int incx);
public static native void cublasDtpsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const DoubleBuffer AP, DoubleBuffer x, int incx);
public static native void cublasDtpsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Const double[] AP, double[] x, int incx);

/** Single-precision complex TPSV binding; the {@code float2} operands are cast to {@code cuComplex*}. */
public static native void cublasCtpsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Cast("const cuComplex*") float2 AP, @Cast("cuComplex*") float2 x, int incx);

/** Double-precision complex TPSV binding; the {@code double2} operands are cast to {@code cuDoubleComplex*}. */
public static native void cublasZtpsv(@Cast("char") byte uplo, @Cast("char") byte trans, @Cast("char") byte diag, int n, @Cast("const cuDoubleComplex*") double2 AP, 
                              @Cast("cuDoubleComplex*") double2 x, int incx);
/*------------------------------------------------------------------------*/                                         
/* TBSV */                                         
/**
 * Native binding for {@code cublasStbsv} (single precision, listed under "TBSV").
 * {@code uplo}, {@code trans} and {@code diag} are C {@code char} flags carried as Java bytes.
 * {@code A} (with leading dimension {@code lda}) is declared const; {@code x} is non-const and is
 * presumably overwritten with the result.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably a triangular band solve with {@code k} as the band
 * width -- confirm against the cuBLAS reference.
 */
public static native void cublasStbsv(@Cast("char") byte uplo, @Cast("char") byte trans, 
                              @Cast("char") byte diag, int n, int k, @Const FloatPointer A, 
                              int lda, FloatPointer x, int incx);
public static native void cublasStbsv(@Cast("char") byte uplo, @Cast("char") byte trans, 
                              @Cast("char") byte diag, int n, int k, @Const FloatBuffer A, 
                              int lda, FloatBuffer x, int incx);
public static native void cublasStbsv(@Cast("char") byte uplo, @Cast("char") byte trans, 
                              @Cast("char") byte diag, int n, int k, @Const float[] A, 
                              int lda, float[] x, int incx);
    
/** Double-precision TBSV binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDtbsv(@Cast("char") byte uplo, @Cast("char") byte trans, 
                              @Cast("char") byte diag, int n, int k, @Const DoublePointer A, 
                              int lda, DoublePointer x, int incx);
public static native void cublasDtbsv(@Cast("char") byte uplo, @Cast("char") byte trans, 
                              @Cast("char") byte diag, int n, int k, @Const DoubleBuffer A, 
                              int lda, DoubleBuffer x, int incx);
public static native void cublasDtbsv(@Cast("char") byte uplo, @Cast("char") byte trans, 
                              @Cast("char") byte diag, int n, int k, @Const double[] A, 
                              int lda, double[] x, int incx);
/** Single-precision complex TBSV binding; the {@code float2} operands are cast to {@code cuComplex*}. */
public static native void cublasCtbsv(@Cast("char") byte uplo, @Cast("char") byte trans, 
                              @Cast("char") byte diag, int n, int k, @Cast("const cuComplex*") float2 A, 
                              int lda, @Cast("cuComplex*") float2 x, int incx);      
                                         
/** Double-precision complex TBSV binding; the {@code double2} operands are cast to {@code cuDoubleComplex*}. */
public static native void cublasZtbsv(@Cast("char") byte uplo, @Cast("char") byte trans, 
                              @Cast("char") byte diag, int n, int k, @Cast("const cuDoubleComplex*") double2 A, 
                              int lda, @Cast("cuDoubleComplex*") double2 x, int incx);  
/*------------------------------------------------------------------------*/                                         
/* SYMV/HEMV */
/**
 * Native binding for {@code cublasSsymv} (single precision, listed under "SYMV/HEMV").
 * {@code uplo} is a C {@code char} flag carried as a Java byte; {@code alpha} and {@code beta}
 * are scalars passed by value. {@code A} and {@code x} are declared const; {@code y} is the only
 * non-const array, so it is presumably the output.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): BLAS symv computes y = alpha*A*x + beta*y for symmetric A -- confirm this
 * binding follows that contract.
 */
public static native void cublasSsymv(@Cast("char") byte uplo, int n, float alpha, @Const FloatPointer A,
                               int lda, @Const FloatPointer x, int incx, float beta, 
                               FloatPointer y, int incy);
public static native void cublasSsymv(@Cast("char") byte uplo, int n, float alpha, @Const FloatBuffer A,
                               int lda, @Const FloatBuffer x, int incx, float beta, 
                               FloatBuffer y, int incy);
public static native void cublasSsymv(@Cast("char") byte uplo, int n, float alpha, @Const float[] A,
                               int lda, @Const float[] x, int incx, float beta, 
                               float[] y, int incy);
/** Double-precision SYMV binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDsymv(@Cast("char") byte uplo, int n, double alpha, @Const DoublePointer A,
                               int lda, @Const DoublePointer x, int incx, double beta, 
                               DoublePointer y, int incy);
public static native void cublasDsymv(@Cast("char") byte uplo, int n, double alpha, @Const DoubleBuffer A,
                               int lda, @Const DoubleBuffer x, int incx, double beta, 
                               DoubleBuffer y, int incy);
public static native void cublasDsymv(@Cast("char") byte uplo, int n, double alpha, @Const double[] A,
                               int lda, @Const double[] x, int incx, double beta, 
                               double[] y, int incy);
/**
 * Single-precision complex HEMV binding. {@code alpha} and {@code beta} are {@code float2}
 * values passed by value ({@code @ByVal}); the array operands are cast to {@code cuComplex*}.
 * NOTE(review): presumably the Hermitian counterpart of SYMV -- confirm.
 */
public static native void cublasChemv(@Cast("char") byte uplo, int n, @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 A,
                               int lda, @Cast("const cuComplex*") float2 x, int incx, @ByVal @Cast("cuComplex*") float2 beta, 
                               @Cast("cuComplex*") float2 y, int incy);
/**
 * Double-precision complex HEMV binding. {@code alpha} and {@code beta} are {@code double2}
 * values passed by value ({@code @ByVal}); the array operands are cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZhemv(@Cast("char") byte uplo, int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("const cuDoubleComplex*") double2 A,
                               int lda, @Cast("const cuDoubleComplex*") double2 x, int incx, @ByVal @Cast("cuDoubleComplex*") double2 beta, 
                               @Cast("cuDoubleComplex*") double2 y, int incy);
/*------------------------------------------------------------------------*/       
/* SBMV/HBMV */
/**
 * Native binding for {@code cublasSsbmv} (single precision, listed under "SBMV/HBMV").
 * {@code uplo} is a C {@code char} flag carried as a Java byte; {@code alpha} and {@code beta}
 * are scalars passed by value. {@code A} and {@code x} are declared const; {@code y} is the only
 * non-const array, so it is presumably the output.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably the symmetric band matrix-vector product with
 * {@code k} as the band width -- confirm against the cuBLAS reference.
 */
public static native void cublasSsbmv(@Cast("char") byte uplo, int n, int k, float alpha, 
                               @Const FloatPointer A, int lda, @Const FloatPointer x, int incx, 
                               float beta, FloatPointer y, int incy);
public static native void cublasSsbmv(@Cast("char") byte uplo, int n, int k, float alpha, 
                               @Const FloatBuffer A, int lda, @Const FloatBuffer x, int incx, 
                               float beta, FloatBuffer y, int incy);
public static native void cublasSsbmv(@Cast("char") byte uplo, int n, int k, float alpha, 
                               @Const float[] A, int lda, @Const float[] x, int incx, 
                               float beta, float[] y, int incy);
/** Double-precision SBMV binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDsbmv(@Cast("char") byte uplo, int n, int k, double alpha, 
                               @Const DoublePointer A, int lda, @Const DoublePointer x, int incx, 
                               double beta, DoublePointer y, int incy);
public static native void cublasDsbmv(@Cast("char") byte uplo, int n, int k, double alpha, 
                               @Const DoubleBuffer A, int lda, @Const DoubleBuffer x, int incx, 
                               double beta, DoubleBuffer y, int incy);
public static native void cublasDsbmv(@Cast("char") byte uplo, int n, int k, double alpha, 
                               @Const double[] A, int lda, @Const double[] x, int incx, 
                               double beta, double[] y, int incy);
/**
 * Single-precision complex HBMV binding. {@code alpha} and {@code beta} are {@code float2}
 * values passed by value ({@code @ByVal}); the array operands are cast to {@code cuComplex*}.
 * NOTE(review): presumably the Hermitian counterpart of SBMV -- confirm.
 */
public static native void cublasChbmv(@Cast("char") byte uplo, int n, int k, @ByVal @Cast("cuComplex*") float2 alpha, 
                               @Cast("const cuComplex*") float2 A, int lda, @Cast("const cuComplex*") float2 x, int incx, 
                               @ByVal @Cast("cuComplex*") float2 beta, @Cast("cuComplex*") float2 y, int incy);
/**
 * Double-precision complex HBMV binding. {@code alpha} and {@code beta} are {@code double2}
 * values passed by value ({@code @ByVal}); the array operands are cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZhbmv(@Cast("char") byte uplo, int n, int k, @ByVal @Cast("cuDoubleComplex*") double2 alpha, 
                               @Cast("const cuDoubleComplex*") double2 A, int lda, @Cast("const cuDoubleComplex*") double2 x, int incx, 
                               @ByVal @Cast("cuDoubleComplex*") double2 beta, @Cast("cuDoubleComplex*") double2 y, int incy);
/*------------------------------------------------------------------------*/       
/* SPMV/HPMV */
/**
 * Native binding for {@code cublasSspmv} (single precision, listed under "SPMV/HPMV").
 * {@code uplo} is a C {@code char} flag carried as a Java byte; {@code alpha} and {@code beta}
 * are scalars passed by value. {@code AP} (no leading-dimension argument) and {@code x} are
 * declared const; {@code y} is the only non-const array, so it is presumably the output.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably the symmetric packed matrix-vector product -- confirm.
 */
public static native void cublasSspmv(@Cast("char") byte uplo, int n, float alpha,
                              @Const FloatPointer AP, @Const FloatPointer x,
                              int incx, float beta, FloatPointer y, int incy);
public static native void cublasSspmv(@Cast("char") byte uplo, int n, float alpha,
                              @Const FloatBuffer AP, @Const FloatBuffer x,
                              int incx, float beta, FloatBuffer y, int incy);
public static native void cublasSspmv(@Cast("char") byte uplo, int n, float alpha,
                              @Const float[] AP, @Const float[] x,
                              int incx, float beta, float[] y, int incy);
/** Double-precision SPMV binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDspmv(@Cast("char") byte uplo, int n, double alpha,
                              @Const DoublePointer AP, @Const DoublePointer x,
                              int incx, double beta, DoublePointer y, int incy);
public static native void cublasDspmv(@Cast("char") byte uplo, int n, double alpha,
                              @Const DoubleBuffer AP, @Const DoubleBuffer x,
                              int incx, double beta, DoubleBuffer y, int incy);
public static native void cublasDspmv(@Cast("char") byte uplo, int n, double alpha,
                              @Const double[] AP, @Const double[] x,
                              int incx, double beta, double[] y, int incy);
/**
 * Single-precision complex HPMV binding. {@code alpha} and {@code beta} are {@code float2}
 * values passed by value ({@code @ByVal}); the array operands are cast to {@code cuComplex*}.
 * NOTE(review): presumably the Hermitian counterpart of SPMV -- confirm.
 */
public static native void cublasChpmv(@Cast("char") byte uplo, int n, @ByVal @Cast("cuComplex*") float2 alpha,
                              @Cast("const cuComplex*") float2 AP, @Cast("const cuComplex*") float2 x,
                              int incx, @ByVal @Cast("cuComplex*") float2 beta, @Cast("cuComplex*") float2 y, int incy);
/**
 * Double-precision complex HPMV binding. {@code alpha} and {@code beta} are {@code double2}
 * values passed by value ({@code @ByVal}); the array operands are cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZhpmv(@Cast("char") byte uplo, int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha,
                              @Cast("const cuDoubleComplex*") double2 AP, @Cast("const cuDoubleComplex*") double2 x,
                              int incx, @ByVal @Cast("cuDoubleComplex*") double2 beta, @Cast("cuDoubleComplex*") double2 y, int incy);

/*------------------------------------------------------------------------*/       
/* GER */
/**
 * Native binding for {@code cublasSger} (single precision, listed under "GER").
 * {@code alpha} is a scalar passed by value. {@code x} and {@code y} are declared const;
 * {@code A} (with leading dimension {@code lda}) is non-const and is presumably updated in place.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): BLAS ger is the rank-1 update A = alpha*x*y^T + A on an m-by-n matrix -- confirm
 * this binding follows that contract.
 */
public static native void cublasSger(int m, int n, float alpha, @Const FloatPointer x, int incx,
                              @Const FloatPointer y, int incy, FloatPointer A, int lda);
public static native void cublasSger(int m, int n, float alpha, @Const FloatBuffer x, int incx,
                              @Const FloatBuffer y, int incy, FloatBuffer A, int lda);
public static native void cublasSger(int m, int n, float alpha, @Const float[] x, int incx,
                              @Const float[] y, int incy, float[] A, int lda);
/** Double-precision GER binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDger(int m, int n, double alpha, @Const DoublePointer x, int incx,
                              @Const DoublePointer y, int incy, DoublePointer A, int lda);
public static native void cublasDger(int m, int n, double alpha, @Const DoubleBuffer x, int incx,
                              @Const DoubleBuffer y, int incy, DoubleBuffer A, int lda);
public static native void cublasDger(int m, int n, double alpha, @Const double[] x, int incx,
                              @Const double[] y, int incy, double[] A, int lda);

/**
 * Single-precision complex GER binding; {@code alpha} is a {@code float2} passed by value and
 * the array operands are cast to {@code cuComplex*}.
 * NOTE(review): the "u" suffix presumably means y is used unconjugated -- confirm.
 */
public static native void cublasCgeru(int m, int n, @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 x,
                               int incx, @Cast("const cuComplex*") float2 y, int incy,
                               @Cast("cuComplex*") float2 A, int lda);
/**
 * Same signature as {@code cublasCgeru}.
 * NOTE(review): the "c" suffix presumably means y is conjugated -- confirm.
 */
public static native void cublasCgerc(int m, int n, @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 x,
                               int incx, @Cast("const cuComplex*") float2 y, int incy,
                               @Cast("cuComplex*") float2 A, int lda);
/**
 * Double-precision complex GER binding; {@code alpha} is a {@code double2} passed by value and
 * the array operands are cast to {@code cuDoubleComplex*}.
 * NOTE(review): the "u" suffix presumably means y is used unconjugated -- confirm.
 */
public static native void cublasZgeru(int m, int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("const cuDoubleComplex*") double2 x,
                               int incx, @Cast("const cuDoubleComplex*") double2 y, int incy,
                               @Cast("cuDoubleComplex*") double2 A, int lda);
/**
 * Same signature as {@code cublasZgeru}.
 * NOTE(review): the "c" suffix presumably means y is conjugated -- confirm.
 */
public static native void cublasZgerc(int m, int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("const cuDoubleComplex*") double2 x,
                               int incx, @Cast("const cuDoubleComplex*") double2 y, int incy,
                               @Cast("cuDoubleComplex*") double2 A, int lda);
/*------------------------------------------------------------------------*/       
/* SYR/HER */
/**
 * Native binding for {@code cublasSsyr} (single precision, listed under "SYR/HER").
 * {@code uplo} is a C {@code char} flag carried as a Java byte; {@code alpha} is a scalar passed
 * by value. {@code x} is declared const; {@code A} (with leading dimension {@code lda}) is
 * non-const and is presumably updated in place.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably the symmetric rank-1 update -- confirm.
 */
public static native void cublasSsyr(@Cast("char") byte uplo, int n, float alpha, @Const FloatPointer x,
                              int incx, FloatPointer A, int lda);
public static native void cublasSsyr(@Cast("char") byte uplo, int n, float alpha, @Const FloatBuffer x,
                              int incx, FloatBuffer A, int lda);
public static native void cublasSsyr(@Cast("char") byte uplo, int n, float alpha, @Const float[] x,
                              int incx, float[] A, int lda);
/** Double-precision SYR binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDsyr(@Cast("char") byte uplo, int n, double alpha, @Const DoublePointer x,
                              int incx, DoublePointer A, int lda);
public static native void cublasDsyr(@Cast("char") byte uplo, int n, double alpha, @Const DoubleBuffer x,
                              int incx, DoubleBuffer A, int lda);
public static native void cublasDsyr(@Cast("char") byte uplo, int n, double alpha, @Const double[] x,
                              int incx, double[] A, int lda);

/**
 * Single-precision complex HER binding. Note that {@code alpha} is a real {@code float} here,
 * not a complex value; {@code x} and {@code A} are {@code float2} operands cast to {@code cuComplex*}.
 * NOTE(review): presumably the Hermitian rank-1 update -- confirm.
 */
public static native void cublasCher(@Cast("char") byte uplo, int n, float alpha, 
                              @Cast("const cuComplex*") float2 x, int incx, @Cast("cuComplex*") float2 A, int lda);
/**
 * Double-precision complex HER binding. {@code alpha} is a real {@code double}; {@code x} and
 * {@code A} are {@code double2} operands cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZher(@Cast("char") byte uplo, int n, double alpha, 
                              @Cast("const cuDoubleComplex*") double2 x, int incx, @Cast("cuDoubleComplex*") double2 A, int lda);

/*------------------------------------------------------------------------*/       
/* SPR/HPR */
/**
 * Native binding for {@code cublasSspr} (single precision, listed under "SPR/HPR").
 * {@code uplo} is a C {@code char} flag carried as a Java byte; {@code alpha} is a scalar passed
 * by value. {@code x} is declared const; {@code AP} (no leading-dimension argument) is non-const
 * and is presumably updated in place.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably the symmetric packed rank-1 update -- confirm.
 */
public static native void cublasSspr(@Cast("char") byte uplo, int n, float alpha, @Const FloatPointer x,
                              int incx, FloatPointer AP);
public static native void cublasSspr(@Cast("char") byte uplo, int n, float alpha, @Const FloatBuffer x,
                              int incx, FloatBuffer AP);
public static native void cublasSspr(@Cast("char") byte uplo, int n, float alpha, @Const float[] x,
                              int incx, float[] AP);
/** Double-precision SPR binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDspr(@Cast("char") byte uplo, int n, double alpha, @Const DoublePointer x,
                              int incx, DoublePointer AP);
public static native void cublasDspr(@Cast("char") byte uplo, int n, double alpha, @Const DoubleBuffer x,
                              int incx, DoubleBuffer AP);
public static native void cublasDspr(@Cast("char") byte uplo, int n, double alpha, @Const double[] x,
                              int incx, double[] AP);
/**
 * Single-precision complex HPR binding. {@code alpha} is a real {@code float}; {@code x} and
 * {@code AP} are {@code float2} operands cast to {@code cuComplex*}.
 * NOTE(review): presumably the Hermitian packed rank-1 update -- confirm.
 */
public static native void cublasChpr(@Cast("char") byte uplo, int n, float alpha, @Cast("const cuComplex*") float2 x,
                              int incx, @Cast("cuComplex*") float2 AP);
/**
 * Double-precision complex HPR binding. {@code alpha} is a real {@code double}; {@code x} and
 * {@code AP} are {@code double2} operands cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZhpr(@Cast("char") byte uplo, int n, double alpha, @Cast("const cuDoubleComplex*") double2 x,
                              int incx, @Cast("cuDoubleComplex*") double2 AP);
/*------------------------------------------------------------------------*/       
/* SYR2/HER2 */
/**
 * Native binding for {@code cublasSsyr2} (single precision, listed under "SYR2/HER2").
 * {@code uplo} is a C {@code char} flag carried as a Java byte; {@code alpha} is a scalar passed
 * by value. {@code x} and {@code y} are declared const; {@code A} (with leading dimension
 * {@code lda}) is non-const and is presumably updated in place.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably the symmetric rank-2 update -- confirm.
 */
public static native void cublasSsyr2(@Cast("char") byte uplo, int n, float alpha, @Const FloatPointer x, 
                               int incx, @Const FloatPointer y, int incy, FloatPointer A, 
                               int lda);
public static native void cublasSsyr2(@Cast("char") byte uplo, int n, float alpha, @Const FloatBuffer x, 
                               int incx, @Const FloatBuffer y, int incy, FloatBuffer A, 
                               int lda);
public static native void cublasSsyr2(@Cast("char") byte uplo, int n, float alpha, @Const float[] x, 
                               int incx, @Const float[] y, int incy, float[] A, 
                               int lda);
/** Double-precision SYR2 binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDsyr2(@Cast("char") byte uplo, int n, double alpha, @Const DoublePointer x, 
                               int incx, @Const DoublePointer y, int incy, DoublePointer A, 
                               int lda);
public static native void cublasDsyr2(@Cast("char") byte uplo, int n, double alpha, @Const DoubleBuffer x, 
                               int incx, @Const DoubleBuffer y, int incy, DoubleBuffer A, 
                               int lda);
public static native void cublasDsyr2(@Cast("char") byte uplo, int n, double alpha, @Const double[] x, 
                               int incx, @Const double[] y, int incy, double[] A, 
                               int lda);
/**
 * Single-precision complex HER2 binding. Unlike {@code cublasCher}, {@code alpha} is a complex
 * {@code float2} passed by value ({@code @ByVal}); the array operands are cast to {@code cuComplex*}.
 * NOTE(review): presumably the Hermitian rank-2 update -- confirm.
 */
public static native void cublasCher2(@Cast("char") byte uplo, int n, @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 x, 
                               int incx, @Cast("const cuComplex*") float2 y, int incy, @Cast("cuComplex*") float2 A, 
                               int lda);
/**
 * Double-precision complex HER2 binding. {@code alpha} is a complex {@code double2} passed by
 * value ({@code @ByVal}); the array operands are cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZher2(@Cast("char") byte uplo, int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("const cuDoubleComplex*") double2 x, 
                               int incx, @Cast("const cuDoubleComplex*") double2 y, int incy, @Cast("cuDoubleComplex*") double2 A, 
                               int lda);

/*------------------------------------------------------------------------*/       
/* SPR2/HPR2 */
/**
 * Native binding for {@code cublasSspr2} (single precision, listed under "SPR2/HPR2").
 * {@code uplo} is a C {@code char} flag carried as a Java byte; {@code alpha} is a scalar passed
 * by value. {@code x} and {@code y} are declared const; {@code AP} (no leading-dimension
 * argument) is non-const and is presumably updated in place.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably the symmetric packed rank-2 update -- confirm.
 */
public static native void cublasSspr2(@Cast("char") byte uplo, int n, float alpha, @Const FloatPointer x, 
                               int incx, @Const FloatPointer y, int incy, FloatPointer AP);
public static native void cublasSspr2(@Cast("char") byte uplo, int n, float alpha, @Const FloatBuffer x, 
                               int incx, @Const FloatBuffer y, int incy, FloatBuffer AP);
public static native void cublasSspr2(@Cast("char") byte uplo, int n, float alpha, @Const float[] x, 
                               int incx, @Const float[] y, int incy, float[] AP);
/** Double-precision SPR2 binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDspr2(@Cast("char") byte uplo, int n, double alpha,
                               @Const DoublePointer x, int incx, @Const DoublePointer y,
                               int incy, DoublePointer AP);
public static native void cublasDspr2(@Cast("char") byte uplo, int n, double alpha,
                               @Const DoubleBuffer x, int incx, @Const DoubleBuffer y,
                               int incy, DoubleBuffer AP);
public static native void cublasDspr2(@Cast("char") byte uplo, int n, double alpha,
                               @Const double[] x, int incx, @Const double[] y,
                               int incy, double[] AP);
/**
 * Single-precision complex HPR2 binding. {@code alpha} is a complex {@code float2} passed by
 * value ({@code @ByVal}); the array operands are cast to {@code cuComplex*}.
 * NOTE(review): presumably the Hermitian packed rank-2 update -- confirm.
 */
public static native void cublasChpr2(@Cast("char") byte uplo, int n, @ByVal @Cast("cuComplex*") float2 alpha,
                               @Cast("const cuComplex*") float2 x, int incx, @Cast("const cuComplex*") float2 y,
                               int incy, @Cast("cuComplex*") float2 AP);
/**
 * Double-precision complex HPR2 binding. {@code alpha} is a complex {@code double2} passed by
 * value ({@code @ByVal}); the array operands are cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZhpr2(@Cast("char") byte uplo, int n, @ByVal @Cast("cuDoubleComplex*") double2 alpha,
                               @Cast("const cuDoubleComplex*") double2 x, int incx, @Cast("const cuDoubleComplex*") double2 y,
                               int incy, @Cast("cuDoubleComplex*") double2 AP);
/* ------------------------BLAS3 Functions ------------------------------- */
/* GEMM */
/**
 * Native binding for {@code cublasSgemm} (single precision, first entry of the "BLAS3 Functions"
 * group, listed under "GEMM").
 * {@code transa} and {@code transb} are C {@code char} flags carried as Java bytes; {@code alpha}
 * and {@code beta} are scalars passed by value. {@code A} and {@code B} are declared const;
 * {@code C} is the only non-const array, so it is presumably the output. Each matrix is followed
 * by its leading dimension ({@code lda}, {@code ldb}, {@code ldc}).
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): BLAS gemm computes C = alpha*op(A)*op(B) + beta*C -- confirm this binding
 * follows that contract.
 * NOTE(review): cuBLAS routines normally operate on device memory; verify that the Buffer and
 * array overloads are meaningful for callers.
 */
public static native void cublasSgemm(@Cast("char") byte transa, @Cast("char") byte transb, int m, int n, int k, 
                               float alpha, @Const FloatPointer A, int lda, 
                               @Const FloatPointer B, int ldb, float beta, FloatPointer C, 
                               int ldc);
public static native void cublasSgemm(@Cast("char") byte transa, @Cast("char") byte transb, int m, int n, int k, 
                               float alpha, @Const FloatBuffer A, int lda, 
                               @Const FloatBuffer B, int ldb, float beta, FloatBuffer C, 
                               int ldc);
public static native void cublasSgemm(@Cast("char") byte transa, @Cast("char") byte transb, int m, int n, int k, 
                               float alpha, @Const float[] A, int lda, 
                               @Const float[] B, int ldb, float beta, float[] C, 
                               int ldc);
/** Double-precision GEMM binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDgemm(@Cast("char") byte transa, @Cast("char") byte transb, int m, int n, int k,
                               double alpha, @Const DoublePointer A, int lda, 
                               @Const DoublePointer B, int ldb, double beta, DoublePointer C, 
                               int ldc);
public static native void cublasDgemm(@Cast("char") byte transa, @Cast("char") byte transb, int m, int n, int k,
                               double alpha, @Const DoubleBuffer A, int lda, 
                               @Const DoubleBuffer B, int ldb, double beta, DoubleBuffer C, 
                               int ldc);
public static native void cublasDgemm(@Cast("char") byte transa, @Cast("char") byte transb, int m, int n, int k,
                               double alpha, @Const double[] A, int lda, 
                               @Const double[] B, int ldb, double beta, double[] C, 
                               int ldc);              
/**
 * Single-precision complex GEMM binding. {@code alpha} and {@code beta} are {@code float2}
 * values passed by value ({@code @ByVal}); the matrix operands are cast to {@code cuComplex*}.
 */
public static native void cublasCgemm(@Cast("char") byte transa, @Cast("char") byte transb, int m, int n, int k, 
                               @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 A, int lda,
                               @Cast("const cuComplex*") float2 B, int ldb, @ByVal @Cast("cuComplex*") float2 beta,
                               @Cast("cuComplex*") float2 C, int ldc);
/**
 * Double-precision complex GEMM binding. {@code alpha} and {@code beta} are {@code double2}
 * values passed by value ({@code @ByVal}); the matrix operands are cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZgemm(@Cast("char") byte transa, @Cast("char") byte transb, int m, int n,
                               int k, @ByVal @Cast("cuDoubleComplex*") double2 alpha,
                               @Cast("const cuDoubleComplex*") double2 A, int lda,
                               @Cast("const cuDoubleComplex*") double2 B, int ldb,
                               @ByVal @Cast("cuDoubleComplex*") double2 beta, @Cast("cuDoubleComplex*") double2 C,
                               int ldc);                   
/* -------------------------------------------------------*/
/* SYRK */
/**
 * Native binding for {@code cublasSsyrk} (single precision, listed under "SYRK").
 * {@code uplo} and {@code trans} are C {@code char} flags carried as Java bytes; {@code alpha}
 * and {@code beta} are scalars passed by value. {@code A} is declared const; {@code C} is
 * non-const, so it is presumably the output.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably the symmetric rank-k update -- confirm.
 */
public static native void cublasSsyrk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k, float alpha, 
                               @Const FloatPointer A, int lda, float beta, FloatPointer C, 
                               int ldc);
public static native void cublasSsyrk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k, float alpha, 
                               @Const FloatBuffer A, int lda, float beta, FloatBuffer C, 
                               int ldc);
public static native void cublasSsyrk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k, float alpha, 
                               @Const float[] A, int lda, float beta, float[] C, 
                               int ldc);
/** Double-precision SYRK binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDsyrk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                               double alpha, @Const DoublePointer A, int lda,
                               double beta, DoublePointer C, int ldc);
public static native void cublasDsyrk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                               double alpha, @Const DoubleBuffer A, int lda,
                               double beta, DoubleBuffer C, int ldc);
public static native void cublasDsyrk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                               double alpha, @Const double[] A, int lda,
                               double beta, double[] C, int ldc);

/**
 * Single-precision complex SYRK binding. {@code alpha} and {@code beta} are {@code float2}
 * values passed by value ({@code @ByVal}); the matrix operands are cast to {@code cuComplex*}.
 */
public static native void cublasCsyrk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                               @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 A, int lda,
                               @ByVal @Cast("cuComplex*") float2 beta, @Cast("cuComplex*") float2 C, int ldc);
/**
 * Double-precision complex SYRK binding. {@code alpha} and {@code beta} are {@code double2}
 * values passed by value ({@code @ByVal}); the matrix operands are cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZsyrk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                               @ByVal @Cast("cuDoubleComplex*") double2 alpha,
                               @Cast("const cuDoubleComplex*") double2 A, int lda,
                               @ByVal @Cast("cuDoubleComplex*") double2 beta,
                               @Cast("cuDoubleComplex*") double2 C, int ldc);
/* ------------------------------------------------------- */
/* HERK */
/**
 * Native binding for {@code cublasCherk} (single-precision complex, listed under "HERK").
 * {@code uplo} and {@code trans} are C {@code char} flags carried as Java bytes. Both
 * {@code alpha} and {@code beta} are real {@code float} scalars, while {@code A} (const) and
 * {@code C} (non-const, presumably the output) are {@code float2} operands cast to {@code cuComplex*}.
 * NOTE(review): per BLAS naming, presumably the Hermitian rank-k update -- confirm.
 */
public static native void cublasCherk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                               float alpha, @Cast("const cuComplex*") float2 A, int lda,
                               float beta, @Cast("cuComplex*") float2 C, int ldc);
/**
 * Double-precision complex HERK binding. {@code alpha} and {@code beta} are real {@code double}
 * scalars; {@code A} and {@code C} are {@code double2} operands cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZherk(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                               double alpha,
                               @Cast("const cuDoubleComplex*") double2 A, int lda,
                               double beta,
                               @Cast("cuDoubleComplex*") double2 C, int ldc);
/* ------------------------------------------------------- */
/* SYR2K */
/**
 * Native binding for {@code cublasSsyr2k} (single precision, listed under "SYR2K").
 * {@code uplo} and {@code trans} are C {@code char} flags carried as Java bytes; {@code alpha}
 * and {@code beta} are scalars passed by value. {@code A} and {@code B} are declared const;
 * {@code C} is non-const, so it is presumably the output.
 * The three overloads differ only in the array carrier: FloatPointer, FloatBuffer or float[].
 * NOTE(review): per BLAS naming, presumably the symmetric rank-2k update -- confirm.
 */
public static native void cublasSsyr2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k, float alpha, 
                                @Const FloatPointer A, int lda, @Const FloatPointer B, int ldb, 
                                float beta, FloatPointer C, int ldc);
public static native void cublasSsyr2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k, float alpha, 
                                @Const FloatBuffer A, int lda, @Const FloatBuffer B, int ldb, 
                                float beta, FloatBuffer C, int ldc);
public static native void cublasSsyr2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k, float alpha, 
                                @Const float[] A, int lda, @Const float[] B, int ldb, 
                                float beta, float[] C, int ldc);

/** Double-precision SYR2K binding; same parameter layout with DoublePointer, DoubleBuffer or double[] carriers. */
public static native void cublasDsyr2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                                double alpha, @Const DoublePointer A, int lda,
                                @Const DoublePointer B, int ldb, double beta,
                                DoublePointer C, int ldc);
public static native void cublasDsyr2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                                double alpha, @Const DoubleBuffer A, int lda,
                                @Const DoubleBuffer B, int ldb, double beta,
                                DoubleBuffer C, int ldc);
public static native void cublasDsyr2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                                double alpha, @Const double[] A, int lda,
                                @Const double[] B, int ldb, double beta,
                                double[] C, int ldc);
/**
 * Single-precision complex SYR2K binding. {@code alpha} and {@code beta} are {@code float2}
 * values passed by value ({@code @ByVal}); the matrix operands are cast to {@code cuComplex*}.
 */
public static native void cublasCsyr2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                                @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 A, int lda,
                                @Cast("const cuComplex*") float2 B, int ldb, @ByVal @Cast("cuComplex*") float2 beta,
                                @Cast("cuComplex*") float2 C, int ldc);

/**
 * Double-precision complex SYR2K binding. {@code alpha} and {@code beta} are {@code double2}
 * values passed by value ({@code @ByVal}); the matrix operands are cast to {@code cuDoubleComplex*}.
 */
public static native void cublasZsyr2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                                @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("const cuDoubleComplex*") double2 A, int lda,
                                @Cast("const cuDoubleComplex*") double2 B, int ldb, @ByVal @Cast("cuDoubleComplex*") double2 beta,
                                @Cast("cuDoubleComplex*") double2 C, int ldc);                             
/* ------------------------------------------------------- */
/* HER2K */
/**
 * Native binding for {@code cublasCher2k} (single-precision complex, listed under "HER2K").
 * {@code uplo} and {@code trans} are C {@code char} flags carried as Java bytes. The scalars are
 * asymmetric: {@code alpha} is a complex {@code float2} passed by value ({@code @ByVal}), while
 * {@code beta} is a real {@code float}. {@code A} and {@code B} are const; {@code C} is
 * non-const, so it is presumably the output.
 * NOTE(review): per BLAS naming, presumably the Hermitian rank-2k update -- confirm.
 */
public static native void cublasCher2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                                @ByVal @Cast("cuComplex*") float2 alpha, @Cast("const cuComplex*") float2 A, int lda,
                                @Cast("const cuComplex*") float2 B, int ldb, float beta,
                                @Cast("cuComplex*") float2 C, int ldc);

/**
 * Double-precision complex HER2K binding. {@code alpha} is a complex {@code double2} passed by
 * value ({@code @ByVal}); {@code beta} is a real {@code double}. The matrix operands are cast to
 * {@code cuDoubleComplex*}.
 */
public static native void cublasZher2k(@Cast("char") byte uplo, @Cast("char") byte trans, int n, int k,
                                @ByVal @Cast("cuDoubleComplex*") double2 alpha, @Cast("const cuDoubleComplex*") double2 A, int lda,
                                @Cast("const cuDoubleComplex*") double2 B, int ldb, double beta,
                                @Cast("cuDoubleComplex*") double2 C, int ldc); 

/*------------------------------------------------------------------------*/       
/* SYMM*/
/**
 * Native bindings for {@code cublasSsymm} (header section SYMM), single precision.
 * Three overloads differ only in how the matrices are supplied: {@code FloatPointer},
 * {@code FloatBuffer} or {@code float[]}. {@code A} and {@code B} are const; {@code C} is not.
 * NOTE(review): presumably the BLAS symmetric matrix-matrix multiply - confirm against the cuBLAS docs.
 */
public static native void cublasSsymm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        float alpha,
        @Const FloatPointer A, int lda,
        @Const FloatPointer B, int ldb,
        float beta,
        FloatPointer C, int ldc);
// NIO buffer variant.
public static native void cublasSsymm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        float alpha,
        @Const FloatBuffer A, int lda,
        @Const FloatBuffer B, int ldb,
        float beta,
        FloatBuffer C, int ldc);
// Java array variant.
public static native void cublasSsymm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        float alpha,
        @Const float[] A, int lda,
        @Const float[] B, int ldb,
        float beta,
        float[] C, int ldc);
/**
 * Native bindings for {@code cublasDsymm} (header section SYMM), double precision.
 * Three overloads differ only in how the matrices are supplied: {@code DoublePointer},
 * {@code DoubleBuffer} or {@code double[]}. {@code A} and {@code B} are const; {@code C} is not.
 * NOTE(review): presumably the BLAS symmetric matrix-matrix multiply - confirm against the cuBLAS docs.
 */
public static native void cublasDsymm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        double alpha,
        @Const DoublePointer A, int lda,
        @Const DoublePointer B, int ldb,
        double beta,
        DoublePointer C, int ldc);
// NIO buffer variant.
public static native void cublasDsymm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        double alpha,
        @Const DoubleBuffer A, int lda,
        @Const DoubleBuffer B, int ldb,
        double beta,
        DoubleBuffer C, int ldc);
// Java array variant.
public static native void cublasDsymm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        double alpha,
        @Const double[] A, int lda,
        @Const double[] B, int ldb,
        double beta,
        double[] C, int ldc);
          
/**
 * Native binding for {@code cublasCsymm} (header section SYMM), single-precision complex.
 * {@code alpha} and {@code beta} are by-value {@code float2} scalars; {@code A} and {@code B}
 * are cast to const {@code cuComplex*}, {@code C} to a non-const {@code cuComplex*}.
 */
public static native void cublasCsymm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        @ByVal @Cast("cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 A, int lda,
        @Cast("const cuComplex*") float2 B, int ldb,
        @ByVal @Cast("cuComplex*") float2 beta,
        @Cast("cuComplex*") float2 C, int ldc);
          
/**
 * Native binding for {@code cublasZsymm} (header section SYMM), double-precision complex.
 * {@code alpha} and {@code beta} are by-value {@code double2} scalars; {@code A} and {@code B}
 * are cast to const {@code cuDoubleComplex*}, {@code C} to a non-const {@code cuDoubleComplex*}.
 */
public static native void cublasZsymm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        @ByVal @Cast("cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 A, int lda,
        @Cast("const cuDoubleComplex*") double2 B, int ldb,
        @ByVal @Cast("cuDoubleComplex*") double2 beta,
        @Cast("cuDoubleComplex*") double2 C, int ldc);
/*------------------------------------------------------------------------*/       
/* HEMM*/
/**
 * Native binding for {@code cublasChemm} (header section HEMM), single-precision complex.
 * {@code alpha} and {@code beta} are by-value {@code float2} scalars; {@code A} and {@code B}
 * are const inputs and {@code C} is the non-const matrix argument.
 * NOTE(review): presumably the Hermitian matrix-matrix multiply - confirm against the cuBLAS docs.
 */
public static native void cublasChemm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        @ByVal @Cast("cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 A, int lda,
        @Cast("const cuComplex*") float2 B, int ldb,
        @ByVal @Cast("cuComplex*") float2 beta,
        @Cast("cuComplex*") float2 C, int ldc);
/**
 * Native binding for {@code cublasZhemm} (header section HEMM), double-precision complex.
 * {@code alpha} and {@code beta} are by-value {@code double2} scalars; {@code A} and {@code B}
 * are const inputs and {@code C} is the non-const matrix argument.
 * NOTE(review): presumably the Hermitian matrix-matrix multiply - confirm against the cuBLAS docs.
 */
public static native void cublasZhemm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        int m, int n,
        @ByVal @Cast("cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 A, int lda,
        @Cast("const cuDoubleComplex*") double2 B, int ldb,
        @ByVal @Cast("cuDoubleComplex*") double2 beta,
        @Cast("cuDoubleComplex*") double2 C, int ldc);

/*------------------------------------------------------------------------*/       
/* TRSM*/
/**
 * Native bindings for {@code cublasStrsm} (header section TRSM), single precision.
 * Overloads accept the matrices as {@code FloatPointer}, {@code FloatBuffer} or {@code float[]}.
 * {@code A} is const; {@code B} is the non-const matrix argument.
 * NOTE(review): presumably the BLAS triangular solve with {@code B} overwritten - confirm against the cuBLAS docs.
 */
public static native void cublasStrsm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        float alpha,
        @Const FloatPointer A, int lda,
        FloatPointer B, int ldb);
// NIO buffer variant.
public static native void cublasStrsm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        float alpha,
        @Const FloatBuffer A, int lda,
        FloatBuffer B, int ldb);
// Java array variant.
public static native void cublasStrsm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        float alpha,
        @Const float[] A, int lda,
        float[] B, int ldb);

/**
 * Native bindings for {@code cublasDtrsm} (header section TRSM), double precision.
 * Overloads accept the matrices as {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]}.
 * {@code A} is const; {@code B} is the non-const matrix argument.
 * NOTE(review): presumably the BLAS triangular solve with {@code B} overwritten - confirm against the cuBLAS docs.
 */
public static native void cublasDtrsm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        double alpha,
        @Const DoublePointer A, int lda,
        DoublePointer B, int ldb);
// NIO buffer variant.
public static native void cublasDtrsm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        double alpha,
        @Const DoubleBuffer A, int lda,
        DoubleBuffer B, int ldb);
// Java array variant.
public static native void cublasDtrsm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        double alpha,
        @Const double[] A, int lda,
        double[] B, int ldb);

/**
 * Native binding for {@code cublasCtrsm} (header section TRSM), single-precision complex.
 * {@code alpha} is a by-value {@code float2}; {@code A} is cast to const {@code cuComplex*}
 * and {@code B} to a non-const {@code cuComplex*}.
 */
public static native void cublasCtrsm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        @ByVal @Cast("cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 A, int lda,
        @Cast("cuComplex*") float2 B, int ldb);

/**
 * Native binding for {@code cublasZtrsm} (header section TRSM), double-precision complex.
 * {@code alpha} is a by-value {@code double2}; {@code A} is cast to const {@code cuDoubleComplex*}
 * and {@code B} to a non-const {@code cuDoubleComplex*}.
 */
public static native void cublasZtrsm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        @ByVal @Cast("cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 A, int lda,
        @Cast("cuDoubleComplex*") double2 B, int ldb);
/*------------------------------------------------------------------------*/       
/* TRMM*/
/**
 * Native bindings for {@code cublasStrmm} (header section TRMM), single precision.
 * Overloads accept the matrices as {@code FloatPointer}, {@code FloatBuffer} or {@code float[]}.
 * {@code A} is const; {@code B} is the non-const matrix argument.
 * NOTE(review): presumably the BLAS triangular matrix multiply with {@code B} overwritten - confirm against the cuBLAS docs.
 */
public static native void cublasStrmm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        float alpha,
        @Const FloatPointer A, int lda,
        FloatPointer B, int ldb);
// NIO buffer variant.
public static native void cublasStrmm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        float alpha,
        @Const FloatBuffer A, int lda,
        FloatBuffer B, int ldb);
// Java array variant.
public static native void cublasStrmm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        float alpha,
        @Const float[] A, int lda,
        float[] B, int ldb);
/**
 * Native bindings for {@code cublasDtrmm} (header section TRMM), double precision.
 * Overloads accept the matrices as {@code DoublePointer}, {@code DoubleBuffer} or {@code double[]}.
 * {@code A} is const; {@code B} is the non-const matrix argument.
 * NOTE(review): presumably the BLAS triangular matrix multiply with {@code B} overwritten - confirm against the cuBLAS docs.
 */
public static native void cublasDtrmm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        double alpha,
        @Const DoublePointer A, int lda,
        DoublePointer B, int ldb);
// NIO buffer variant.
public static native void cublasDtrmm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        double alpha,
        @Const DoubleBuffer A, int lda,
        DoubleBuffer B, int ldb);
// Java array variant.
public static native void cublasDtrmm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        double alpha,
        @Const double[] A, int lda,
        double[] B, int ldb);
/**
 * Native binding for {@code cublasCtrmm} (header section TRMM), single-precision complex.
 * {@code alpha} is a by-value {@code float2}; {@code A} is cast to const {@code cuComplex*}
 * and {@code B} to a non-const {@code cuComplex*}.
 */
public static native void cublasCtrmm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        @ByVal @Cast("cuComplex*") float2 alpha,
        @Cast("const cuComplex*") float2 A, int lda,
        @Cast("cuComplex*") float2 B, int ldb);
/**
 * Native binding for {@code cublasZtrmm} (header section TRMM), double-precision complex.
 * {@code alpha} is a by-value {@code double2}; {@code A} is cast to const {@code cuDoubleComplex*}
 * and {@code B} to a non-const {@code cuDoubleComplex*}.
 */
public static native void cublasZtrmm(
        @Cast("char") byte side, @Cast("char") byte uplo,
        @Cast("char") byte transa, @Cast("char") byte diag,
        int m, int n,
        @ByVal @Cast("cuDoubleComplex*") double2 alpha,
        @Cast("const cuDoubleComplex*") double2 A, int lda,
        @Cast("cuDoubleComplex*") double2 B, int ldb);

// #if defined(__cplusplus)
// #endif /* __cplusplus */

// #endif /* !defined(CUBLAS_H_) */


// Parsed from <cublasLt.h>

/*
 * Copyright 1993-2021 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */
// #pragma once

// #ifndef CUBLASAPI
// #ifdef __CUDACC__
// #define CUBLASAPI __host__ __device__
// #else
// #define CUBLASAPI
// #endif
// #endif

// #include <cublas_api.h>

// #include <stdint.h>
// #include <stddef.h>
// #include <stdio.h>

// #if defined(__cplusplus)
// Targeting ..\cublas\cublasLtContext.java



/**
 * Native binding for {@code cublasLtCreate}.
 *
 * @param lightHandle  handle object, handed to native code as a pointer-to-pointer ({@code @ByPtrPtr})
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasLtCreate(
        @ByPtrPtr cublasLtContext lightHandle);

/**
 * Native binding for {@code cublasLtDestroy}.
 *
 * @param lightHandle  the cuBLASLt context handle passed to the native call
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasLtDestroy(
        cublasLtContext lightHandle);

/**
 * Native binding for {@code cublasLtGetVersion}.
 *
 * @return the native {@code size_t} result widened to a Java {@code long}
 */
public static native
@Cast("size_t") long cublasLtGetVersion();

/**
 * Native binding for {@code cublasLtGetCudartVersion}.
 *
 * @return the native {@code size_t} result widened to a Java {@code long}
 */
public static native
@Cast("size_t") long cublasLtGetCudartVersion();

/**
 * Native bindings for {@code cublasLtGetProperty}. The three overloads differ only in the
 * container used for {@code value}: {@code IntPointer}, {@code IntBuffer} or {@code int[]}.
 *
 * @param type   property selector, cast to the native {@code libraryPropertyType}
 * @param value  non-const int storage handed to the native call
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasLtGetProperty(
        @Cast("libraryPropertyType") int type,
        IntPointer value);
// NIO buffer variant.
public static native @Cast("cublasStatus_t") int cublasLtGetProperty(
        @Cast("libraryPropertyType") int type,
        IntBuffer value);
// Java array variant.
public static native @Cast("cublasStatus_t") int cublasLtGetProperty(
        @Cast("libraryPropertyType") int type,
        int[] value);
// Targeting ..\cublas\cublasLtMatrixLayoutOpaque_t.java



/** Opaque descriptor for matrix memory layout
 */
// Targeting ..\cublas\cublasLtMatmulAlgo_t.java


// Targeting ..\cublas\cublasLtMatmulDescOpaque_t.java



/** Opaque descriptor for cublasLtMatmul() operation details
 */
// Targeting ..\cublas\cublasLtMatrixTransformDescOpaque_t.java



/** Opaque descriptor for cublasLtMatrixTransform() operation details
 */
// Targeting ..\cublas\cublasLtMatmulPreferenceOpaque_t.java



/** Opaque descriptor for cublasLtMatmulAlgoGetHeuristic() configuration
 */

/** Tile size, expressed as Rows x Cols of the C/D matrix.
 *
 * Tile IDs are generally ordered by size first and by first dimension second.
 * Each constant below is declared on its own; values mirror enum cublasLtMatmulTile_t.
 */
/** enum cublasLtMatmulTile_t */
public static final int CUBLASLT_MATMUL_TILE_UNDEFINED = 0;
public static final int CUBLASLT_MATMUL_TILE_8x8 = 1;
public static final int CUBLASLT_MATMUL_TILE_8x16 = 2;
public static final int CUBLASLT_MATMUL_TILE_16x8 = 3;
public static final int CUBLASLT_MATMUL_TILE_8x32 = 4;
public static final int CUBLASLT_MATMUL_TILE_16x16 = 5;
public static final int CUBLASLT_MATMUL_TILE_32x8 = 6;
public static final int CUBLASLT_MATMUL_TILE_8x64 = 7;
public static final int CUBLASLT_MATMUL_TILE_16x32 = 8;
public static final int CUBLASLT_MATMUL_TILE_32x16 = 9;
public static final int CUBLASLT_MATMUL_TILE_64x8 = 10;
public static final int CUBLASLT_MATMUL_TILE_32x32 = 11;
public static final int CUBLASLT_MATMUL_TILE_32x64 = 12;
public static final int CUBLASLT_MATMUL_TILE_64x32 = 13;
public static final int CUBLASLT_MATMUL_TILE_32x128 = 14;
public static final int CUBLASLT_MATMUL_TILE_64x64 = 15;
public static final int CUBLASLT_MATMUL_TILE_128x32 = 16;
public static final int CUBLASLT_MATMUL_TILE_64x128 = 17;
public static final int CUBLASLT_MATMUL_TILE_128x64 = 18;
public static final int CUBLASLT_MATMUL_TILE_64x256 = 19;
public static final int CUBLASLT_MATMUL_TILE_128x128 = 20;
public static final int CUBLASLT_MATMUL_TILE_256x64 = 21;
public static final int CUBLASLT_MATMUL_TILE_64x512 = 22;
public static final int CUBLASLT_MATMUL_TILE_128x256 = 23;
public static final int CUBLASLT_MATMUL_TILE_256x128 = 24;
public static final int CUBLASLT_MATMUL_TILE_512x64 = 25;
public static final int CUBLASLT_MATMUL_TILE_64x96 = 26;
public static final int CUBLASLT_MATMUL_TILE_96x64 = 27;
public static final int CUBLASLT_MATMUL_TILE_96x128 = 28;
public static final int CUBLASLT_MATMUL_TILE_128x160 = 29;
public static final int CUBLASLT_MATMUL_TILE_160x128 = 30;
public static final int CUBLASLT_MATMUL_TILE_192x128 = 31;
// One past the last tile ID listed above.
public static final int CUBLASLT_MATMUL_TILE_END = 32;

/** Size and number of stages in which elements are read into shared memory.
 *
 * Stage IDs are generally ordered by stage size first and by number of stages second.
 * Each constant below is declared on its own; values mirror enum cublasLtMatmulStages_t.
 */
/** enum cublasLtMatmulStages_t */
public static final int CUBLASLT_MATMUL_STAGES_UNDEFINED = 0;
public static final int CUBLASLT_MATMUL_STAGES_16x1 = 1;
public static final int CUBLASLT_MATMUL_STAGES_16x2 = 2;
public static final int CUBLASLT_MATMUL_STAGES_16x3 = 3;
public static final int CUBLASLT_MATMUL_STAGES_16x4 = 4;
public static final int CUBLASLT_MATMUL_STAGES_16x5 = 5;
public static final int CUBLASLT_MATMUL_STAGES_16x6 = 6;
public static final int CUBLASLT_MATMUL_STAGES_32x1 = 7;
public static final int CUBLASLT_MATMUL_STAGES_32x2 = 8;
public static final int CUBLASLT_MATMUL_STAGES_32x3 = 9;
public static final int CUBLASLT_MATMUL_STAGES_32x4 = 10;
public static final int CUBLASLT_MATMUL_STAGES_32x5 = 11;
public static final int CUBLASLT_MATMUL_STAGES_32x6 = 12;
public static final int CUBLASLT_MATMUL_STAGES_64x1 = 13;
public static final int CUBLASLT_MATMUL_STAGES_64x2 = 14;
public static final int CUBLASLT_MATMUL_STAGES_64x3 = 15;
public static final int CUBLASLT_MATMUL_STAGES_64x4 = 16;
public static final int CUBLASLT_MATMUL_STAGES_64x5 = 17;
public static final int CUBLASLT_MATMUL_STAGES_64x6 = 18;
public static final int CUBLASLT_MATMUL_STAGES_128x1 = 19;
public static final int CUBLASLT_MATMUL_STAGES_128x2 = 20;
public static final int CUBLASLT_MATMUL_STAGES_128x3 = 21;
public static final int CUBLASLT_MATMUL_STAGES_128x4 = 22;
public static final int CUBLASLT_MATMUL_STAGES_128x5 = 23;
public static final int CUBLASLT_MATMUL_STAGES_128x6 = 24;
public static final int CUBLASLT_MATMUL_STAGES_32x10 = 25;
public static final int CUBLASLT_MATMUL_STAGES_8x4 = 26;
public static final int CUBLASLT_MATMUL_STAGES_16x10 = 27;
public static final int CUBLASLT_MATMUL_STAGES_8x5 = 28;
public static final int CUBLASLT_MATMUL_STAGES_16x80 = 29;
public static final int CUBLASLT_MATMUL_STAGES_64x80 = 30;
// One past the last stages ID listed above.
public static final int CUBLASLT_MATMUL_STAGES_END = 31;

/** Pointer mode to use for alpha/beta.
 *
 * The first two values are aliases of the corresponding CUBLAS_POINTER_MODE_* constants;
 * the last two are literal values 2 and 3.
 */
/** enum cublasLtPointerMode_t */

/** matches CUBLAS_POINTER_MODE_HOST, pointer targets a single value host memory */
public static final int CUBLASLT_POINTER_MODE_HOST = CUBLAS_POINTER_MODE_HOST;

/** matches CUBLAS_POINTER_MODE_DEVICE, pointer targets a single value device memory */
public static final int CUBLASLT_POINTER_MODE_DEVICE = CUBLAS_POINTER_MODE_DEVICE;

/** pointer targets an array in device memory */
public static final int CUBLASLT_POINTER_MODE_DEVICE_VECTOR = 2;

/** alpha pointer targets an array in device memory, beta is zero */
public static final int CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO = 3;

/** Mask to define and query pointer mode capability.
 *
 * Non-zero values are distinct single bits (1, 2, 4, 8), so they can be OR-ed together.
 */
/** enum cublasLtPointerModeMask_t */

/** no initial filtering is performed when querying pointer mode capabilities, will use gemm pointer mode defined in operation description */
public static final int CUBLASLT_POINTER_MODE_MASK_NO_FILTERING = 0;

/** see CUBLASLT_POINTER_MODE_HOST */
public static final int CUBLASLT_POINTER_MODE_MASK_HOST = 1;

/** see CUBLASLT_POINTER_MODE_DEVICE */
public static final int CUBLASLT_POINTER_MODE_MASK_DEVICE = 2;

/** see CUBLASLT_POINTER_MODE_DEVICE_VECTOR */
public static final int CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR = 4;

/** see CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO */
public static final int CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO = 8;

/** Implementation details that may affect numerical behavior of algorithms.
 *
 * All flags are 64-bit ({@code long}) so that the highest one, shifted by 32, does not overflow.
 */

// Operation-type flags: bits 0..7.
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_FMA = 0x01L << 0;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA = 0x02L << 0;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_IMMA = 0x04L << 0;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_DMMA = 0x08L << 0;
// 0xfe covers every bit of the low byte except the FMA bit.
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_TENSOR_OP_MASK = 0xfeL << 0;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_TYPE_MASK = 0xffL << 0;

// Accumulator-type flags: bits 8..15.
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_16F = 0x01L << 8;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32F = 0x02L << 8;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_64F = 0x04L << 8;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32I = 0x08L << 8;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_TYPE_MASK = 0xffL << 8;

// Input-type flags: bits 16..23.
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16F = 0x01L << 16;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16BF = 0x02L << 16;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_TF32 = 0x04L << 16;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_32F = 0x08L << 16;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_64F = 0x10L << 16;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8I = 0x20L << 16;
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK = 0xffL << 16;

// Single flag at bit 32; needs the long operand to be representable.
public static final long CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN = 0x01L << 32;

/** Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C).
 *
 * Native binding; each matrix pointer is paired with its layout descriptor in the argument list.
 * {@code alpha}, {@code A}, {@code B}, {@code beta}, {@code C} and {@code algo} are const;
 * {@code D} and {@code workspace} are not.
 *
 * \retval     CUBLAS_STATUS_NOT_INITIALIZED   if cuBLASLt handle has not been initialized
 * \retval     CUBLAS_STATUS_INVALID_VALUE     if parameters are in conflict or in an impossible configuration,
 *                                             e.g. when workspaceSizeInBytes is less than the workspace required
 *                                             by the configured algo
 * \retval     CUBLAS_STATUS_NOT_SUPPORTED     if current implementation on selected device doesn't support the
 *                                             configured operation
 * \retval     CUBLAS_STATUS_ARCH_MISMATCH     if configured operation cannot be run using selected device
 * \retval     CUBLAS_STATUS_EXECUTION_FAILED  if cuda reported execution error from the device
 * \retval     CUBLAS_STATUS_SUCCESS           if the operation completed successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmul(
        cublasLtContext lightHandle,
        @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t computeDesc,
        @Const Pointer alpha,
        @Const Pointer A, @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Adesc,
        @Const Pointer B, @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Bdesc,
        @Const Pointer beta,
        @Const Pointer C, @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Cdesc,
        Pointer D, @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Ddesc,
        @Const cublasLtMatmulAlgo_t algo,
        Pointer workspace, @Cast("size_t") long workspaceSizeInBytes,
        CUstream_st stream);

/** Matrix layout conversion helper (C = alpha * op(A) + beta * op(B)).
 *
 * Can be used to change memory order of data or to scale and shift the values.
 * Native binding; each matrix pointer is paired with its layout descriptor in the argument list.
 *
 * \retval     CUBLAS_STATUS_NOT_INITIALIZED   if cuBLASLt handle has not been initialized
 * \retval     CUBLAS_STATUS_INVALID_VALUE     if parameters are in conflict or in an impossible configuration,
 *                                             e.g. when A is not NULL, but Adesc is NULL
 * \retval     CUBLAS_STATUS_NOT_SUPPORTED     if current implementation on selected device doesn't support the
 *                                             configured operation
 * \retval     CUBLAS_STATUS_ARCH_MISMATCH     if configured operation cannot be run using selected device
 * \retval     CUBLAS_STATUS_EXECUTION_FAILED  if cuda reported execution error from the device
 * \retval     CUBLAS_STATUS_SUCCESS           if the operation completed successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixTransform(
        cublasLtContext lightHandle,
        @Cast("cublasLtMatrixTransformDesc_t") cublasLtMatrixTransformDescOpaque_t transformDesc,
        @Const Pointer alpha,
        @Const Pointer A, @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Adesc,
        @Const Pointer beta,
        @Const Pointer B, @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Bdesc,
        Pointer C, @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Cdesc,
        CUstream_st stream);

/* ---------------------------------------------------------------------------------------*/
/* Helper functions for cublasLtMatrixLayout_t */
/* ---------------------------------------------------------------------------------------*/

/** Enum for data ordering. Each constant is declared on its own; values mirror enum cublasLtOrder_t. */
/** enum cublasLtOrder_t */

/** Column-major.
 *
 * Leading dimension is the stride (in elements) to the beginning of next column in memory.
 */
public static final int CUBLASLT_ORDER_COL = 0;

/** Row-major.
 *
 * Leading dimension is the stride (in elements) to the beginning of next row in memory.
 */
public static final int CUBLASLT_ORDER_ROW = 1;

/** Column-major ordered tiles of 32 columns.
 *
 * Leading dimension is the stride (in elements) to the beginning of next group of 32 columns.
 * E.g. if matrix has 33 columns and 2 rows, ld must be at least (32) * 2 = 64.
 */
public static final int CUBLASLT_ORDER_COL32 = 2;

/** Column-major ordered tiles of composite tiles with total 32 columns and 8 rows; each tile is composed of
 * interleaved inner tiles of 4 columns within 4 even or odd rows in an alternating pattern.
 *
 * Leading dimension is the stride (in elements) to the beginning of the first 32 column x 8 row tile for the
 * next 32-wide group of columns. E.g. if matrix has 33 columns and 1 row, ld must be at least
 * (32 * 8) * 1 = 256.
 */
public static final int CUBLASLT_ORDER_COL4_4R2_8C = 3;

/** Column-major ordered tiles of composite tiles with total 32 columns and 32 rows.
 * Element offset within the tile is calculated as (((row%8)/2*4+row/8)*2+row%2)*32+col.
 *
 * Leading dimension is the stride (in elements) to the beginning of the first 32 column x 32 row tile for the
 * next 32-wide group of columns. E.g. if matrix has 33 columns and 1 row, ld must be at least
 * (32*32)*1 = 1024.
 */
public static final int CUBLASLT_ORDER_COL32_2R_4R4 = 4;

/** Attributes of memory layout. Each constant is declared on its own; the type named at the end of each
 * comment is the storage type documented for that attribute. */
/** enum cublasLtMatrixLayoutAttribute_t */

/** Data type, see cudaDataType.
 *
 * uint32_t
 */
public static final int CUBLASLT_MATRIX_LAYOUT_TYPE = 0;

/** Memory order of the data, see cublasLtOrder_t.
 *
 * int32_t, default: CUBLASLT_ORDER_COL
 */
public static final int CUBLASLT_MATRIX_LAYOUT_ORDER = 1;

/** Number of rows.
 *
 * Usually only values that can be expressed as int32_t are supported.
 *
 * uint64_t
 */
public static final int CUBLASLT_MATRIX_LAYOUT_ROWS = 2;

/** Number of columns.
 *
 * Usually only values that can be expressed as int32_t are supported.
 *
 * uint64_t
 */
public static final int CUBLASLT_MATRIX_LAYOUT_COLS = 3;

/** Matrix leading dimension.
 *
 * For CUBLASLT_ORDER_COL this is the stride (in elements) of a matrix column; for other memory orders see the
 * documentation of the cublasLtOrder_t values.
 *
 * Currently only non-negative values are supported. The value must be large enough that matrix memory
 * locations do not overlap (e.g. greater or equal to CUBLASLT_MATRIX_LAYOUT_ROWS in case of
 * CUBLASLT_ORDER_COL).
 *
 * int64_t
 */
public static final int CUBLASLT_MATRIX_LAYOUT_LD = 4;

/** Number of matmul operations to perform in the batch.
 *
 * See also CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT
 *
 * int32_t, default: 1
 */
public static final int CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT = 5;

/** Stride (in elements) to the next matrix for strided batch operation.
 *
 * When matrix type is planar-complex (CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET != 0), batch stride is interpreted by
 * cublasLtMatmul() in number of real valued sub-elements. E.g. for data of type CUDA_C_16F, an offset of 1024B
 * is encoded as a stride of value 512 (since each element of the real and imaginary matrices is a 2B (16bit)
 * floating point type).
 *
 * NOTE: A bug in cublasLtMatrixTransform() causes it to interpret the batch stride for a planar-complex matrix
 * as if it was specified in number of complex elements. Therefore an offset of 1024B must be encoded as stride
 * value 256 when calling cublasLtMatrixTransform() (each complex element is 4B with real and imaginary values
 * 2B each). This behavior is expected to be corrected in the next major cuBLAS version.
 *
 * int64_t, default: 0
 */
public static final int CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET = 6;

/** Stride (in bytes) to the imaginary plane for planar complex layout.
 *
 * int64_t, default: 0 - 0 means that layout is regular (real and imaginary parts of complex numbers are
 * interleaved in memory in each element)
 */
public static final int CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET = 7;

/** Internal. Do not use directly.
 *
 * Native binding taking the descriptor, an explicit {@code size} (native {@code size_t}), the data type
 * and the rows / cols / ld values.
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixLayoutInit_internal(
        @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t matLayout,
        @Cast("size_t") long size,
        @Cast("cudaDataType") int type,
        @Cast("uint64_t") long rows, @Cast("uint64_t") long cols,
        @Cast("int64_t") long ld);

/** Initialize matrix layout descriptor in pre-allocated space.
 *
 * Native binding; {@code rows} and {@code cols} map to native {@code uint64_t}, {@code ld} to {@code int64_t}.
 *
 * \retval     CUBLAS_STATUS_ALLOC_FAILED  if size of the pre-allocated space is insufficient
 * \retval     CUBLAS_STATUS_SUCCESS       if descriptor was created successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixLayoutInit(
        @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t matLayout,
        @Cast("cudaDataType") int type,
        @Cast("uint64_t") long rows, @Cast("uint64_t") long cols,
        @Cast("int64_t") long ld);

/** Create new matrix layout descriptor.
 *
 * Native binding; {@code matLayout} is a {@code PointerPointer} cast to {@code cublasLtMatrixLayout_t*}.
 * {@code rows} and {@code cols} map to native {@code uint64_t}, {@code ld} to {@code int64_t}.
 *
 * \retval     CUBLAS_STATUS_ALLOC_FAILED  if memory could not be allocated
 * \retval     CUBLAS_STATUS_SUCCESS       if descriptor was created successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixLayoutCreate(
        @Cast("cublasLtMatrixLayout_t*") PointerPointer matLayout,
        @Cast("cudaDataType") int type,
        @Cast("uint64_t") long rows, @Cast("uint64_t") long cols,
        @Cast("int64_t") long ld);

/** Destroy matrix layout descriptor.
 *
 * Native binding; takes the descriptor cast to {@code cublasLtMatrixLayout_t}.
 *
 * \retval     CUBLAS_STATUS_SUCCESS  if operation was successful
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixLayoutDestroy(
        @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t matLayout);

/** Set matrix layout descriptor attribute.
 *
 * @param matLayout [in]    The descriptor
 * @param attr [in]         The attribute (a cublasLtMatrixLayoutAttribute_t value)
 * @param buf [in]          memory address containing the new value (const on the native side)
 * @param sizeInBytes [in]  size of buf buffer for verification (in bytes)
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if buf is NULL or sizeInBytes doesn't match size of internal storage
 *                                          for the selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixLayoutSetAttribute(
        @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t matLayout,
        @Cast("cublasLtMatrixLayoutAttribute_t") int attr,
        @Const Pointer buf, @Cast("size_t") long sizeInBytes);

/** Get matrix layout descriptor attribute.
 *
 * @param matLayout [in]     The descriptor
 * @param attr [in]          The attribute (a cublasLtMatrixLayoutAttribute_t value)
 * @param buf [out]          memory address that receives the attribute's value
 * @param sizeInBytes [in]   size of buf buffer for verification (in bytes)
 * @param sizeWritten [out]  only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero:
 *                           number of bytes actually written; if sizeInBytes is 0: number of bytes needed to
 *                           write full contents
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is
 *                                          non-zero and buf is NULL or sizeInBytes doesn't match size of
 *                                          internal storage for the selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute's value was successfully written to user memory
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixLayoutGetAttribute(
        @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t matLayout,
        @Cast("cublasLtMatrixLayoutAttribute_t") int attr,
        Pointer buf, @Cast("size_t") long sizeInBytes,
        @Cast("size_t*") SizeTPointer sizeWritten);

/* ---------------------------------------------------------------------------------------*/
/* Helper functions for cublasLtMatmulDesc_t */
/* ---------------------------------------------------------------------------------------*/

/** Matmul descriptor attributes to define details of the operation.
 *
 * These values are passed as the {@code attr} argument of cublasLtMatmulDescSetAttribute() and
 * cublasLtMatmulDescGetAttribute(). The type named at the end of each entry's description is the type of the
 * value stored behind the {@code buf} argument of those calls.
 */
/** enum cublasLtMatmulDescAttributes_t */
public static final int
    /** Compute type, see cudaDataType. Defines data type used for multiply and accumulate operations and the
     * accumulator during matrix multiplication.
     *
     * int32_t
     */
    CUBLASLT_MATMUL_DESC_COMPUTE_TYPE = 0,

    /** Scale type, see cudaDataType. Defines data type of alpha and beta. Accumulator and value from matrix C are
     * typically converted to scale type before final scaling. Value is then converted from scale type to type of matrix
     * D before being stored in memory.
     *
     * int32_t, default: same as CUBLASLT_MATMUL_DESC_COMPUTE_TYPE
     */
    CUBLASLT_MATMUL_DESC_SCALE_TYPE = 1,

    /** Pointer mode of alpha and beta, see cublasLtPointerMode_t. When CUBLASLT_POINTER_MODE_DEVICE_VECTOR is in use,
     * alpha/beta vector lengths must match number of output matrix rows.
     *
     * int32_t, default: CUBLASLT_POINTER_MODE_HOST
     */
    CUBLASLT_MATMUL_DESC_POINTER_MODE = 2,

    /** Transform of matrix A, see cublasOperation_t.
     *
     * int32_t, default: CUBLAS_OP_N
     */
    CUBLASLT_MATMUL_DESC_TRANSA = 3,

    /** Transform of matrix B, see cublasOperation_t.
     *
     * int32_t, default: CUBLAS_OP_N
     */
    CUBLASLT_MATMUL_DESC_TRANSB = 4,

    /** Transform of matrix C, see cublasOperation_t.
     *
     * Currently only CUBLAS_OP_N is supported.
     *
     * int32_t, default: CUBLAS_OP_N
     */
    CUBLASLT_MATMUL_DESC_TRANSC = 5,

    /** Matrix fill mode, see cublasFillMode_t.
     *
     * int32_t, default: CUBLAS_FILL_MODE_FULL
     */
    CUBLASLT_MATMUL_DESC_FILL_MODE = 6,

    /** Epilogue function, see cublasLtEpilogue_t.
     *
     * uint32_t, default: CUBLASLT_EPILOGUE_DEFAULT
     */
    CUBLASLT_MATMUL_DESC_EPILOGUE = 7,

    /** Bias or bias gradient vector pointer in the device memory.
     *
     * Bias case. See CUBLASLT_EPILOGUE_BIAS.
     * Bias vector elements are the same type as
     * the output elements (Ctype) with the exception of IMMA kernels with computeType=CUDA_R_32I and Ctype=CUDA_R_8I
     * where the bias vector elements are the same type as alpha, beta (CUBLASLT_MATMUL_DESC_SCALE_TYPE=CUDA_R_32F).
     * Bias vector length must match matrix D rows count.
     *
     * Bias gradient case. See CUBLASLT_EPILOGUE_DRELU_BGRAD and CUBLASLT_EPILOGUE_DGELU_BGRAD.
     * Bias gradient vector elements are the same type as the output elements
     * (Ctype) with the exception of IMMA kernels (see above).
     *
     * Routines that don't dereference this pointer, like cublasLtMatmulAlgoGetHeuristic()
     * depend on its value to determine expected pointer alignment.
     *
     * Bias case: const void *, default: NULL
     * Bias gradient case: void *, default: NULL
     */
    CUBLASLT_MATMUL_DESC_BIAS_POINTER = 8,

    /** Batch stride for bias or bias gradient vector.
     *
     * Used together with CUBLASLT_MATMUL_DESC_BIAS_POINTER when matrix D's CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT > 1.
     *
     * Note: no attribute in this table is assigned the value 9; the numbering continues at 10.
     *
     * int64_t, default: 0
     */
    CUBLASLT_MATMUL_DESC_BIAS_BATCH_STRIDE = 10,

    /** Pointer for epilogue auxiliary buffer.
     *
     * - Output vector for ReLu bit-mask in forward pass when CUBLASLT_EPILOGUE_RELU_AUX
     *   or CUBLASLT_EPILOGUE_RELU_AUX_BIAS epilogue is used.
     * - Input vector for ReLu bit-mask in backward pass when
     *   CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is used.
     *
     * - Output of GELU input matrix in forward pass when
     *   CUBLASLT_EPILOGUE_GELU_AUX_BIAS epilogue is used.
     * - Input of GELU input matrix for backward pass when
     *   CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue is used.
     *
     * GELU input matrix elements type is the same as the type of elements of
     * the output matrix.
     *
     * Routines that don't dereference this pointer, like cublasLtMatmulAlgoGetHeuristic()
     * depend on its value to determine expected pointer alignment.
     *
     * Requires setting CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD attribute.
     *
     * Forward pass: void *, default: NULL
     * Backward pass: const void *, default: NULL
     */
    CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER = 11,

    /** Leading dimension for epilogue auxiliary buffer.
     *
     * - ReLu bit-mask matrix leading dimension in elements (i.e. bits)
     *   when CUBLASLT_EPILOGUE_RELU_AUX, CUBLASLT_EPILOGUE_RELU_AUX_BIAS or CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is used.
     *   Must be divisible by 128 and be no less than the number of rows in the output matrix.
     *
     * - GELU input matrix leading dimension in elements
     *   when CUBLASLT_EPILOGUE_GELU_AUX_BIAS or CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue is used.
     *   Must be divisible by 8 and be no less than the number of rows in the output matrix.
     *
     * int64_t, default: 0
     */
    CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD = 12,

    /** Batch stride for epilogue auxiliary buffer.
     *
     * - ReLu bit-mask matrix batch stride in elements (i.e. bits)
     *   when CUBLASLT_EPILOGUE_RELU_AUX, CUBLASLT_EPILOGUE_RELU_AUX_BIAS or CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is used.
     *   Must be divisible by 128.
     *
     * - GELU input matrix batch stride in elements
     *   when CUBLASLT_EPILOGUE_GELU_AUX_BIAS or CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue is used.
     *   Must be divisible by 8.
     *
     * int64_t, default: 0
     */
    CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_BATCH_STRIDE = 13;

/** Internal. Do not use directly.
 *
 * Takes the same descriptor, compute type and scale type arguments as cublasLtMatmulDescInit(), plus an explicit
 * {@code size} argument.
 * NOTE(review): {@code size} is presumably the size in bytes of the pre-allocated descriptor storage - confirm
 * against cublasLt.h.
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulDescInit_internal(
    @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t matmulDesc,
    @Cast("size_t") long size,
    @Cast("cublasComputeType_t") int computeType,
    @Cast("cudaDataType_t") int scaleType);

/** Initialize matmul operation descriptor in pre-allocated space.
 *
 * @param matmulDesc [in]   The pre-allocated descriptor to initialize
 * @param computeType [in]  Compute type of the operation (a cublasComputeType_t value)
 * @param scaleType [in]    Scale type of the operation (a cudaDataType_t value)
 *
 * \retval     CUBLAS_STATUS_ALLOC_FAILED  if size of the pre-allocated space is insufficient
 * \retval     CUBLAS_STATUS_SUCCESS       if descriptor was initialized successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulDescInit(
    @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t matmulDesc,
    @Cast("cublasComputeType_t") int computeType,
    @Cast("cudaDataType_t") int scaleType);

/** Create new matmul operation descriptor.
 *
 * @param matmulDesc [out]  Location (passed as cublasLtMatmulDesc_t*) that receives the created descriptor
 * @param computeType [in]  Compute type of the operation (a cublasComputeType_t value)
 * @param scaleType [in]    Scale type of the operation (a cudaDataType_t value)
 *
 * \retval     CUBLAS_STATUS_ALLOC_FAILED  if memory could not be allocated
 * \retval     CUBLAS_STATUS_SUCCESS       if descriptor was created successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulDescCreate(@Cast("cublasLtMatmulDesc_t*") PointerPointer matmulDesc, @Cast("cublasComputeType_t") int computeType, @Cast("cudaDataType_t") int scaleType);

/** Destroy matmul operation descriptor.
 *
 * @param matmulDesc [in]  The descriptor to destroy
 *
 * \retval     CUBLAS_STATUS_SUCCESS  if operation was successful
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulDescDestroy(@Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t matmulDesc);

/** Set matmul operation descriptor attribute.
 *
 * @param matmulDesc [in]   The descriptor
 * @param attr [in]         The attribute to set, one of the cublasLtMatmulDescAttributes_t values
 *                          (CUBLASLT_MATMUL_DESC_*)
 * @param buf [in]          memory address containing the new value
 * @param sizeInBytes [in]  size of buf buffer for verification (in bytes)
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulDescSetAttribute(
    @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t matmulDesc,
    @Cast("cublasLtMatmulDescAttributes_t") int attr,
    @Const Pointer buf,
    @Cast("size_t") long sizeInBytes);

/** Get matmul operation descriptor attribute.
 *
 * @param matmulDesc [in]   The descriptor
 * @param attr [in]         The attribute to query, one of the cublasLtMatmulDescAttributes_t values
 *                          (CUBLASLT_MATMUL_DESC_*)
 * @param buf [out]         memory address the attribute's value is written to
 * @param sizeInBytes [in]  size of buf buffer for verification (in bytes)
 * @param sizeWritten [out] only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
 *                          bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
 *                                          and buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute's value was successfully written to user memory
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulDescGetAttribute(
    @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t matmulDesc,
    @Cast("cublasLtMatmulDescAttributes_t") int attr,
    Pointer buf,
    @Cast("size_t") long sizeInBytes,
    @Cast("size_t*") SizeTPointer sizeWritten);

/* ---------------------------------------------------------------------------------------*/
/* Helper functions for cublasLtMatrixTransformDesc_t */
/* ---------------------------------------------------------------------------------------*/

/** Matrix transform descriptor attributes to define details of the operation.
 *
 * These values are passed as the {@code attr} argument of cublasLtMatrixTransformDescSetAttribute() and
 * cublasLtMatrixTransformDescGetAttribute(). The type named at the end of each entry's description is the type of
 * the value stored behind the {@code buf} argument of those calls.
 */
/** enum cublasLtMatrixTransformDescAttributes_t */
public static final int
    /** Scale type, see cudaDataType. Inputs are converted to scale type for scaling and summation and results are then
     * converted to output type to store in memory.
     *
     * int32_t
     */
    CUBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE = 0,

    /** Pointer mode of alpha and beta, see cublasLtPointerMode_t.
     *
     * int32_t, default: CUBLASLT_POINTER_MODE_HOST
     */
    CUBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE = 1,

    /** Transform of matrix A, see cublasOperation_t.
     *
     * int32_t, default: CUBLAS_OP_N
     */
    CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSA = 2,

    /** Transform of matrix B, see cublasOperation_t.
     *
     * int32_t, default: CUBLAS_OP_N
     */
    CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSB = 3;

/** Internal. Do not use directly.
 *
 * Takes the same descriptor and scale type arguments as cublasLtMatrixTransformDescInit(), plus an explicit
 * {@code size} argument.
 * NOTE(review): {@code size} is presumably the size in bytes of the pre-allocated descriptor storage - confirm
 * against cublasLt.h.
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixTransformDescInit_internal(@Cast("cublasLtMatrixTransformDesc_t") cublasLtMatrixTransformDescOpaque_t transformDesc, @Cast("size_t") long size, @Cast("cudaDataType") int scaleType);

/** Initialize matrix transform operation descriptor in pre-allocated space.
 *
 * @param transformDesc [in]  The pre-allocated descriptor to initialize
 * @param scaleType [in]      Scale type of the operation (a cudaDataType value)
 *
 * \retval     CUBLAS_STATUS_ALLOC_FAILED  if size of the pre-allocated space is insufficient
 * \retval     CUBLAS_STATUS_SUCCESS       if descriptor was initialized successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixTransformDescInit(@Cast("cublasLtMatrixTransformDesc_t") cublasLtMatrixTransformDescOpaque_t transformDesc, @Cast("cudaDataType") int scaleType);

/** Create new matrix transform operation descriptor.
 *
 * @param transformDesc [out]  Location (passed as cublasLtMatrixTransformDesc_t*) that receives the created descriptor
 * @param scaleType [in]       Scale type of the operation (a cudaDataType value)
 *
 * \retval     CUBLAS_STATUS_ALLOC_FAILED  if memory could not be allocated
 * \retval     CUBLAS_STATUS_SUCCESS       if descriptor was created successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixTransformDescCreate(@Cast("cublasLtMatrixTransformDesc_t*") PointerPointer transformDesc, @Cast("cudaDataType") int scaleType);

/** Destroy matrix transform operation descriptor.
 *
 * @param transformDesc [in]  The descriptor to destroy
 *
 * \retval     CUBLAS_STATUS_SUCCESS  if operation was successful
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixTransformDescDestroy(@Cast("cublasLtMatrixTransformDesc_t") cublasLtMatrixTransformDescOpaque_t transformDesc);

/** Set matrix transform operation descriptor attribute.
 *
 * @param transformDesc [in]  The descriptor
 * @param attr [in]           The attribute to set, one of the cublasLtMatrixTransformDescAttributes_t values
 *                            (CUBLASLT_MATRIX_TRANSFORM_DESC_*)
 * @param buf [in]            memory address containing the new value
 * @param sizeInBytes [in]    size of buf buffer for verification (in bytes)
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixTransformDescSetAttribute(
    @Cast("cublasLtMatrixTransformDesc_t") cublasLtMatrixTransformDescOpaque_t transformDesc,
    @Cast("cublasLtMatrixTransformDescAttributes_t") int attr,
    @Const Pointer buf,
    @Cast("size_t") long sizeInBytes);

/** Get matrix transform operation descriptor attribute.
 *
 * @param transformDesc [in]  The descriptor
 * @param attr [in]           The attribute to query, one of the cublasLtMatrixTransformDescAttributes_t values
 *                            (CUBLASLT_MATRIX_TRANSFORM_DESC_*)
 * @param buf [out]           memory address the attribute's value is written to
 * @param sizeInBytes [in]    size of buf buffer for verification (in bytes)
 * @param sizeWritten [out]   only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
 *                            bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
 *                                          and buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute's value was successfully written to user memory
 */
public static native @Cast("cublasStatus_t") int cublasLtMatrixTransformDescGetAttribute(
    @Cast("cublasLtMatrixTransformDesc_t") cublasLtMatrixTransformDescOpaque_t transformDesc,
    @Cast("cublasLtMatrixTransformDescAttributes_t") int attr,
    Pointer buf,
    @Cast("size_t") long sizeInBytes,
    @Cast("size_t*") SizeTPointer sizeWritten);

/** For computation with complex numbers, this enum selects whether the Gauss complexity reduction algorithm
 * may be applied (see also CUBLASLT_MATMUL_PREF_GAUSSIAN_MODE_MASK).
 */
/** enum cublasLt3mMode_t */
public static final int
    CUBLASLT_3M_MODE_DISALLOWED = 0,
    CUBLASLT_3M_MODE_ALLOWED    = 1;

/** Reduction scheme for portions of the dot-product calculated in parallel (a.k.a. "split-K").
 *
 * The non-zero schemes are distinct bits (1, 2, 4), so they can be combined into a mask such as
 * CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK.
 */
/** enum cublasLtReductionScheme_t */
public static final int
    /** No reduction scheme, dot-product shall be performed in one sequence.
     */
    CUBLASLT_REDUCTION_SCHEME_NONE = 0,

    /** Reduction is performed "in place" - using the output buffer (and output data type) and counters (in workspace)
     * to guarantee the sequentiality.
     */
    CUBLASLT_REDUCTION_SCHEME_INPLACE = 1,

    /** Intermediate results are stored in compute type in the workspace and reduced in a separate step.
     */
    CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE = 2,

    /** Intermediate results are stored in output type in the workspace and reduced in a separate step.
     */
    CUBLASLT_REDUCTION_SCHEME_OUTPUT_TYPE  = 4,

    /** Bit mask with all reduction scheme bits set (INPLACE | COMPUTE_TYPE | OUTPUT_TYPE).
     */
    CUBLASLT_REDUCTION_SCHEME_MASK         = 0x7;

/** Postprocessing options for the epilogue.
 *
 * Used as the value of the CUBLASLT_MATMUL_DESC_EPILOGUE attribute. Several entries are defined as bitwise-OR
 * combinations of other entries and of the literal flag 128, as spelled out in each initializer below.
 */
/** enum cublasLtEpilogue_t */
public static final int
    /** No special postprocessing, just scale and quantize results if necessary.
     */
    CUBLASLT_EPILOGUE_DEFAULT = 1,

    /** ReLu, apply ReLu point-wise transform to the results (x:=max(x, 0)).
     */
    CUBLASLT_EPILOGUE_RELU = 2,

    /** ReLu, apply ReLu point-wise transform to the results (x:=max(x, 0)).
     *
     * This epilogue mode produces an extra output, a ReLu bit-mask matrix,
     * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
     */
    CUBLASLT_EPILOGUE_RELU_AUX = (CUBLASLT_EPILOGUE_RELU | 128),

    /** Bias, apply (broadcasted) Bias from bias vector. Bias vector length must match matrix D rows, it must be packed
     * (stride between vector elements is 1). Bias vector is broadcasted to all columns and added before applying final
     * postprocessing.
     */
    CUBLASLT_EPILOGUE_BIAS = 4,

    /** ReLu and Bias, apply Bias and then ReLu transform.
     */
    CUBLASLT_EPILOGUE_RELU_BIAS = (CUBLASLT_EPILOGUE_RELU | CUBLASLT_EPILOGUE_BIAS),

    /** ReLu and Bias, apply Bias and then ReLu transform.
     *
     * This epilogue mode produces an extra output, a ReLu bit-mask matrix,
     * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
     */
    CUBLASLT_EPILOGUE_RELU_AUX_BIAS = (CUBLASLT_EPILOGUE_RELU_AUX | CUBLASLT_EPILOGUE_BIAS),

    /** ReLu and Bias gradients. Apply independently ReLu and Bias gradient to
     * matmul output. Store ReLu gradient in the output matrix, and Bias gradient
     * in the auxiliary output (see CUBLASLT_MATMUL_DESC_BIAS_POINTER).
     *
     * This epilogue mode requires an extra input,
     * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
     */
    CUBLASLT_EPILOGUE_DRELU_BGRAD = 8 | 16 | 128,

    /** GELU, apply GELU point-wise transform to the results (x:=GELU(x)).
     */
    CUBLASLT_EPILOGUE_GELU = 32,

    /** GELU, apply GELU point-wise transform to the results (x:=GELU(x)).
     *
     * This epilogue mode outputs GELU input as a separate matrix (useful for training).
     * See CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
     */
    CUBLASLT_EPILOGUE_GELU_AUX = (CUBLASLT_EPILOGUE_GELU | 128),

    /** GELU and Bias, apply Bias and then GELU transform.
     */
    CUBLASLT_EPILOGUE_GELU_BIAS = (CUBLASLT_EPILOGUE_GELU | CUBLASLT_EPILOGUE_BIAS),

    /** GELU and Bias, apply Bias and then GELU transform.
     *
     * This epilogue mode outputs GELU input as a separate matrix (useful for training).
     * See CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
     */
    CUBLASLT_EPILOGUE_GELU_AUX_BIAS = (CUBLASLT_EPILOGUE_GELU_AUX | CUBLASLT_EPILOGUE_BIAS),

    /** GELU and Bias gradients. Apply independently GELU and Bias gradient to
     * matmul output. Store GELU gradient in the output matrix, and Bias gradient
     * in the auxiliary output (see CUBLASLT_MATMUL_DESC_BIAS_POINTER).
     *
     * This epilogue mode requires an extra input,
     * see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.
     */
    CUBLASLT_EPILOGUE_DGELU_BGRAD = 16 | 64 | 128;

/** Matmul heuristic search mode.
 *
 * Used as the value of the CUBLASLT_MATMUL_PREF_SEARCH_MODE preference attribute.
 */
/** enum cublasLtMatmulSearch_t */
public static final int
    /** Ask heuristics for the best algo for the given use case.
     */
    CUBLASLT_SEARCH_BEST_FIT = 0,
    /** Only try to find the best config for a preconfigured algo id.
     */
    CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID = 1,
    /** Reserved for future use.
     */
    CUBLASLT_SEARCH_RESERVED_02 = 2,
    /** Reserved for future use.
     */
    CUBLASLT_SEARCH_RESERVED_03 = 3,
    /** Reserved for future use.
     */
    CUBLASLT_SEARCH_RESERVED_04 = 4,
    /** Reserved for future use.
     */
    CUBLASLT_SEARCH_RESERVED_05 = 5;

/** Algo search preference to fine tune the heuristic function.
 *
 * These values are passed as the {@code attr} argument of cublasLtMatmulPreferenceSetAttribute() and
 * cublasLtMatmulPreferenceGetAttribute(). The type named at the end of each entry's description is the type of the
 * value stored behind the {@code buf} argument of those calls.
 */
/** enum cublasLtMatmulPreferenceAttributes_t */
public static final int
    /** Search mode, see cublasLtMatmulSearch_t.
     *
     * uint32_t, default: CUBLASLT_SEARCH_BEST_FIT
     */
    CUBLASLT_MATMUL_PREF_SEARCH_MODE = 0,

    /** Maximum allowed workspace size in bytes.
     *
     * uint64_t, default: 0 - no workspace allowed
     */
    CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES = 1,

    /** Math mode mask, see cublasMath_t.
     *
     * Only algorithms with CUBLASLT_ALGO_CAP_MATHMODE_IMPL that is not masked out by this attribute are allowed.
     *
     * uint32_t, default: 1 (allows both default and tensor op math)
     * DEPRECATED, will be removed in a future release, see cublasLtNumericalImplFlags_t for replacement
     */
    CUBLASLT_MATMUL_PREF_MATH_MODE_MASK = 2,

    /** Reduction scheme mask, see cublasLtReductionScheme_t. Filters heuristic result to only include algo configs
     * that use one of the required modes.
     *
     * E.g. mask value of 0x03 will allow only INPLACE and COMPUTE_TYPE reduction schemes.
     *
     * uint32_t, default: CUBLASLT_REDUCTION_SCHEME_MASK (allows all reduction schemes)
     */
    CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK = 3,

    /** Gaussian mode mask, see cublasLt3mMode_t.
     *
     * Only algorithms with CUBLASLT_ALGO_CAP_GAUSSIAN_IMPL that is not masked out by this attribute are allowed.
     *
     * uint32_t, default: CUBLASLT_3M_MODE_ALLOWED (allows both gaussian and non-gaussian algorithms)
     * DEPRECATED, will be removed in a future release, see cublasLtNumericalImplFlags_t for replacement
     */
    CUBLASLT_MATMUL_PREF_GAUSSIAN_MODE_MASK = 4,

    /** Minimum buffer alignment for matrix A (in bytes).
     *
     * Selecting a smaller value will exclude algorithms that cannot work with matrix A that is not as strictly
     * aligned as they need.
     *
     * uint32_t, default: 256
     */
    CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_A_BYTES = 5,

    /** Minimum buffer alignment for matrix B (in bytes).
     *
     * Selecting a smaller value will exclude algorithms that cannot work with matrix B that is not as strictly
     * aligned as they need.
     *
     * uint32_t, default: 256
     */
    CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_B_BYTES = 6,

    /** Minimum buffer alignment for matrix C (in bytes).
     *
     * Selecting a smaller value will exclude algorithms that cannot work with matrix C that is not as strictly
     * aligned as they need.
     *
     * uint32_t, default: 256
     */
    CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_C_BYTES = 7,

    /** Minimum buffer alignment for matrix D (in bytes).
     *
     * Selecting a smaller value will exclude algorithms that cannot work with matrix D that is not as strictly
     * aligned as they need.
     *
     * uint32_t, default: 256
     */
    CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_D_BYTES = 8,

    /** Maximum wave count.
     *
     * See cublasLtMatmulHeuristicResult_t::wavesCount.
     *
     * Selecting a non-zero value will exclude algorithms that report device utilization higher than specified.
     *
     * float, default: 0.0f
     */
    CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT = 9,

    /** Pointer mode mask, see cublasLtPointerModeMask_t. Filters heuristic result to only include algorithms that
     * support all required modes.
     *
     * uint32_t, default: (CUBLASLT_POINTER_MODE_MASK_HOST | CUBLASLT_POINTER_MODE_MASK_DEVICE) (only allows
     * algorithms that support both regular host and device pointers)
     */
    CUBLASLT_MATMUL_PREF_POINTER_MODE_MASK = 10,

    /** Epilogue selector mask, see cublasLtEpilogue_t. Filters heuristic result to only include algorithms that
     * support all required operations.
     *
     * uint32_t, default: CUBLASLT_EPILOGUE_DEFAULT (only allows algorithms that support default epilogue)
     */
    CUBLASLT_MATMUL_PREF_EPILOGUE_MASK = 11,

    /** Numerical implementation details mask, see cublasLtNumericalImplFlags_t. Filters heuristic result to only
     * include algorithms that use the allowed implementations.
     *
     * uint64_t, default: uint64_t(-1) (allow everything)
     */
    CUBLASLT_MATMUL_PREF_IMPL_MASK = 12,

    /** Number of SMs to target for parallel execution. Optimizes heuristics for execution on a smaller number of SMs
     * when user expects a concurrent stream to be using some of the device resources.
     *
     * int32_t, default: 0 - use the number reported by the device.
     */
    CUBLASLT_MATMUL_PREF_SM_COUNT_TARGET = 13;

/** Internal. Do not use directly.
 *
 * Takes the same descriptor argument as cublasLtMatmulPreferenceInit(), plus an explicit {@code size} argument.
 * NOTE(review): {@code size} is presumably the size in bytes of the pre-allocated descriptor storage - confirm
 * against cublasLt.h.
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulPreferenceInit_internal(@Cast("cublasLtMatmulPreference_t") cublasLtMatmulPreferenceOpaque_t pref, @Cast("size_t") long size);

/** Initialize matmul heuristic search preference descriptor in pre-allocated space.
 *
 * @param pref [in]  The pre-allocated descriptor to initialize
 *
 * \retval     CUBLAS_STATUS_ALLOC_FAILED  if size of the pre-allocated space is insufficient
 * \retval     CUBLAS_STATUS_SUCCESS       if descriptor was initialized successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulPreferenceInit(@Cast("cublasLtMatmulPreference_t") cublasLtMatmulPreferenceOpaque_t pref);

/** Create new matmul heuristic search preference descriptor.
 *
 * @param pref [out]  Location (passed as cublasLtMatmulPreference_t*) that receives the created descriptor
 *
 * \retval     CUBLAS_STATUS_ALLOC_FAILED  if memory could not be allocated
 * \retval     CUBLAS_STATUS_SUCCESS       if descriptor was created successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulPreferenceCreate(@Cast("cublasLtMatmulPreference_t*") PointerPointer pref);

/** Destroy matmul heuristic search preference descriptor.
 *
 * @param pref [in]  The descriptor to destroy
 *
 * \retval     CUBLAS_STATUS_SUCCESS  if operation was successful
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulPreferenceDestroy(@Cast("cublasLtMatmulPreference_t") cublasLtMatmulPreferenceOpaque_t pref);

/** Set matmul heuristic search preference descriptor attribute.
 *
 * @param pref [in]         The descriptor
 * @param attr [in]         The attribute to set, one of the cublasLtMatmulPreferenceAttributes_t values
 *                          (CUBLASLT_MATMUL_PREF_*)
 * @param buf [in]          memory address containing the new value
 * @param sizeInBytes [in]  size of buf buffer for verification (in bytes)
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulPreferenceSetAttribute(
    @Cast("cublasLtMatmulPreference_t") cublasLtMatmulPreferenceOpaque_t pref,
    @Cast("cublasLtMatmulPreferenceAttributes_t") int attr,
    @Const Pointer buf,
    @Cast("size_t") long sizeInBytes);

/** Get matmul heuristic search preference descriptor attribute.
 *
 * @param pref [in]         The descriptor
 * @param attr [in]         The attribute to query, one of the cublasLtMatmulPreferenceAttributes_t values
 *                          (CUBLASLT_MATMUL_PREF_*)
 * @param buf [out]         memory address the attribute's value is written to
 * @param sizeInBytes [in]  size of buf buffer for verification (in bytes)
 * @param sizeWritten [out] only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
 *                          bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
 *                                          and buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute's value was successfully written to user memory
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulPreferenceGetAttribute(
    @Cast("cublasLtMatmulPreference_t") cublasLtMatmulPreferenceOpaque_t pref,
    @Cast("cublasLtMatmulPreferenceAttributes_t") int attr,
    Pointer buf,
    @Cast("size_t") long sizeInBytes,
    @Cast("size_t*") SizeTPointer sizeWritten);
// Targeting ..\cublas\cublasLtMatmulHeuristicResult_t.java



/** Query cublasLt heuristic for algorithm appropriate for given use case.
 *
 * This binding is declared three times; the overloads are identical except for the Java type used for
 * {@code returnAlgoCount} (IntPointer, IntBuffer or int[]).
 *
 * @param lightHandle [in]            Pointer to the allocated cuBLASLt handle for the cuBLASLt
 *                                        context. See cublasLtHandle_t.
 * @param operationDesc [in]          Handle to the matrix multiplication descriptor.
 * @param Adesc [in]                  Handle to the layout descriptors for matrix A.
 * @param Bdesc [in]                  Handle to the layout descriptors for matrix B.
 * @param Cdesc [in]                  Handle to the layout descriptors for matrix C.
 * @param Ddesc [in]                  Handle to the layout descriptors for matrix D.
 * @param preference [in]             Pointer to the structure holding the heuristic search
 *                                        preferences descriptor. See cublasLtMatmulPreference_t.
 * @param requestedAlgoCount [in]     Size of heuristicResultsArray (in elements) and requested
 *                                        maximum number of algorithms to return.
 * @param heuristicResultsArray [in, out]  Output algorithms and associated runtime characteristics,
 *                                        ordered in increasing estimated compute time.
 * @param returnAlgoCount [out]       The number of heuristicResultsArray elements written.
 *
 * \retval  CUBLAS_STATUS_INVALID_VALUE   if requestedAlgoCount is less or equal to zero
 * \retval  CUBLAS_STATUS_NOT_SUPPORTED   if no heuristic function available for current configuration
 * \retval  CUBLAS_STATUS_SUCCESS         if query was successful, inspect
 *                                        heuristicResultsArray[0 to (returnAlgoCount - 1)].state
 *                                        for detail status of results
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoGetHeuristic(
    cublasLtContext lightHandle,
    @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t operationDesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Adesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Bdesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Cdesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Ddesc,
    @Cast("cublasLtMatmulPreference_t") cublasLtMatmulPreferenceOpaque_t preference,
    int requestedAlgoCount,
    cublasLtMatmulHeuristicResult_t heuristicResultsArray,
    IntPointer returnAlgoCount);
// Same native call; returnAlgoCount is passed as an IntBuffer.
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoGetHeuristic(
    cublasLtContext lightHandle,
    @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t operationDesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Adesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Bdesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Cdesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Ddesc,
    @Cast("cublasLtMatmulPreference_t") cublasLtMatmulPreferenceOpaque_t preference,
    int requestedAlgoCount,
    cublasLtMatmulHeuristicResult_t heuristicResultsArray,
    IntBuffer returnAlgoCount);
// Same native call; returnAlgoCount is passed as an int[].
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoGetHeuristic(
    cublasLtContext lightHandle,
    @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t operationDesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Adesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Bdesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Cdesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Ddesc,
    @Cast("cublasLtMatmulPreference_t") cublasLtMatmulPreferenceOpaque_t preference,
    int requestedAlgoCount,
    cublasLtMatmulHeuristicResult_t heuristicResultsArray,
    int[] returnAlgoCount);


/* ---------------------------------------------------------------------------------------*/
/* Lower level API to be able to implement own Heuristic and Find routines                */
/* ---------------------------------------------------------------------------------------*/

/** Routine to get all algo IDs that can potentially run for the given combination of types.
 *
 * This binding is declared three times; the overloads are identical except for the Java type used for
 * {@code algoIdsArray} and {@code returnAlgoCount} (IntPointer, IntBuffer or int[]).
 *
 * @param lightHandle [in]         cuBLASLt context handle
 * @param computeType [in]         compute type (a cublasComputeType_t value)
 * @param scaleType [in]           scale type (a cudaDataType_t value)
 * @param Atype [in]               data type of matrix A (a cudaDataType_t value)
 * @param Btype [in]               data type of matrix B (a cudaDataType_t value)
 * @param Ctype [in]               data type of matrix C (a cudaDataType_t value)
 * @param Dtype [in]               data type of matrix D (a cudaDataType_t value)
 * @param requestedAlgoCount [in]  requested number of algos (must be less or equal to size of algoIdsArray (in
 *                                 elements))
 * @param algoIdsArray [out]       array to write algoIds to
 * @param returnAlgoCount [out]    number of algoIds actually written
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if requestedAlgoCount is less or equal to zero
 * \retval     CUBLAS_STATUS_SUCCESS        if query was successful, inspect returnAlgoCount to get actual number of IDs
 *                                          available
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoGetIds(
    cublasLtContext lightHandle,
    @Cast("cublasComputeType_t") int computeType,
    @Cast("cudaDataType_t") int scaleType,
    @Cast("cudaDataType_t") int Atype,
    @Cast("cudaDataType_t") int Btype,
    @Cast("cudaDataType_t") int Ctype,
    @Cast("cudaDataType_t") int Dtype,
    int requestedAlgoCount,
    IntPointer algoIdsArray,
    IntPointer returnAlgoCount);
// Same native call; the two output arguments are passed as IntBuffer.
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoGetIds(
    cublasLtContext lightHandle,
    @Cast("cublasComputeType_t") int computeType,
    @Cast("cudaDataType_t") int scaleType,
    @Cast("cudaDataType_t") int Atype,
    @Cast("cudaDataType_t") int Btype,
    @Cast("cudaDataType_t") int Ctype,
    @Cast("cudaDataType_t") int Dtype,
    int requestedAlgoCount,
    IntBuffer algoIdsArray,
    IntBuffer returnAlgoCount);
// Same native call; the two output arguments are passed as int[].
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoGetIds(
    cublasLtContext lightHandle,
    @Cast("cublasComputeType_t") int computeType,
    @Cast("cudaDataType_t") int scaleType,
    @Cast("cudaDataType_t") int Atype,
    @Cast("cudaDataType_t") int Btype,
    @Cast("cudaDataType_t") int Ctype,
    @Cast("cudaDataType_t") int Dtype,
    int requestedAlgoCount,
    int[] algoIdsArray,
    int[] returnAlgoCount);

/** Initialize algo structure
 *
 * computeType, scaleType and Atype/Btype/Ctype/Dtype together form the combination of data types the
 * algo is initialized for (see CUBLAS_STATUS_NOT_SUPPORTED below).
 *
 * @param algoId [in]  id of the algorithm to initialize the structure for, see cublasLtMatmulAlgoGetIds()
 * @param algo [out]   algo structure to initialize; must not be NULL
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if algo is NULL or algoId is outside of recognized range
 * \retval     CUBLAS_STATUS_NOT_SUPPORTED  if algoId is not supported for given combination of data types
 * \retval     CUBLAS_STATUS_SUCCESS        if the structure was successfully initialized
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoInit( cublasLtContext lightHandle,
                         @Cast("cublasComputeType_t") int computeType,
                         @Cast("cudaDataType_t") int scaleType,
                         @Cast("cudaDataType_t") int Atype,
                         @Cast("cudaDataType_t") int Btype,
                         @Cast("cudaDataType_t") int Ctype,
                         @Cast("cudaDataType_t") int Dtype,
                         int algoId,
                         cublasLtMatmulAlgo_t algo);

/** Check configured algo descriptor for correctness and support on current device.
 *
 * Result includes required workspace size and calculated wave count.
 *
 * CUBLAS_STATUS_SUCCESS doesn't fully guarantee algo will run (will fail if e.g. buffers are not correctly aligned);
 * but if cublasLtMatmulAlgoCheck fails, the algo will not run.
 *
 * The operation descriptor (operationDesc) and the matrix layout descriptors (Adesc, Bdesc, Cdesc, Ddesc)
 * must match the algo descriptor, otherwise CUBLAS_STATUS_INVALID_VALUE is returned (see below).
 *
 * @param algo [in]    algo configuration to check
 * @param result [out]  result structure to report algo runtime characteristics; algo field is never updated
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if matrix layout descriptors or operation descriptor don't match algo
 *                                          descriptor
 * \retval     CUBLAS_STATUS_NOT_SUPPORTED  if algo configuration or data type combination is not currently supported on
 *                                          given device
 * \retval     CUBLAS_STATUS_ARCH_MISMATCH  if algo configuration cannot be run using the selected device
 * \retval     CUBLAS_STATUS_SUCCESS        if check was successful
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoCheck(
    cublasLtContext lightHandle,
    @Cast("cublasLtMatmulDesc_t") cublasLtMatmulDescOpaque_t operationDesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Adesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Bdesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Cdesc,
    @Cast("cublasLtMatrixLayout_t") cublasLtMatrixLayoutOpaque_t Ddesc,
    @Const cublasLtMatmulAlgo_t algo,
    cublasLtMatmulHeuristicResult_t result);

/** Capabilities Attributes that can be retrieved from an initialized Algo structure
 *  (queried with cublasLtMatmulAlgoCapGetAttribute())
 */
/** enum cublasLtMatmulAlgoCapAttributes_t */
public static final int
    /** support for split K, see CUBLASLT_ALGO_CONFIG_SPLITK_NUM
     *
     * int32_t, 0 means no support, supported otherwise
     */
    CUBLASLT_ALGO_CAP_SPLITK_SUPPORT = 0,
    /** reduction scheme mask, see cublasLtReductionScheme_t; shows supported reduction schemes, if reduction scheme is not masked out it is supported.
     *
     * e.g. int isReductionSchemeComputeTypeSupported = (reductionSchemeMask & CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE) == CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE ? 1 : 0;
     *
     * uint32_t
     */
    CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK = 1,
    /** support for cta swizzling, see CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING
     *
     * uint32_t, 0 means no support, 1 means supported value of 1, other values are reserved
     */
    CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT = 2,
    /** support strided batch
     *
     * int32_t, 0 means no support, supported otherwise
     */
    CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT = 3,
    /** support results out of place (D != C in D = alpha.A.B + beta.C)
     *
     * int32_t, 0 means no support, supported otherwise
     */
    CUBLASLT_ALGO_CAP_OUT_OF_PLACE_RESULT_SUPPORT = 4,
    /** syrk/herk support (on top of regular gemm)
     *
     * int32_t, 0 means no support, supported otherwise
     */
    CUBLASLT_ALGO_CAP_UPLO_SUPPORT = 5,
    /** tile ids possible to use, see cublasLtMatmulTile_t; if no tile ids are supported use CUBLASLT_MATMUL_TILE_UNDEFINED
     *
     * use cublasLtMatmulAlgoCapGetAttribute() with sizeInBytes=0 to query actual count
     *
     * array of uint32_t
     */
    CUBLASLT_ALGO_CAP_TILE_IDS = 6,
    /** custom option range is from 0 to CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX (inclusive), see CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION
     *
     * int32_t
     */
    CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX = 7,
    /** whether algorithm is using regular compute or tensor operations
     *
     * int32_t 0 means regular compute, 1 means tensor operations;
     * DEPRECATED
     */
    CUBLASLT_ALGO_CAP_MATHMODE_IMPL = 8,
    /** whether algorithm implements gaussian optimization of complex matrix multiplication, see cublasMath_t
     *
     * int32_t 0 means regular compute, 1 means gaussian;
     * DEPRECATED
     */
    CUBLASLT_ALGO_CAP_GAUSSIAN_IMPL = 9,
    /** whether algorithm supports custom (not COL or ROW memory order), see cublasLtOrder_t
     *
     * int32_t 0 means only COL and ROW memory order is allowed, non-zero means that algo might have different requirements;
     */
    CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER = 10,

    /** bitmask enumerating pointer modes algorithm supports
     *
     * uint32_t, see cublasLtPointerModeMask_t
     */
    CUBLASLT_ALGO_CAP_POINTER_MODE_MASK = 11,

    /** bitmask enumerating kinds of postprocessing algorithm supports in the epilogue
     *
     * uint32_t, see cublasLtEpilogue_t
     */
    CUBLASLT_ALGO_CAP_EPILOGUE_MASK = 12,
    /** stages ids possible to use, see cublasLtMatmulStages_t; if no stages ids are supported use CUBLASLT_MATMUL_STAGES_UNDEFINED
     *
     * use cublasLtMatmulAlgoCapGetAttribute() with sizeInBytes=0 to query actual count
     *
     * array of uint32_t
     */
    CUBLASLT_ALGO_CAP_STAGES_IDS = 13,
    /** support for negative ld for all of the matrices
     *
     * int32_t 0 means no support, supported otherwise
     */
    CUBLASLT_ALGO_CAP_LD_NEGATIVE = 14,
    /** details about algorithm's implementation that affect its numerical behavior
     *
     * uint64_t, see cublasLtNumericalImplFlags_t
     */
    CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS = 15,
    /** minimum alignment required for A matrix in bytes
     *  (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)
     *
     * uint32_t
     */
    CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_A_BYTES = 16,
    /** minimum alignment required for B matrix in bytes
     *  (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)
     *
     * uint32_t
     */
    CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_B_BYTES = 17,
    /** minimum alignment required for C matrix in bytes
     *  (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)
     *
     * uint32_t
     */
    CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES = 18,
    /** minimum alignment required for D matrix in bytes
     *  (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)
     *
     * uint32_t
     */
    CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES = 19;

/** Get algo capability attribute.
 *
 * E.g. to get list of supported Tile IDs (C usage; sizeWritten is a size_t* so the address is passed):
 *      cublasLtMatmulTile_t tiles[CUBLASLT_MATMUL_TILE_END];
 *      size_t num_tiles, size_written;
 *      if (cublasLtMatmulAlgoCapGetAttribute(algo, CUBLASLT_ALGO_CAP_TILE_IDS, tiles, sizeof(tiles), &size_written) == CUBLAS_STATUS_SUCCESS) {
 *        num_tiles = size_written / sizeof(tiles[0]);
 *      }
 *
 * @param algo [in]         The algo descriptor
 * @param attr [in]         The attribute, one of the cublasLtMatmulAlgoCapAttributes_t (CUBLASLT_ALGO_CAP_*) values
 * @param buf [out]          memory address the attribute's value is written to
 * @param sizeInBytes [in]  size of buf buffer for verification (in bytes)
 * @param sizeWritten [out]  only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
 *                          bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if sizeInBytes is 0 and sizeWritten is NULL, or if  sizeInBytes is non-zero
 *                                          and buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute's value was successfully written to user memory
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoCapGetAttribute(
    @Const cublasLtMatmulAlgo_t algo,
    @Cast("cublasLtMatmulAlgoCapAttributes_t") int attr,
    Pointer buf,
    @Cast("size_t") long sizeInBytes,
    @Cast("size_t*") SizeTPointer sizeWritten);

/** Algo Configuration Attributes that can be set according to the Algo capabilities
 *  (written with cublasLtMatmulAlgoConfigSetAttribute(), read with cublasLtMatmulAlgoConfigGetAttribute())
 */
/** enum cublasLtMatmulAlgoConfigAttributes_t */
public static final int
    /** algorithm index, see cublasLtMatmulAlgoGetIds()
     *
     * readonly, set by cublasLtMatmulAlgoInit()
     * int32_t
     */
    CUBLASLT_ALGO_CONFIG_ID = 0,
    /** tile id, see cublasLtMatmulTile_t; usable ids are reported by CUBLASLT_ALGO_CAP_TILE_IDS
     *
     * uint32_t, default: CUBLASLT_MATMUL_TILE_UNDEFINED
     */
    CUBLASLT_ALGO_CONFIG_TILE_ID = 1,
    /** number of K splits, if != 1, SPLITK_NUM parts of matrix multiplication will be computed in parallel,
     * and then results accumulated according to REDUCTION_SCHEME; see CUBLASLT_ALGO_CAP_SPLITK_SUPPORT
     *
     * uint32_t, default: 1
     */
    CUBLASLT_ALGO_CONFIG_SPLITK_NUM = 2,
    /** reduction scheme, see cublasLtReductionScheme_t; supported schemes are reported by
     * CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK
     *
     * uint32_t, default: CUBLASLT_REDUCTION_SCHEME_NONE
     */
    CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME = 3,
    /** cta swizzling, change mapping from CUDA grid coordinates to parts of the matrices;
     * see CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT
     *
     * possible values: 0, 1, other values reserved
     *
     * uint32_t, default: 0
     */
    CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING = 4,
    /** custom option, each algorithm can support some custom options that don't fit description of the other config
     * attributes, see CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX to get accepted range for any specific case
     *
     * uint32_t, default: 0
     */
    CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION = 5,
    /** stages id, see cublasLtMatmulStages_t; usable ids are reported by CUBLASLT_ALGO_CAP_STAGES_IDS
     *
     * uint32_t, default: CUBLASLT_MATMUL_STAGES_UNDEFINED
     */
    CUBLASLT_ALGO_CONFIG_STAGES_ID = 6;

/** Set algo configuration attribute.
 *
 * @param algo [in]         The algo descriptor (modified in place; it is the only non-const argument)
 * @param attr [in]         The attribute, one of the cublasLtMatmulAlgoConfigAttributes_t (CUBLASLT_ALGO_CONFIG_*) values
 * @param buf [in]          memory address containing the new value
 * @param sizeInBytes [in]  size of buf buffer for verification (in bytes)
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoConfigSetAttribute(
    cublasLtMatmulAlgo_t algo,
    @Cast("cublasLtMatmulAlgoConfigAttributes_t") int attr,
    @Const Pointer buf,
    @Cast("size_t") long sizeInBytes);

/** Get algo configuration attribute.
 *
 * @param algo [in]         The algo descriptor
 * @param attr [in]         The attribute, one of the cublasLtMatmulAlgoConfigAttributes_t (CUBLASLT_ALGO_CONFIG_*) values
 * @param buf [out]          memory address the attribute's value is written to
 * @param sizeInBytes [in]  size of buf buffer for verification (in bytes)
 * @param sizeWritten [out]  only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
 *                          bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if sizeInBytes is 0 and sizeWritten is NULL, or if  sizeInBytes is non-zero
 *                                          and buf is NULL or sizeInBytes doesn't match size of internal storage for
 *                                          selected attribute
 * \retval     CUBLAS_STATUS_SUCCESS        if attribute's value was successfully written to user memory
 */
public static native @Cast("cublasStatus_t") int cublasLtMatmulAlgoConfigGetAttribute(
        @Const cublasLtMatmulAlgo_t algo,
        @Cast("cublasLtMatmulAlgoConfigAttributes_t") int attr,
        Pointer buf,
        @Cast("size_t") long sizeInBytes,
        @Cast("size_t*") SizeTPointer sizeWritten);
// Targeting ..\cublas\cublasLtLoggerCallback_t.java



/** Experimental: Logger callback setter.
 *
 * @param callback [in]                     a user defined callback function to be called by the logger,
 *                                          passed as a cublasLtLoggerCallback_t instance
 *
 * \retval     CUBLAS_STATUS_SUCCESS        if callback was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtLoggerSetCallback(
        cublasLtLoggerCallback_t callback);

/** Experimental: Log file setter.
 *
 * @param file [in]                         an open file with write permissions; the Pointer is cast to a
 *                                          native C FILE* when passed to the library
 *
 * \retval     CUBLAS_STATUS_SUCCESS        if log file was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtLoggerSetFile(
        @Cast("FILE*") Pointer file);

/** Experimental: Open log file.
 *
 * Two overloads are provided: one taking the path as a BytePointer (native const char*) and one taking a
 * Java String.
 *
 * @param logFile [in]                      log file path. If the log file does not exist, it will be created
 *
 * \retval     CUBLAS_STATUS_SUCCESS        if log file was created successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtLoggerOpenFile(
        @Cast("const char*") BytePointer logFile);
public static native @Cast("cublasStatus_t") int cublasLtLoggerOpenFile(
        String logFile);

/** Experimental: Log level setter.
 *
 * @param level [in]                        log level, should be one of the following:
 *                                          0. Off
 *                                          1. Errors
 *                                          2. Performance Trace
 *                                          3. Performance Hints
 *                                          4. Heuristics Trace
 *                                          5. API Trace
 *
 * \retval     CUBLAS_STATUS_INVALID_VALUE  if log level is not one of the above levels
 *
 * \retval     CUBLAS_STATUS_SUCCESS        if log level was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtLoggerSetLevel(
        int level);

/** Experimental: Log mask setter.
 *
 * @param mask [in]                         log mask, should be a combination of the following masks
 *                                          (the non-zero values are distinct powers of two, so presumably
 *                                          they are combined with bitwise OR -- confirm against the cuBLAS docs):
 *                                          0.  Off
 *                                          1.  Errors
 *                                          2.  Performance Trace
 *                                          4.  Performance Hints
 *                                          8.  Heuristics Trace
 *                                          16. API Trace
 *
 * \retval     CUBLAS_STATUS_SUCCESS        if log mask was set successfully
 */
public static native @Cast("cublasStatus_t") int cublasLtLoggerSetMask(
        int mask);

/** Experimental: Disable logging for the entire session.
 *
 * Takes no arguments.
 *
 * \retval     CUBLAS_STATUS_SUCCESS        if disabled logging
 */
public static native @Cast("cublasStatus_t") int cublasLtLoggerForceDisable();

// #if defined(__cplusplus)
// #endif /* __cplusplus */


// Parsed from <cublasXt.h>

/*
 * Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */
 
 /*   cublasXt : Host API, Out of Core and Multi-GPU BLAS Library

 */
 
// #if !defined(CUBLAS_XT_H_)
// #define CUBLAS_XT_H_

// #include "driver_types.h"
// #include "cuComplex.h"   /* import complex data type */

// #include "cublas_v2.h"

// #if defined(__cplusplus)
// Targeting ..\cublas\cublasXtContext.java



/* Creates a cublasXt context. The handle is passed by pointer-to-pointer (@ByPtrPtr), so presumably the
   native call stores the newly created context in it -- confirm against the cuBLASXt documentation.
   Returns a cublasStatus_t code. */
public static native @Cast("cublasStatus_t") int cublasXtCreate(@ByPtrPtr cublasXtContext handle);
/* Destroys the given cublasXt context (presumably one obtained from cublasXtCreate -- confirm against the
   cuBLASXt documentation). Returns a cublasStatus_t code. */
public static native @Cast("cublasStatus_t") int cublasXtDestroy(cublasXtContext handle);
/* Queries a board count for a list of devices. Presumably deviceId holds nbDevices device ids and the
   result is written to nbBoards -- TODO confirm against the cuBLASXt documentation.
   The three overloads differ only in the Java type used for deviceId and nbBoards
   (IntPointer, IntBuffer or int[]). */
public static native @Cast("cublasStatus_t") int cublasXtGetNumBoards(int nbDevices, IntPointer deviceId, IntPointer nbBoards);
public static native @Cast("cublasStatus_t") int cublasXtGetNumBoards(int nbDevices, IntBuffer deviceId, IntBuffer nbBoards);
public static native @Cast("cublasStatus_t") int cublasXtGetNumBoards(int nbDevices, int[] deviceId, int[] nbBoards);
/* Queries the maximum number of GPU boards; presumably the value is written to nbGpuBoards -- TODO confirm
   against the cuBLASXt documentation.
   The three overloads differ only in the Java type used for nbGpuBoards (IntPointer, IntBuffer or int[]). */
public static native @Cast("cublasStatus_t") int cublasXtMaxBoards( IntPointer nbGpuBoards );
public static native @Cast("cublasStatus_t") int cublasXtMaxBoards( IntBuffer nbGpuBoards );
public static native @Cast("cublasStatus_t") int cublasXtMaxBoards( int[] nbGpuBoards );
/* This routine selects the GPUs that the user wants to use for CUBLAS-XT.
   Presumably deviceId holds nbDevices device ids -- TODO confirm against the cuBLASXt documentation.
   The three overloads differ only in the Java type used for deviceId (IntPointer, IntBuffer or int[]). */
public static native @Cast("cublasStatus_t") int cublasXtDeviceSelect(cublasXtContext handle, int nbDevices, IntPointer deviceId);
public static native @Cast("cublasStatus_t") int cublasXtDeviceSelect(cublasXtContext handle, int nbDevices, IntBuffer deviceId);
public static native @Cast("cublasStatus_t") int cublasXtDeviceSelect(cublasXtContext handle, int nbDevices, int[] deviceId);

/* This routine changes the dimension of the tiles ( blockDim x blockDim ) used by the given context */
public static native @Cast("cublasStatus_t") int cublasXtSetBlockDim(cublasXtContext handle, int blockDim);
/* Getter counterpart of cublasXtSetBlockDim; presumably writes the current tile dimension to blockDim --
   TODO confirm against the cuBLASXt documentation.
   The three overloads differ only in the Java type used for blockDim (IntPointer, IntBuffer or int[]). */
public static native @Cast("cublasStatus_t") int cublasXtGetBlockDim(cublasXtContext handle, IntPointer blockDim);
public static native @Cast("cublasStatus_t") int cublasXtGetBlockDim(cublasXtContext handle, IntBuffer blockDim);
public static native @Cast("cublasStatus_t") int cublasXtGetBlockDim(cublasXtContext handle, int[] blockDim);

/** enum cublasXtPinnedMemMode_t
 *
 * Values accepted by cublasXtSetPinningMemMode() and reported by cublasXtGetPinningMemMode(). */
public static final int 
    CUBLASXT_PINNING_DISABLED   = 0,  
    CUBLASXT_PINNING_ENABLED    = 1;
/* This routine allows CUBLAS-XT to pin the Host memory if it finds out that some of the matrices passed
   are not pinned : Pinning/Unpinning the Host memory is still a costly operation.
   It is better if the user controls the memory on their own (by pinning/unpinning only when necessary).
   The mode is a cublasXtPinnedMemMode_t value (CUBLASXT_PINNING_DISABLED or CUBLASXT_PINNING_ENABLED).
   The three Get overloads differ only in the Java type used for mode (IntPointer, IntBuffer or int[]).
*/
public static native @Cast("cublasStatus_t") int cublasXtGetPinningMemMode(cublasXtContext handle, @Cast("cublasXtPinnedMemMode_t*") IntPointer mode);
public static native @Cast("cublasStatus_t") int cublasXtGetPinningMemMode(cublasXtContext handle, @Cast("cublasXtPinnedMemMode_t*") IntBuffer mode);
public static native @Cast("cublasStatus_t") int cublasXtGetPinningMemMode(cublasXtContext handle, @Cast("cublasXtPinnedMemMode_t*") int[] mode);
public static native @Cast("cublasStatus_t") int cublasXtSetPinningMemMode(cublasXtContext handle, @Cast("cublasXtPinnedMemMode_t") int mode);         

/* The enums and routines below are used to provide CPU BLAS routines, used for too small sizes or hybrid computation */
/** enum cublasXtOpType_t
 *
 * Operand type selector passed as the "type" argument of cublasXtSetCpuRoutine() and cublasXtSetCpuRatio(). */
public static final int
    CUBLASXT_FLOAT = 0,
    CUBLASXT_DOUBLE = 1,  
    CUBLASXT_COMPLEX = 2,
    CUBLASXT_DOUBLECOMPLEX = 3;

/** enum cublasXtBlasOp_t
 *
 * BLAS routine selector passed as the "blasOp" argument of cublasXtSetCpuRoutine() and cublasXtSetCpuRatio().
 * CUBLASXT_ROUTINE_MAX is one past the last routine id (presumably a count/sentinel, not a routine). */
public static final int
    CUBLASXT_GEMM = 0,
    CUBLASXT_SYRK = 1,  
    CUBLASXT_HERK = 2,
    CUBLASXT_SYMM = 3,
    CUBLASXT_HEMM = 4,
    CUBLASXT_TRSM = 5,
    CUBLASXT_SYR2K = 6,  
    CUBLASXT_HER2K = 7,        
        
    CUBLASXT_SPMM = 8,
    CUBLASXT_SYRKX = 9,
    CUBLASXT_HERKX = 10,  
    CUBLASXT_TRMM = 11,  
    CUBLASXT_ROUTINE_MAX = 12;


/* Registers a CPU BLAS routine (blasFunctor, passed as an untyped Pointer) for the given
   operation (cublasXtBlasOp_t) and operand type (cublasXtOpType_t).
   Currently only 32-bit integer BLAS routines are supported */
public static native @Cast("cublasStatus_t") int cublasXtSetCpuRoutine(cublasXtContext handle, @Cast("cublasXtBlasOp_t") int blasOp, @Cast("cublasXtOpType_t") int type, Pointer blasFunctor );

/* Specifies the percentage of work that should be done by the CPU for the given operation
   (cublasXtBlasOp_t) and operand type (cublasXtOpType_t); default is 0 (no work) */
public static native @Cast("cublasStatus_t") int cublasXtSetCpuRatio(cublasXtContext handle, @Cast("cublasXtBlasOp_t") int blasOp, @Cast("cublasXtOpType_t") int type, float ratio );


/* GEMM */
/* Single-precision (float) GEMM on a cublasXt context.
   Presumably computes C = alpha * op(A) * op(B) + beta * C as in standard BLAS GEMM, with op() selected by
   transa/transb -- TODO confirm against the cuBLASXt documentation.
   alpha, A, B and beta are const inputs; C is the only non-const array argument.
   m, n, k and the leading dimensions lda, ldb, ldc are size_t on the native side (mapped to Java long).
   The three overloads differ only in the Java type used for the float arguments
   (FloatPointer, FloatBuffer or float[]). */
public static native @Cast("cublasStatus_t") int cublasXtSgemm(cublasXtContext handle, 
                                            @Cast("cublasOperation_t") int transa,
                                            @Cast("cublasOperation_t") int transb, 
                                            @Cast("size_t") long m,
                                            @Cast("size_t") long n,
                                            @Cast("size_t") long k,
                                            @Const FloatPointer alpha,
                                            @Const FloatPointer A, 
                                            @Cast("size_t") long lda,
                                            @Const FloatPointer B,
                                            @Cast("size_t") long ldb, 
                                            @Const FloatPointer beta,
                                            FloatPointer C,
                                            @Cast("size_t") long ldc);
public static native @Cast("cublasStatus_t") int cublasXtSgemm(cublasXtContext handle, 
                                            @Cast("cublasOperation_t") int transa,
                                            @Cast("cublasOperation_t") int transb, 
                                            @Cast("size_t") long m,
                                            @Cast("size_t") long n,
                                            @Cast("size_t") long k,
                                            @Const FloatBuffer alpha,
                                            @Const FloatBuffer A, 
                                            @Cast("size_t") long lda,
                                            @Const FloatBuffer B,
                                            @Cast("size_t") long ldb, 
                                            @Const FloatBuffer beta,
                                            FloatBuffer C,
                                            @Cast("size_t") long ldc);
public static native @Cast("cublasStatus_t") int cublasXtSgemm(cublasXtContext handle, 
                                            @Cast("cublasOperation_t") int transa,
                                            @Cast("cublasOperation_t") int transb, 
                                            @Cast("size_t") long m,
                                            @Cast("size_t") long n,
                                            @Cast("size_t") long k,
                                            @Const float[] alpha,
                                            @Const float[] A, 
                                            @Cast("size_t") long lda,
                                            @Const float[] B,
                                            @Cast("size_t") long ldb, 
                                            @Const float[] beta,
                                            float[] C,
                                            @Cast("size_t") long ldc);
                                            
/* Double-precision (double) GEMM on a cublasXt context.
   Presumably computes C = alpha * op(A) * op(B) + beta * C as in standard BLAS GEMM, with op() selected by
   transa/transb -- TODO confirm against the cuBLASXt documentation.
   alpha, A, B and beta are const inputs; C is the only non-const array argument.
   m, n, k and the leading dimensions lda, ldb, ldc are size_t on the native side (mapped to Java long).
   The three overloads differ only in the Java type used for the double arguments
   (DoublePointer, DoubleBuffer or double[]). */
public static native @Cast("cublasStatus_t") int cublasXtDgemm(cublasXtContext handle, 
                                            @Cast("cublasOperation_t") int transa,
                                            @Cast("cublasOperation_t") int transb, 
                                            @Cast("size_t") long m,
                                            @Cast("size_t") long n,
                                            @Cast("size_t") long k,
                                            @Const DoublePointer alpha,
                                            @Const DoublePointer A, 
                                            @Cast("size_t") long lda,
                                            @Const DoublePointer B,
                                            @Cast("size_t") long ldb, 
                                            @Const DoublePointer beta,
                                            DoublePointer C,
                                            @Cast("size_t") long ldc);
public static native @Cast("cublasStatus_t") int cublasXtDgemm(cublasXtContext handle, 
                                            @Cast("cublasOperation_t") int transa,
                                            @Cast("cublasOperation_t") int transb, 
                                            @Cast("size_t") long m,
                                            @Cast("size_t") long n,
                                            @Cast("size_t") long k,
                                            @Const DoubleBuffer alpha,
                                            @Const DoubleBuffer A, 
                                            @Cast("size_t") long lda,
                                            @Const DoubleBuffer B,
                                            @Cast("size_t") long ldb, 
                                            @Const DoubleBuffer beta,
                                            DoubleBuffer C,
                                            @Cast("size_t") long ldc);
public static native @Cast("cublasStatus_t") int cublasXtDgemm(cublasXtContext handle, 
                                            @Cast("cublasOperation_t") int transa,
                                            @Cast("cublasOperation_t") int transb, 
                                            @Cast("size_t") long m,
                                            @Cast("size_t") long n,
                                            @Cast("size_t") long k,
                                            @Const double[] alpha,
                                            @Const double[] A, 
                                            @Cast("size_t") long lda,
                                            @Const double[] B,
                                            @Cast("size_t") long ldb, 
                                            @Const double[] beta,
                                            double[] C,
                                            @Cast("size_t") long ldc);
                                            
/* Single-precision complex GEMM on a cublasXt context.
   Presumably computes C = alpha * op(A) * op(B) + beta * C as in standard BLAS GEMM, with op() selected by
   transa/transb -- TODO confirm against the cuBLASXt documentation.
   Complex values are passed as float2 and cast to cuComplex* on the native side; alpha, A, B and beta are
   const, C is the only non-const array argument.
   m, n, k and the leading dimensions lda, ldb, ldc are size_t on the native side (mapped to Java long). */
public static native @Cast("cublasStatus_t") int cublasXtCgemm(cublasXtContext handle, 
                                            @Cast("cublasOperation_t") int transa,
                                            @Cast("cublasOperation_t") int transb, 
                                            @Cast("size_t") long m,
                                            @Cast("size_t") long n,
                                            @Cast("size_t") long k,
                                            @Cast("const cuComplex*") float2 alpha,
                                            @Cast("const cuComplex*") float2 A, 
                                            @Cast("size_t") long lda,
                                            @Cast("const cuComplex*") float2 B,
                                            @Cast("size_t") long ldb, 
                                            @Cast("const cuComplex*") float2 beta,
                                            @Cast("cuComplex*") float2 C,
                                            @Cast("size_t") long ldc);
                                            
/* Double-precision complex GEMM on a cublasXt context.
   Presumably computes C = alpha * op(A) * op(B) + beta * C as in standard BLAS GEMM, with op() selected by
   transa/transb -- TODO confirm against the cuBLASXt documentation.
   Complex values are passed as double2 and cast to cuDoubleComplex* on the native side; alpha, A, B and
   beta are const, C is the only non-const array argument.
   m, n, k and the leading dimensions lda, ldb, ldc are size_t on the native side (mapped to Java long). */
public static native @Cast("cublasStatus_t") int cublasXtZgemm(cublasXtContext handle, 
                                            @Cast("cublasOperation_t") int transa,
                                            @Cast("cublasOperation_t") int transb, 
                                            @Cast("size_t") long m,
                                            @Cast("size_t") long n,
                                            @Cast("size_t") long k,
                                            @Cast("const cuDoubleComplex*") double2 alpha,
                                            @Cast("const cuDoubleComplex*") double2 A, 
                                            @Cast("size_t") long lda,
                                            @Cast("const cuDoubleComplex*") double2 B,
                                            @Cast("size_t") long ldb, 
                                            @Cast("const cuDoubleComplex*") double2 beta,
                                            @Cast("cuDoubleComplex*") double2 C,
                                            @Cast("size_t") long ldc);                                                                                             
/* ------------------------------------------------------- */                                 
/* SYRK */
/* Single-precision (float) SYRK on a cublasXt context.
   Presumably the standard BLAS symmetric rank-k update of C from A, with uplo selecting the stored
   triangle of C and trans selecting op(A) -- TODO confirm against the cuBLASXt documentation.
   alpha, A and beta are const inputs; C is the only non-const array argument.
   n, k and the leading dimensions lda, ldc are size_t on the native side (mapped to Java long).
   The three overloads differ only in the Java type used for the float arguments
   (FloatPointer, FloatBuffer or float[]). */
public static native @Cast("cublasStatus_t") int cublasXtSsyrk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const FloatPointer alpha,
                                             @Const FloatPointer A,
                                             @Cast("size_t") long lda,
                                             @Const FloatPointer beta,
                                             FloatPointer C,
                                             @Cast("size_t") long ldc );
public static native @Cast("cublasStatus_t") int cublasXtSsyrk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const FloatBuffer alpha,
                                             @Const FloatBuffer A,
                                             @Cast("size_t") long lda,
                                             @Const FloatBuffer beta,
                                             FloatBuffer C,
                                             @Cast("size_t") long ldc );
public static native @Cast("cublasStatus_t") int cublasXtSsyrk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const float[] alpha,
                                             @Const float[] A,
                                             @Cast("size_t") long lda,
                                             @Const float[] beta,
                                             float[] C,
                                             @Cast("size_t") long ldc );
                                             
/**
 * Binds the native function {@code cublasXtDsyrk}, listed under the "SYRK" section of the
 * parsed header. This overload passes the double data as {@code DoublePointer}.
 * NOTE(review): presumably the BLAS double-precision symmetric rank-k update -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  double data, declared const
 * @param A      double data, declared const
 * @param lda    long cast to {@code size_t}
 * @param beta   double data, declared const
 * @param C      double data; the only data argument not declared const
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyrk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const DoublePointer alpha,
                                             @Const DoublePointer A,
                                             @Cast("size_t") long lda,
                                             @Const DoublePointer beta,
                                             DoublePointer C,
                                             @Cast("size_t") long ldc );
/**
 * Overload of {@code cublasXtDsyrk} that passes the double data as {@code DoubleBuffer};
 * all other parameters and the return value match the {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyrk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const DoubleBuffer alpha,
                                             @Const DoubleBuffer A,
                                             @Cast("size_t") long lda,
                                             @Const DoubleBuffer beta,
                                             DoubleBuffer C,
                                             @Cast("size_t") long ldc );
/**
 * Overload of {@code cublasXtDsyrk} that passes the double data as {@code double[]};
 * all other parameters and the return value match the {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyrk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const double[] alpha,
                                             @Const double[] A,
                                             @Cast("size_t") long lda,
                                             @Const double[] beta,
                                             double[] C,
                                             @Cast("size_t") long ldc );
                                             
/**
 * Binds the native function {@code cublasXtCsyrk}, listed under the "SYRK" section of the
 * parsed header. Complex data is passed as {@code float2} and cast to {@code cuComplex*}.
 * NOTE(review): presumably the single-precision complex symmetric rank-k update -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  cast to {@code const cuComplex*}
 * @param A      cast to {@code const cuComplex*}
 * @param lda    long cast to {@code size_t}
 * @param beta   cast to {@code const cuComplex*}
 * @param C      cast to non-const {@code cuComplex*}
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCsyrk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Cast("const cuComplex*") float2 alpha,
                                             @Cast("const cuComplex*") float2 A,
                                             @Cast("size_t") long lda,
                                             @Cast("const cuComplex*") float2 beta,
                                             @Cast("cuComplex*") float2 C,
                                             @Cast("size_t") long ldc );
                                             
/**
 * Binds the native function {@code cublasXtZsyrk}, listed under the "SYRK" section of the
 * parsed header. Complex data is passed as {@code double2} and cast to
 * {@code cuDoubleComplex*}.
 * NOTE(review): presumably the double-precision complex symmetric rank-k update -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  cast to {@code const cuDoubleComplex*}
 * @param A      cast to {@code const cuDoubleComplex*}
 * @param lda    long cast to {@code size_t}
 * @param beta   cast to {@code const cuDoubleComplex*}
 * @param C      cast to non-const {@code cuDoubleComplex*}
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZsyrk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Cast("const cuDoubleComplex*") double2 alpha,
                                             @Cast("const cuDoubleComplex*") double2 A,
                                             @Cast("size_t") long lda,
                                             @Cast("const cuDoubleComplex*") double2 beta,
                                             @Cast("cuDoubleComplex*") double2 C,
                                             @Cast("size_t") long ldc );
/* -------------------------------------------------------------------- */                                  
/* HERK */                                
/**
 * Binds the native function {@code cublasXtCherk}, listed under the "HERK" section of the
 * parsed header. The scalars {@code alpha} and {@code beta} are real floats (here a
 * {@code FloatPointer}), while the matrices are {@code float2} cast to {@code cuComplex*}.
 * NOTE(review): presumably the single-precision Hermitian rank-k update -- confirm against
 * the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  real float data, declared const
 * @param A      cast to {@code const cuComplex*}
 * @param lda    long cast to {@code size_t}
 * @param beta   real float data, declared const
 * @param C      cast to non-const {@code cuComplex*}
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCherk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const FloatPointer alpha,
                                             @Cast("const cuComplex*") float2 A,
                                             @Cast("size_t") long lda,
                                             @Const FloatPointer beta,
                                             @Cast("cuComplex*") float2 C,
                                             @Cast("size_t") long ldc );
/**
 * Overload of {@code cublasXtCherk} that passes {@code alpha} and {@code beta} as
 * {@code FloatBuffer}; all other parameters and the return value match the
 * {@code FloatPointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtCherk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const FloatBuffer alpha,
                                             @Cast("const cuComplex*") float2 A,
                                             @Cast("size_t") long lda,
                                             @Const FloatBuffer beta,
                                             @Cast("cuComplex*") float2 C,
                                             @Cast("size_t") long ldc );
/**
 * Overload of {@code cublasXtCherk} that passes {@code alpha} and {@code beta} as
 * {@code float[]}; all other parameters and the return value match the
 * {@code FloatPointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtCherk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const float[] alpha,
                                             @Cast("const cuComplex*") float2 A,
                                             @Cast("size_t") long lda,
                                             @Const float[] beta,
                                             @Cast("cuComplex*") float2 C,
                                             @Cast("size_t") long ldc );
                                             
/**
 * Binds the native function {@code cublasXtZherk}, listed under the "HERK" section of the
 * parsed header. The scalars {@code alpha} and {@code beta} are real doubles (here a
 * {@code DoublePointer}), while the matrices are {@code double2} cast to
 * {@code cuDoubleComplex*}.
 * NOTE(review): presumably the double-precision Hermitian rank-k update -- confirm against
 * the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  real double data, declared const
 * @param A      cast to {@code const cuDoubleComplex*}
 * @param lda    long cast to {@code size_t}
 * @param beta   real double data, declared const
 * @param C      cast to non-const {@code cuDoubleComplex*}
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZherk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const DoublePointer alpha,
                                             @Cast("const cuDoubleComplex*") double2 A,
                                             @Cast("size_t") long lda,
                                             @Const DoublePointer beta,
                                             @Cast("cuDoubleComplex*") double2 C,
                                             @Cast("size_t") long ldc );
/**
 * Overload of {@code cublasXtZherk} that passes {@code alpha} and {@code beta} as
 * {@code DoubleBuffer}; all other parameters and the return value match the
 * {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtZherk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const DoubleBuffer alpha,
                                             @Cast("const cuDoubleComplex*") double2 A,
                                             @Cast("size_t") long lda,
                                             @Const DoubleBuffer beta,
                                             @Cast("cuDoubleComplex*") double2 C,
                                             @Cast("size_t") long ldc );
/**
 * Overload of {@code cublasXtZherk} that passes {@code alpha} and {@code beta} as
 * {@code double[]}; all other parameters and the return value match the
 * {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtZherk( cublasXtContext handle, 
                                             @Cast("cublasFillMode_t") int uplo, 
                                             @Cast("cublasOperation_t") int trans, 
                                             @Cast("size_t") long n,
                                             @Cast("size_t") long k,
                                             @Const double[] alpha,
                                             @Cast("const cuDoubleComplex*") double2 A,
                                             @Cast("size_t") long lda,
                                             @Const double[] beta,
                                             @Cast("cuDoubleComplex*") double2 C,
                                             @Cast("size_t") long ldc );                                                           
/* -------------------------------------------------------------------- */                                              
/* SYR2K */                                     
/**
 * Binds the native function {@code cublasXtSsyr2k}, listed under the "SYR2K" section of
 * the parsed header. This overload passes the float data as {@code FloatPointer}.
 * NOTE(review): presumably the BLAS single-precision symmetric rank-2k update -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  float data, declared const
 * @param A      float data, declared const
 * @param lda    long cast to {@code size_t}
 * @param B      float data, declared const
 * @param ldb    long cast to {@code size_t}
 * @param beta   float data, declared const
 * @param C      float data; the only data argument not declared const
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSsyr2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const FloatPointer alpha,   
                                              @Const FloatPointer A,
                                              @Cast("size_t") long lda,
                                              @Const FloatPointer B,
                                              @Cast("size_t") long ldb,
                                              @Const FloatPointer beta,   
                                              FloatPointer C,
                                              @Cast("size_t") long ldc);
/**
 * Overload of {@code cublasXtSsyr2k} that passes the float data as {@code FloatBuffer};
 * all other parameters and the return value match the {@code FloatPointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtSsyr2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const FloatBuffer alpha,   
                                              @Const FloatBuffer A,
                                              @Cast("size_t") long lda,
                                              @Const FloatBuffer B,
                                              @Cast("size_t") long ldb,
                                              @Const FloatBuffer beta,   
                                              FloatBuffer C,
                                              @Cast("size_t") long ldc);
/**
 * Overload of {@code cublasXtSsyr2k} that passes the float data as {@code float[]};
 * all other parameters and the return value match the {@code FloatPointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtSsyr2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const float[] alpha,   
                                              @Const float[] A,
                                              @Cast("size_t") long lda,
                                              @Const float[] B,
                                              @Cast("size_t") long ldb,
                                              @Const float[] beta,   
                                              float[] C,
                                              @Cast("size_t") long ldc);  
            
/**
 * Binds the native function {@code cublasXtDsyr2k}, listed under the "SYR2K" section of
 * the parsed header. This overload passes the double data as {@code DoublePointer}.
 * NOTE(review): presumably the BLAS double-precision symmetric rank-2k update -- confirm
 * against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  double data, declared const
 * @param A      double data, declared const
 * @param lda    long cast to {@code size_t}
 * @param B      double data, declared const
 * @param ldb    long cast to {@code size_t}
 * @param beta   double data, declared const
 * @param C      double data; the only data argument not declared const
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyr2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const DoublePointer alpha,    
                                              @Const DoublePointer A,
                                              @Cast("size_t") long lda,
                                              @Const DoublePointer B,
                                              @Cast("size_t") long ldb,
                                              @Const DoublePointer beta,   
                                              DoublePointer C,
                                              @Cast("size_t") long ldc);
/**
 * Overload of {@code cublasXtDsyr2k} that passes the double data as {@code DoubleBuffer};
 * all other parameters and the return value match the {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyr2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const DoubleBuffer alpha,    
                                              @Const DoubleBuffer A,
                                              @Cast("size_t") long lda,
                                              @Const DoubleBuffer B,
                                              @Cast("size_t") long ldb,
                                              @Const DoubleBuffer beta,   
                                              DoubleBuffer C,
                                              @Cast("size_t") long ldc);
/**
 * Overload of {@code cublasXtDsyr2k} that passes the double data as {@code double[]};
 * all other parameters and the return value match the {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyr2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const double[] alpha,    
                                              @Const double[] A,
                                              @Cast("size_t") long lda,
                                              @Const double[] B,
                                              @Cast("size_t") long ldb,
                                              @Const double[] beta,   
                                              double[] C,
                                              @Cast("size_t") long ldc);
            
/**
 * Binds the native function {@code cublasXtCsyr2k}, listed under the "SYR2K" section of
 * the parsed header. Complex data is passed as {@code float2} and cast to
 * {@code cuComplex*}.
 * NOTE(review): presumably the single-precision complex symmetric rank-2k update --
 * confirm against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  cast to {@code const cuComplex*}
 * @param A      cast to {@code const cuComplex*}
 * @param lda    long cast to {@code size_t}
 * @param B      cast to {@code const cuComplex*}
 * @param ldb    long cast to {@code size_t}
 * @param beta   cast to {@code const cuComplex*}
 * @param C      cast to non-const {@code cuComplex*}
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCsyr2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuComplex*") float2 alpha,   
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Cast("const cuComplex*") float2 beta,   
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc);
            
/**
 * Binds the native function {@code cublasXtZsyr2k}, listed under the "SYR2K" section of
 * the parsed header. Complex data is passed as {@code double2} and cast to
 * {@code cuDoubleComplex*}.
 * NOTE(review): presumably the double-precision complex symmetric rank-2k update --
 * confirm against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  cast to {@code const cuDoubleComplex*}
 * @param A      cast to {@code const cuDoubleComplex*}
 * @param lda    long cast to {@code size_t}
 * @param B      cast to {@code const cuDoubleComplex*}
 * @param ldb    long cast to {@code size_t}
 * @param beta   cast to {@code const cuDoubleComplex*}
 * @param C      cast to non-const {@code cuDoubleComplex*}
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZsyr2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuDoubleComplex*") double2 alpha,
                                              @Cast("const cuDoubleComplex*") double2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuDoubleComplex*") double2 B,
                                              @Cast("size_t") long ldb,
                                              @Cast("const cuDoubleComplex*") double2 beta,   
                                              @Cast("cuDoubleComplex*") double2 C,
                                              @Cast("size_t") long ldc);  
/* -------------------------------------------------------------------- */                                                  
/* HERKX : variant extension of HERK */                                       
/**
 * Binds the native function {@code cublasXtCherkx}, which the parsed header describes as
 * "HERKX : variant extension of HERK". {@code alpha} and the matrices are {@code float2}
 * cast to {@code cuComplex*}; {@code beta} is a real float (here a {@code FloatPointer}).
 * NOTE(review): the exact update formula is not visible here -- confirm against the
 * cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  cast to {@code const cuComplex*}
 * @param A      cast to {@code const cuComplex*}
 * @param lda    long cast to {@code size_t}
 * @param B      cast to {@code const cuComplex*}
 * @param ldb    long cast to {@code size_t}
 * @param beta   real float data, declared const
 * @param C      cast to non-const {@code cuComplex*}
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCherkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuComplex*") float2 alpha,   
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Const FloatPointer beta,     
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc);
/**
 * Overload of {@code cublasXtCherkx} that passes {@code beta} as {@code FloatBuffer};
 * all other parameters and the return value match the {@code FloatPointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtCherkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuComplex*") float2 alpha,   
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Const FloatBuffer beta,     
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc);
/**
 * Overload of {@code cublasXtCherkx} that passes {@code beta} as {@code float[]};
 * all other parameters and the return value match the {@code FloatPointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtCherkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuComplex*") float2 alpha,   
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Const float[] beta,     
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc);  
            
/**
 * Binds the native function {@code cublasXtZherkx}, which the parsed header describes as
 * "HERKX : variant extension of HERK". {@code alpha} and the matrices are {@code double2}
 * cast to {@code cuDoubleComplex*}; {@code beta} is a real double (here a
 * {@code DoublePointer}).
 * NOTE(review): the exact update formula is not visible here -- confirm against the
 * cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param n      long cast to {@code size_t}
 * @param k      long cast to {@code size_t}
 * @param alpha  cast to {@code const cuDoubleComplex*}
 * @param A      cast to {@code const cuDoubleComplex*}
 * @param lda    long cast to {@code size_t}
 * @param B      cast to {@code const cuDoubleComplex*}
 * @param ldb    long cast to {@code size_t}
 * @param beta   real double data, declared const
 * @param C      cast to non-const {@code cuDoubleComplex*}
 * @param ldc    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZherkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans, 
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuDoubleComplex*") double2 alpha,  
                                              @Cast("const cuDoubleComplex*") double2 A, 
                                              @Cast("size_t") long lda,
                                              @Cast("const cuDoubleComplex*") double2 B,
                                              @Cast("size_t") long ldb,
                                              @Const DoublePointer beta,   
                                              @Cast("cuDoubleComplex*") double2 C,
                                              @Cast("size_t") long ldc);
/**
 * Overload of {@code cublasXtZherkx} that passes {@code beta} as {@code DoubleBuffer};
 * all other parameters and the return value match the {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtZherkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans, 
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuDoubleComplex*") double2 alpha,  
                                              @Cast("const cuDoubleComplex*") double2 A, 
                                              @Cast("size_t") long lda,
                                              @Cast("const cuDoubleComplex*") double2 B,
                                              @Cast("size_t") long ldb,
                                              @Const DoubleBuffer beta,   
                                              @Cast("cuDoubleComplex*") double2 C,
                                              @Cast("size_t") long ldc);
/**
 * Overload of {@code cublasXtZherkx} that passes {@code beta} as {@code double[]};
 * all other parameters and the return value match the {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtZherkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans, 
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuDoubleComplex*") double2 alpha,  
                                              @Cast("const cuDoubleComplex*") double2 A, 
                                              @Cast("size_t") long lda,
                                              @Cast("const cuDoubleComplex*") double2 B,
                                              @Cast("size_t") long ldb,
                                              @Const double[] beta,   
                                              @Cast("cuDoubleComplex*") double2 C,
                                              @Cast("size_t") long ldc);       
                         
/* -------------------------------------------------------------------- */                                
/* TRSM */                                                                         
/**
 * Binds the native function {@code cublasXtStrsm}, listed under the "TRSM" section of the
 * parsed header. This overload passes the float data as {@code FloatPointer}.
 * NOTE(review): presumably the BLAS single-precision triangular solve, with the result
 * written through {@code B} -- confirm against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param side   int cast to {@code cublasSideMode_t}
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param diag   int cast to {@code cublasDiagType_t}
 * @param m      long cast to {@code size_t}
 * @param n      long cast to {@code size_t}
 * @param alpha  float data, declared const
 * @param A      float data, declared const
 * @param lda    long cast to {@code size_t}
 * @param B      float data; the only data argument not declared const
 * @param ldb    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtStrsm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("cublasOperation_t") int trans,
                                             @Cast("cublasDiagType_t") int diag,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const FloatPointer alpha,
                                             @Const FloatPointer A,
                                             @Cast("size_t") long lda,
                                             FloatPointer B,
                                             @Cast("size_t") long ldb);
/**
 * Overload of {@code cublasXtStrsm} that passes the float data as {@code FloatBuffer};
 * all other parameters and the return value match the {@code FloatPointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtStrsm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("cublasOperation_t") int trans,
                                             @Cast("cublasDiagType_t") int diag,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const FloatBuffer alpha,
                                             @Const FloatBuffer A,
                                             @Cast("size_t") long lda,
                                             FloatBuffer B,
                                             @Cast("size_t") long ldb);
/**
 * Overload of {@code cublasXtStrsm} that passes the float data as {@code float[]};
 * all other parameters and the return value match the {@code FloatPointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtStrsm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("cublasOperation_t") int trans,
                                             @Cast("cublasDiagType_t") int diag,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const float[] alpha,
                                             @Const float[] A,
                                             @Cast("size_t") long lda,
                                             float[] B,
                                             @Cast("size_t") long ldb);
    

/**
 * Binds the native function {@code cublasXtDtrsm}, listed under the "TRSM" section of the
 * parsed header. This overload passes the double data as {@code DoublePointer}.
 * NOTE(review): presumably the BLAS double-precision triangular solve, with the result
 * written through {@code B} -- confirm against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param side   int cast to {@code cublasSideMode_t}
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param diag   int cast to {@code cublasDiagType_t}
 * @param m      long cast to {@code size_t}
 * @param n      long cast to {@code size_t}
 * @param alpha  double data, declared const
 * @param A      double data, declared const
 * @param lda    long cast to {@code size_t}
 * @param B      double data; the only data argument not declared const
 * @param ldb    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDtrsm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("cublasOperation_t") int trans,
                                             @Cast("cublasDiagType_t") int diag,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const DoublePointer alpha, 
                                             @Const DoublePointer A, 
                                             @Cast("size_t") long lda, 
                                             DoublePointer B,
                                             @Cast("size_t") long ldb);
/**
 * Overload of {@code cublasXtDtrsm} that passes the double data as {@code DoubleBuffer};
 * all other parameters and the return value match the {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtDtrsm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("cublasOperation_t") int trans,
                                             @Cast("cublasDiagType_t") int diag,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const DoubleBuffer alpha, 
                                             @Const DoubleBuffer A, 
                                             @Cast("size_t") long lda, 
                                             DoubleBuffer B,
                                             @Cast("size_t") long ldb);
/**
 * Overload of {@code cublasXtDtrsm} that passes the double data as {@code double[]};
 * all other parameters and the return value match the {@code DoublePointer} overload.
 */
public static native @Cast("cublasStatus_t") int cublasXtDtrsm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("cublasOperation_t") int trans,
                                             @Cast("cublasDiagType_t") int diag,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const double[] alpha, 
                                             @Const double[] A, 
                                             @Cast("size_t") long lda, 
                                             double[] B,
                                             @Cast("size_t") long ldb);
    
/**
 * Binds the native function {@code cublasXtCtrsm}, listed under the "TRSM" section of the
 * parsed header. Complex data is passed as {@code float2} and cast to {@code cuComplex*}.
 * NOTE(review): presumably the single-precision complex triangular solve, with the result
 * written through {@code B} -- confirm against the cuBLAS documentation.
 *
 * @param handle cublasXt context forwarded to the native call
 * @param side   int cast to {@code cublasSideMode_t}
 * @param uplo   int cast to {@code cublasFillMode_t}
 * @param trans  int cast to {@code cublasOperation_t}
 * @param diag   int cast to {@code cublasDiagType_t}
 * @param m      long cast to {@code size_t}
 * @param n      long cast to {@code size_t}
 * @param alpha  cast to {@code const cuComplex*}
 * @param A      cast to {@code const cuComplex*}
 * @param lda    long cast to {@code size_t}
 * @param B      cast to non-const {@code cuComplex*}
 * @param ldb    long cast to {@code size_t}
 * @return the native {@code cublasStatus_t} result as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCtrsm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("cublasOperation_t") int trans,
                                             @Cast("cublasDiagType_t") int diag,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Cast("const cuComplex*") float2 alpha, 
                                             @Cast("const cuComplex*") float2 A,
                                             @Cast("size_t") long lda,
                                             @Cast("cuComplex*") float2 B,
                                             @Cast("size_t") long ldb);
                  
/**
 * Native JavaCPP binding for {@code cublasXtZtrsm}. Operands are {@code double2} objects
 * cast to the native {@code cuDoubleComplex*} type.
 *
 * <p>{@code side}, {@code uplo}, {@code trans} and {@code diag} map to the native
 * {@code cublasSideMode_t}, {@code cublasFillMode_t}, {@code cublasOperation_t} and
 * {@code cublasDiagType_t} parameters; {@code m}, {@code n}, {@code lda} and {@code ldb}
 * are passed as {@code size_t}. {@code alpha} and {@code A} are cast to
 * {@code const cuDoubleComplex*}, {@code B} to a non-const {@code cuDoubleComplex*}.
 *
 * <p>NOTE(review): the name suggests a double-precision complex triangular solve (TRSM)
 * writing its result into {@code B} -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZtrsm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("cublasOperation_t") int trans,
                                             @Cast("cublasDiagType_t") int diag,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Cast("const cuDoubleComplex*") double2 alpha, 
                                             @Cast("const cuDoubleComplex*") double2 A,                                        
                                             @Cast("size_t") long lda,
                                             @Cast("cuDoubleComplex*") double2 B,
                                             @Cast("size_t") long ldb);       
/* -------------------------------------------------------------------- */                                
/* SYMM : Symmetric Multiply Matrix*/                                                                         
/**
 * Native JavaCPP binding for {@code cublasXtSsymm}, declared under this file's SYMM
 * ("Symmetric Multiply Matrix") section; this overload takes {@code FloatPointer} operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably computes C = alpha*A*B + beta*C (or B*A, per {@code side})
 * with symmetric A -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSsymm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const FloatPointer alpha,
                                             @Const FloatPointer A,
                                             @Cast("size_t") long lda,
                                             @Const FloatPointer B,
                                             @Cast("size_t") long ldb,
                                             @Const FloatPointer beta,
                                             FloatPointer C,
                                             @Cast("size_t") long ldc );
/**
 * Native JavaCPP binding for {@code cublasXtSsymm}, declared under this file's SYMM
 * ("Symmetric Multiply Matrix") section; this overload takes {@code FloatBuffer} operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably computes C = alpha*A*B + beta*C (or B*A, per {@code side})
 * with symmetric A -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSsymm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const FloatBuffer alpha,
                                             @Const FloatBuffer A,
                                             @Cast("size_t") long lda,
                                             @Const FloatBuffer B,
                                             @Cast("size_t") long ldb,
                                             @Const FloatBuffer beta,
                                             FloatBuffer C,
                                             @Cast("size_t") long ldc );
/**
 * Native JavaCPP binding for {@code cublasXtSsymm}, declared under this file's SYMM
 * ("Symmetric Multiply Matrix") section; this overload takes {@code float[]} operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably computes C = alpha*A*B + beta*C (or B*A, per {@code side})
 * with symmetric A -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSsymm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const float[] alpha,
                                             @Const float[] A,
                                             @Cast("size_t") long lda,
                                             @Const float[] B,
                                             @Cast("size_t") long ldb,
                                             @Const float[] beta,
                                             float[] C,
                                             @Cast("size_t") long ldc );    

/**
 * Native JavaCPP binding for {@code cublasXtDsymm}, declared under this file's SYMM
 * ("Symmetric Multiply Matrix") section; this overload takes {@code DoublePointer} operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSsymm}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDsymm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const DoublePointer alpha, 
                                             @Const DoublePointer A, 
                                             @Cast("size_t") long lda,
                                             @Const DoublePointer B,
                                             @Cast("size_t") long ldb,
                                             @Const DoublePointer beta,
                                             DoublePointer C,
                                             @Cast("size_t") long ldc );
/**
 * Native JavaCPP binding for {@code cublasXtDsymm}, declared under this file's SYMM
 * ("Symmetric Multiply Matrix") section; this overload takes {@code DoubleBuffer} operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSsymm}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDsymm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const DoubleBuffer alpha, 
                                             @Const DoubleBuffer A, 
                                             @Cast("size_t") long lda,
                                             @Const DoubleBuffer B,
                                             @Cast("size_t") long ldb,
                                             @Const DoubleBuffer beta,
                                             DoubleBuffer C,
                                             @Cast("size_t") long ldc );
/**
 * Native JavaCPP binding for {@code cublasXtDsymm}, declared under this file's SYMM
 * ("Symmetric Multiply Matrix") section; this overload takes {@code double[]} operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSsymm}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDsymm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const double[] alpha, 
                                             @Const double[] A, 
                                             @Cast("size_t") long lda,
                                             @Const double[] B,
                                             @Cast("size_t") long ldb,
                                             @Const double[] beta,
                                             double[] C,
                                             @Cast("size_t") long ldc );                                 
    
/**
 * Native JavaCPP binding for {@code cublasXtCsymm}, declared under this file's SYMM
 * ("Symmetric Multiply Matrix") section. Operands are {@code float2} objects cast to the
 * native {@code cuComplex*} type.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are cast to {@code const cuComplex*}; {@code C} is cast to a non-const
 * {@code cuComplex*}.
 *
 * <p>NOTE(review): presumably the single-precision complex SYMM -- confirm the operation
 * against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCsymm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Cast("const cuComplex*") float2 alpha, 
                                             @Cast("const cuComplex*") float2 A,
                                             @Cast("size_t") long lda,
                                             @Cast("const cuComplex*") float2 B,
                                             @Cast("size_t") long ldb,
                                             @Cast("const cuComplex*") float2 beta,
                                             @Cast("cuComplex*") float2 C,
                                             @Cast("size_t") long ldc );                                 
                  
/**
 * Native JavaCPP binding for {@code cublasXtZsymm}, declared under this file's SYMM
 * ("Symmetric Multiply Matrix") section. Operands are {@code double2} objects cast to the
 * native {@code cuDoubleComplex*} type.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are cast to {@code const cuDoubleComplex*}; {@code C} is cast to a
 * non-const {@code cuDoubleComplex*}.
 *
 * <p>NOTE(review): presumably the double-precision complex SYMM -- confirm the operation
 * against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZsymm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Cast("const cuDoubleComplex*") double2 alpha, 
                                             @Cast("const cuDoubleComplex*") double2 A,  
                                             @Cast("size_t") long lda,                                      
                                             @Cast("const cuDoubleComplex*") double2 B,
                                             @Cast("size_t") long ldb,
                                             @Cast("const cuDoubleComplex*") double2 beta,
                                             @Cast("cuDoubleComplex*") double2 C,
                                             @Cast("size_t") long ldc );  
/* -------------------------------------------------------------------- */                                         
/* HEMM : Hermitian Matrix Multiply */                                       
 /**
  * Native JavaCPP binding for {@code cublasXtChemm}, declared under this file's HEMM
  * ("Hermitian Matrix Multiply") section. Operands are {@code float2} objects cast to the
  * native {@code cuComplex*} type.
  *
  * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
  * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
  * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
  * {@code beta} are cast to {@code const cuComplex*}; {@code C} is cast to a non-const
  * {@code cuComplex*}.
  *
  * <p>NOTE(review): presumably computes C = alpha*A*B + beta*C (or B*A, per {@code side})
  * with Hermitian A -- confirm against the cuBLASXt documentation.
  *
  * @return the native {@code cublasStatus_t} value as an {@code int}
  */
 public static native @Cast("cublasStatus_t") int cublasXtChemm( cublasXtContext handle,
                                              @Cast("cublasSideMode_t") int side,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("size_t") long m,
                                              @Cast("size_t") long n,
                                              @Cast("const cuComplex*") float2 alpha, 
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Cast("const cuComplex*") float2 beta,
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc );                                 
                  
/**
 * Native JavaCPP binding for {@code cublasXtZhemm}, declared under this file's HEMM
 * ("Hermitian Matrix Multiply") section. Operands are {@code double2} objects cast to the
 * native {@code cuDoubleComplex*} type.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are cast to {@code const cuDoubleComplex*}; {@code C} is cast to a
 * non-const {@code cuDoubleComplex*}.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtChemm}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZhemm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Cast("const cuDoubleComplex*") double2 alpha, 
                                             @Cast("const cuDoubleComplex*") double2 A,  
                                             @Cast("size_t") long lda,                                      
                                             @Cast("const cuDoubleComplex*") double2 B,
                                             @Cast("size_t") long ldb,
                                             @Cast("const cuDoubleComplex*") double2 beta,
                                             @Cast("cuDoubleComplex*") double2 C,
                                             @Cast("size_t") long ldc );  

/* -------------------------------------------------------------------- */ 
/* SYRKX : variant extension of SYRK  */                                     
/**
 * Native JavaCPP binding for {@code cublasXtSsyrkx}, declared under this file's SYRKX
 * ("variant extension of SYRK") section; this overload takes {@code FloatPointer} operands.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably a symmetric rank-k style update of {@code C} built from
 * both {@code A} and {@code B} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSsyrkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const FloatPointer alpha,   
                                              @Const FloatPointer A,
                                              @Cast("size_t") long lda,
                                              @Const FloatPointer B,
                                              @Cast("size_t") long ldb,
                                              @Const FloatPointer beta,   
                                              FloatPointer C,
                                              @Cast("size_t") long ldc);
/**
 * Native JavaCPP binding for {@code cublasXtSsyrkx}, declared under this file's SYRKX
 * ("variant extension of SYRK") section; this overload takes {@code FloatBuffer} operands.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably a symmetric rank-k style update of {@code C} built from
 * both {@code A} and {@code B} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSsyrkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const FloatBuffer alpha,   
                                              @Const FloatBuffer A,
                                              @Cast("size_t") long lda,
                                              @Const FloatBuffer B,
                                              @Cast("size_t") long ldb,
                                              @Const FloatBuffer beta,   
                                              FloatBuffer C,
                                              @Cast("size_t") long ldc);
/**
 * Native JavaCPP binding for {@code cublasXtSsyrkx}, declared under this file's SYRKX
 * ("variant extension of SYRK") section; this overload takes {@code float[]} operands.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably a symmetric rank-k style update of {@code C} built from
 * both {@code A} and {@code B} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSsyrkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const float[] alpha,   
                                              @Const float[] A,
                                              @Cast("size_t") long lda,
                                              @Const float[] B,
                                              @Cast("size_t") long ldb,
                                              @Const float[] beta,   
                                              float[] C,
                                              @Cast("size_t") long ldc);  
            
/**
 * Native JavaCPP binding for {@code cublasXtDsyrkx}, declared under this file's SYRKX
 * ("variant extension of SYRK") section; this overload takes {@code DoublePointer} operands.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSsyrkx}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyrkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const DoublePointer alpha,    
                                              @Const DoublePointer A,
                                              @Cast("size_t") long lda,
                                              @Const DoublePointer B,
                                              @Cast("size_t") long ldb,
                                              @Const DoublePointer beta,   
                                              DoublePointer C,
                                              @Cast("size_t") long ldc);
/**
 * Native JavaCPP binding for {@code cublasXtDsyrkx}, declared under this file's SYRKX
 * ("variant extension of SYRK") section; this overload takes {@code DoubleBuffer} operands.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSsyrkx}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyrkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const DoubleBuffer alpha,    
                                              @Const DoubleBuffer A,
                                              @Cast("size_t") long lda,
                                              @Const DoubleBuffer B,
                                              @Cast("size_t") long ldb,
                                              @Const DoubleBuffer beta,   
                                              DoubleBuffer C,
                                              @Cast("size_t") long ldc);
/**
 * Native JavaCPP binding for {@code cublasXtDsyrkx}, declared under this file's SYRKX
 * ("variant extension of SYRK") section; this overload takes {@code double[]} operands.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are const on the native side; {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSsyrkx}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDsyrkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Const double[] alpha,    
                                              @Const double[] A,
                                              @Cast("size_t") long lda,
                                              @Const double[] B,
                                              @Cast("size_t") long ldb,
                                              @Const double[] beta,   
                                              double[] C,
                                              @Cast("size_t") long ldc);
            
/**
 * Native JavaCPP binding for {@code cublasXtCsyrkx}, declared under this file's SYRKX
 * ("variant extension of SYRK") section. Operands are {@code float2} objects cast to the
 * native {@code cuComplex*} type.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are cast to {@code const cuComplex*}; {@code C} is cast to a non-const
 * {@code cuComplex*}.
 *
 * <p>NOTE(review): presumably the single-precision complex SYRKX -- confirm the exact
 * formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCsyrkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuComplex*") float2 alpha,   
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Cast("const cuComplex*") float2 beta,   
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc);
            
/**
 * Native JavaCPP binding for {@code cublasXtZsyrkx}, declared under this file's SYRKX
 * ("variant extension of SYRK") section. Operands are {@code double2} objects cast to the
 * native {@code cuDoubleComplex*} type.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A}, {@code B} and
 * {@code beta} are cast to {@code const cuDoubleComplex*}; {@code C} is cast to a
 * non-const {@code cuDoubleComplex*}.
 *
 * <p>NOTE(review): presumably the double-precision complex SYRKX -- confirm the exact
 * formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZsyrkx( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuDoubleComplex*") double2 alpha,
                                              @Cast("const cuDoubleComplex*") double2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuDoubleComplex*") double2 B,
                                              @Cast("size_t") long ldb,
                                              @Cast("const cuDoubleComplex*") double2 beta,   
                                              @Cast("cuDoubleComplex*") double2 C,
                                              @Cast("size_t") long ldc);  
/* -------------------------------------------------------------------- */                                          
/* HER2K : Hermitian rank-2k update */
/**
 * Native JavaCPP binding for {@code cublasXtCher2k} (HER2K section of this file); this
 * overload takes {@code beta} as a {@code FloatPointer}.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A} and {@code B} are
 * {@code float2} objects cast to {@code const cuComplex*}, {@code C} is cast to a
 * non-const {@code cuComplex*}. Unlike {@code alpha}, {@code beta} is a const real
 * {@code float} operand rather than a complex one.
 *
 * <p>NOTE(review): presumably the single-precision complex Hermitian rank-2k update of
 * {@code C} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCher2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuComplex*") float2 alpha,   
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Const FloatPointer beta,     
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc);
/**
 * Native JavaCPP binding for {@code cublasXtCher2k} (HER2K section of this file); this
 * overload takes {@code beta} as a {@code FloatBuffer}.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A} and {@code B} are
 * {@code float2} objects cast to {@code const cuComplex*}, {@code C} is cast to a
 * non-const {@code cuComplex*}. Unlike {@code alpha}, {@code beta} is a const real
 * {@code float} operand rather than a complex one.
 *
 * <p>NOTE(review): presumably the single-precision complex Hermitian rank-2k update of
 * {@code C} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCher2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuComplex*") float2 alpha,   
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Const FloatBuffer beta,     
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc);
/**
 * Native JavaCPP binding for {@code cublasXtCher2k} (HER2K section of this file); this
 * overload takes {@code beta} as a {@code float[]}.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A} and {@code B} are
 * {@code float2} objects cast to {@code const cuComplex*}, {@code C} is cast to a
 * non-const {@code cuComplex*}. Unlike {@code alpha}, {@code beta} is a const real
 * {@code float} operand rather than a complex one.
 *
 * <p>NOTE(review): presumably the single-precision complex Hermitian rank-2k update of
 * {@code C} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCher2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans,
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuComplex*") float2 alpha,   
                                              @Cast("const cuComplex*") float2 A,
                                              @Cast("size_t") long lda,
                                              @Cast("const cuComplex*") float2 B,
                                              @Cast("size_t") long ldb,
                                              @Const float[] beta,     
                                              @Cast("cuComplex*") float2 C,
                                              @Cast("size_t") long ldc);  
            
/**
 * Native JavaCPP binding for {@code cublasXtZher2k} (HER2K section of this file); this
 * overload takes {@code beta} as a {@code DoublePointer}.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A} and {@code B} are
 * {@code double2} objects cast to {@code const cuDoubleComplex*}, {@code C} is cast to a
 * non-const {@code cuDoubleComplex*}. Unlike {@code alpha}, {@code beta} is a const real
 * {@code double} operand rather than a complex one.
 *
 * <p>NOTE(review): presumably the double-precision complex Hermitian rank-2k update of
 * {@code C} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZher2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans, 
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuDoubleComplex*") double2 alpha,  
                                              @Cast("const cuDoubleComplex*") double2 A, 
                                              @Cast("size_t") long lda,
                                              @Cast("const cuDoubleComplex*") double2 B,
                                              @Cast("size_t") long ldb,
                                              @Const DoublePointer beta,   
                                              @Cast("cuDoubleComplex*") double2 C,
                                              @Cast("size_t") long ldc);
/**
 * Native JavaCPP binding for {@code cublasXtZher2k} (HER2K section of this file); this
 * overload takes {@code beta} as a {@code DoubleBuffer}.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A} and {@code B} are
 * {@code double2} objects cast to {@code const cuDoubleComplex*}, {@code C} is cast to a
 * non-const {@code cuDoubleComplex*}. Unlike {@code alpha}, {@code beta} is a const real
 * {@code double} operand rather than a complex one.
 *
 * <p>NOTE(review): presumably the double-precision complex Hermitian rank-2k update of
 * {@code C} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZher2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans, 
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuDoubleComplex*") double2 alpha,  
                                              @Cast("const cuDoubleComplex*") double2 A, 
                                              @Cast("size_t") long lda,
                                              @Cast("const cuDoubleComplex*") double2 B,
                                              @Cast("size_t") long ldb,
                                              @Const DoubleBuffer beta,   
                                              @Cast("cuDoubleComplex*") double2 C,
                                              @Cast("size_t") long ldc);
/**
 * Native JavaCPP binding for {@code cublasXtZher2k} (HER2K section of this file); this
 * overload takes {@code beta} as a {@code double[]}.
 *
 * <p>{@code uplo} and {@code trans} map to the native {@code cublasFillMode_t} and
 * {@code cublasOperation_t} parameters; {@code n}, {@code k}, {@code lda}, {@code ldb} and
 * {@code ldc} are passed as {@code size_t}. {@code alpha}, {@code A} and {@code B} are
 * {@code double2} objects cast to {@code const cuDoubleComplex*}, {@code C} is cast to a
 * non-const {@code cuDoubleComplex*}. Unlike {@code alpha}, {@code beta} is a const real
 * {@code double} operand rather than a complex one.
 *
 * <p>NOTE(review): presumably the double-precision complex Hermitian rank-2k update of
 * {@code C} -- confirm the exact formula in the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZher2k( cublasXtContext handle,
                                              @Cast("cublasFillMode_t") int uplo,
                                              @Cast("cublasOperation_t") int trans, 
                                              @Cast("size_t") long n,
                                              @Cast("size_t") long k,
                                              @Cast("const cuDoubleComplex*") double2 alpha,  
                                              @Cast("const cuDoubleComplex*") double2 A, 
                                              @Cast("size_t") long lda,
                                              @Cast("const cuDoubleComplex*") double2 B,
                                              @Cast("size_t") long ldb,
                                              @Const double[] beta,   
                                              @Cast("cuDoubleComplex*") double2 C,
                                              @Cast("size_t") long ldc);       
                         
                                
/* -------------------------------------------------------------------- */                                              
/* SPMM : Symmetric Packed Multiply Matrix*/                                                                         
/**
 * Native JavaCPP binding for {@code cublasXtSspmm}, declared under this file's SPMM
 * ("Symmetric Packed Multiply Matrix") section; this overload takes {@code FloatPointer}
 * operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code ldb} and {@code ldc}
 * are passed as {@code size_t}. {@code AP} has no accompanying leading-dimension argument.
 * {@code alpha}, {@code AP}, {@code B} and {@code beta} are const on the native side;
 * {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably {@code AP} is a symmetric matrix in packed storage and the
 * result is accumulated into {@code C} -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSspmm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const FloatPointer alpha,
                                             @Const FloatPointer AP,
                                             @Const FloatPointer B,
                                             @Cast("size_t") long ldb,
                                             @Const FloatPointer beta,
                                             FloatPointer C,
                                             @Cast("size_t") long ldc );
/**
 * Native JavaCPP binding for {@code cublasXtSspmm}, declared under this file's SPMM
 * ("Symmetric Packed Multiply Matrix") section; this overload takes {@code FloatBuffer}
 * operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code ldb} and {@code ldc}
 * are passed as {@code size_t}. {@code AP} has no accompanying leading-dimension argument.
 * {@code alpha}, {@code AP}, {@code B} and {@code beta} are const on the native side;
 * {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably {@code AP} is a symmetric matrix in packed storage and the
 * result is accumulated into {@code C} -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSspmm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const FloatBuffer alpha,
                                             @Const FloatBuffer AP,
                                             @Const FloatBuffer B,
                                             @Cast("size_t") long ldb,
                                             @Const FloatBuffer beta,
                                             FloatBuffer C,
                                             @Cast("size_t") long ldc );
/**
 * Native JavaCPP binding for {@code cublasXtSspmm}, declared under this file's SPMM
 * ("Symmetric Packed Multiply Matrix") section; this overload takes {@code float[]}
 * operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code ldb} and {@code ldc}
 * are passed as {@code size_t}. {@code AP} has no accompanying leading-dimension argument.
 * {@code alpha}, {@code AP}, {@code B} and {@code beta} are const on the native side;
 * {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably {@code AP} is a symmetric matrix in packed storage and the
 * result is accumulated into {@code C} -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtSspmm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const float[] alpha,
                                             @Const float[] AP,
                                             @Const float[] B,
                                             @Cast("size_t") long ldb,
                                             @Const float[] beta,
                                             float[] C,
                                             @Cast("size_t") long ldc );    

/**
 * Native JavaCPP binding for {@code cublasXtDspmm}, declared under this file's SPMM
 * ("Symmetric Packed Multiply Matrix") section; this overload takes {@code DoublePointer}
 * operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code ldb} and {@code ldc}
 * are passed as {@code size_t}. {@code AP} has no accompanying leading-dimension argument.
 * {@code alpha}, {@code AP}, {@code B} and {@code beta} are const on the native side;
 * {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSspmm}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDspmm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const DoublePointer alpha, 
                                             @Const DoublePointer AP, 
                                             @Const DoublePointer B,
                                             @Cast("size_t") long ldb,
                                             @Const DoublePointer beta,
                                             DoublePointer C,
                                             @Cast("size_t") long ldc );
/**
 * Native JavaCPP binding for {@code cublasXtDspmm}, declared under this file's SPMM
 * ("Symmetric Packed Multiply Matrix") section; this overload takes {@code DoubleBuffer}
 * operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code ldb} and {@code ldc}
 * are passed as {@code size_t}. {@code AP} has no accompanying leading-dimension argument.
 * {@code alpha}, {@code AP}, {@code B} and {@code beta} are const on the native side;
 * {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSspmm}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDspmm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const DoubleBuffer alpha, 
                                             @Const DoubleBuffer AP, 
                                             @Const DoubleBuffer B,
                                             @Cast("size_t") long ldb,
                                             @Const DoubleBuffer beta,
                                             DoubleBuffer C,
                                             @Cast("size_t") long ldc );
/**
 * Native JavaCPP binding for {@code cublasXtDspmm}, declared under this file's SPMM
 * ("Symmetric Packed Multiply Matrix") section; this overload takes {@code double[]}
 * operands.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code ldb} and {@code ldc}
 * are passed as {@code size_t}. {@code AP} has no accompanying leading-dimension argument.
 * {@code alpha}, {@code AP}, {@code B} and {@code beta} are const on the native side;
 * {@code C} is the only non-const operand.
 *
 * <p>NOTE(review): presumably the double-precision counterpart of {@code cublasXtSspmm}
 * -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDspmm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Const double[] alpha, 
                                             @Const double[] AP, 
                                             @Const double[] B,
                                             @Cast("size_t") long ldb,
                                             @Const double[] beta,
                                             double[] C,
                                             @Cast("size_t") long ldc );                                 
    
/**
 * Native JavaCPP binding for {@code cublasXtCspmm}, declared under this file's SPMM
 * ("Symmetric Packed Multiply Matrix") section. Operands are {@code float2} objects cast
 * to the native {@code cuComplex*} type.
 *
 * <p>{@code side} and {@code uplo} map to the native {@code cublasSideMode_t} and
 * {@code cublasFillMode_t} parameters; {@code m}, {@code n}, {@code ldb} and {@code ldc}
 * are passed as {@code size_t}. {@code AP} has no accompanying leading-dimension argument.
 * {@code alpha}, {@code AP}, {@code B} and {@code beta} are cast to
 * {@code const cuComplex*}; {@code C} is cast to a non-const {@code cuComplex*}.
 *
 * <p>NOTE(review): presumably the single-precision complex SPMM with {@code AP} in packed
 * storage -- confirm against the cuBLASXt documentation.
 *
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCspmm( cublasXtContext handle,
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Cast("const cuComplex*") float2 alpha, 
                                             @Cast("const cuComplex*") float2 AP,
                                             @Cast("const cuComplex*") float2 B,
                                             @Cast("size_t") long ldb,
                                             @Cast("const cuComplex*") float2 beta,
                                             @Cast("cuComplex*") float2 C,
                                             @Cast("size_t") long ldc );                                 
                  
/**
 * Native binding for {@code cublasXtZspmm}; complex operands are {@code double2}
 * objects cast to {@code cuDoubleComplex} pointers.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision complex SPMM
 * (symmetric packed matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  passed natively as {@code const cuDoubleComplex*}
 * @param AP     passed natively as {@code const cuDoubleComplex*} (presumably the packed matrix -- confirm)
 * @param B      passed natively as {@code const cuDoubleComplex*}
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param beta   passed natively as {@code const cuDoubleComplex*}
 * @param C      passed natively as non-const {@code cuDoubleComplex*} (presumably the result -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZspmm( cublasXtContext handle, 
                                             @Cast("cublasSideMode_t") int side,
                                             @Cast("cublasFillMode_t") int uplo,
                                             @Cast("size_t") long m,
                                             @Cast("size_t") long n,
                                             @Cast("const cuDoubleComplex*") double2 alpha, 
                                             @Cast("const cuDoubleComplex*") double2 AP,                                        
                                             @Cast("const cuDoubleComplex*") double2 B,
                                             @Cast("size_t") long ldb,
                                             @Cast("const cuDoubleComplex*") double2 beta,
                                             @Cast("cuDoubleComplex*") double2 C,
                                             @Cast("size_t") long ldc );                                                                                                       
                                             
/* -------------------------------------------------------------------- */   
/* TRMM */                                                                                    
/**
 * Native binding for {@code cublasXtStrmm}; overload taking {@code FloatPointer} operands.
 * NOTE(review): by cuBLAS naming this is presumably the single-precision TRMM
 * (triangular matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param trans  passed natively as {@code cublasOperation_t}
 * @param diag   passed natively as {@code cublasDiagType_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  const input pointer
 * @param A      const input pointer
 * @param lda    passed natively as {@code size_t} (presumably the leading dimension of A -- confirm)
 * @param B      const input pointer
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param C      non-const pointer (presumably the result matrix -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtStrmm( cublasXtContext handle,
                                           @Cast("cublasSideMode_t") int side,
                                           @Cast("cublasFillMode_t") int uplo, 
                                           @Cast("cublasOperation_t") int trans,
                                           @Cast("cublasDiagType_t") int diag,                               
                                           @Cast("size_t") long m,
                                           @Cast("size_t") long n,
                                           @Const FloatPointer alpha, 
                                           @Const FloatPointer A,
                                           @Cast("size_t") long lda,
                                           @Const FloatPointer B,
                                           @Cast("size_t") long ldb,
                                           FloatPointer C,
                                           @Cast("size_t") long ldc );
/**
 * Native binding for {@code cublasXtStrmm}; overload taking {@code FloatBuffer} operands.
 * NOTE(review): by cuBLAS naming this is presumably the single-precision TRMM
 * (triangular matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param trans  passed natively as {@code cublasOperation_t}
 * @param diag   passed natively as {@code cublasDiagType_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  const input buffer
 * @param A      const input buffer
 * @param lda    passed natively as {@code size_t} (presumably the leading dimension of A -- confirm)
 * @param B      const input buffer
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param C      non-const buffer (presumably the result matrix -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtStrmm( cublasXtContext handle,
                                           @Cast("cublasSideMode_t") int side,
                                           @Cast("cublasFillMode_t") int uplo, 
                                           @Cast("cublasOperation_t") int trans,
                                           @Cast("cublasDiagType_t") int diag,                               
                                           @Cast("size_t") long m,
                                           @Cast("size_t") long n,
                                           @Const FloatBuffer alpha, 
                                           @Const FloatBuffer A,
                                           @Cast("size_t") long lda,
                                           @Const FloatBuffer B,
                                           @Cast("size_t") long ldb,
                                           FloatBuffer C,
                                           @Cast("size_t") long ldc );
/**
 * Native binding for {@code cublasXtStrmm}; overload taking Java {@code float[]} arrays.
 * NOTE(review): by cuBLAS naming this is presumably the single-precision TRMM
 * (triangular matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param trans  passed natively as {@code cublasOperation_t}
 * @param diag   passed natively as {@code cublasDiagType_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  const input array
 * @param A      const input array
 * @param lda    passed natively as {@code size_t} (presumably the leading dimension of A -- confirm)
 * @param B      const input array
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param C      non-const array (presumably the result matrix -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtStrmm( cublasXtContext handle,
                                           @Cast("cublasSideMode_t") int side,
                                           @Cast("cublasFillMode_t") int uplo, 
                                           @Cast("cublasOperation_t") int trans,
                                           @Cast("cublasDiagType_t") int diag,                               
                                           @Cast("size_t") long m,
                                           @Cast("size_t") long n,
                                           @Const float[] alpha, 
                                           @Const float[] A,
                                           @Cast("size_t") long lda,
                                           @Const float[] B,
                                           @Cast("size_t") long ldb,
                                           float[] C,
                                           @Cast("size_t") long ldc );

/**
 * Native binding for {@code cublasXtDtrmm}; overload taking {@code DoublePointer} operands.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision TRMM
 * (triangular matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param trans  passed natively as {@code cublasOperation_t}
 * @param diag   passed natively as {@code cublasDiagType_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  const input pointer
 * @param A      const input pointer
 * @param lda    passed natively as {@code size_t} (presumably the leading dimension of A -- confirm)
 * @param B      const input pointer
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param C      non-const pointer (presumably the result matrix -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDtrmm( cublasXtContext handle,
                                           @Cast("cublasSideMode_t") int side,
                                           @Cast("cublasFillMode_t") int uplo, 
                                           @Cast("cublasOperation_t") int trans,
                                           @Cast("cublasDiagType_t") int diag,                               
                                           @Cast("size_t") long m,
                                           @Cast("size_t") long n,
                                           @Const DoublePointer alpha, 
                                           @Const DoublePointer A,
                                           @Cast("size_t") long lda,
                                           @Const DoublePointer B,
                                           @Cast("size_t") long ldb,
                                           DoublePointer C,
                                           @Cast("size_t") long ldc );
/**
 * Native binding for {@code cublasXtDtrmm}; overload taking {@code DoubleBuffer} operands.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision TRMM
 * (triangular matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param trans  passed natively as {@code cublasOperation_t}
 * @param diag   passed natively as {@code cublasDiagType_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  const input buffer
 * @param A      const input buffer
 * @param lda    passed natively as {@code size_t} (presumably the leading dimension of A -- confirm)
 * @param B      const input buffer
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param C      non-const buffer (presumably the result matrix -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDtrmm( cublasXtContext handle,
                                           @Cast("cublasSideMode_t") int side,
                                           @Cast("cublasFillMode_t") int uplo, 
                                           @Cast("cublasOperation_t") int trans,
                                           @Cast("cublasDiagType_t") int diag,                               
                                           @Cast("size_t") long m,
                                           @Cast("size_t") long n,
                                           @Const DoubleBuffer alpha, 
                                           @Const DoubleBuffer A,
                                           @Cast("size_t") long lda,
                                           @Const DoubleBuffer B,
                                           @Cast("size_t") long ldb,
                                           DoubleBuffer C,
                                           @Cast("size_t") long ldc );
/**
 * Native binding for {@code cublasXtDtrmm}; overload taking Java {@code double[]} arrays.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision TRMM
 * (triangular matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param trans  passed natively as {@code cublasOperation_t}
 * @param diag   passed natively as {@code cublasDiagType_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  const input array
 * @param A      const input array
 * @param lda    passed natively as {@code size_t} (presumably the leading dimension of A -- confirm)
 * @param B      const input array
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param C      non-const array (presumably the result matrix -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtDtrmm( cublasXtContext handle,
                                           @Cast("cublasSideMode_t") int side,
                                           @Cast("cublasFillMode_t") int uplo, 
                                           @Cast("cublasOperation_t") int trans,
                                           @Cast("cublasDiagType_t") int diag,                               
                                           @Cast("size_t") long m,
                                           @Cast("size_t") long n,
                                           @Const double[] alpha, 
                                           @Const double[] A,
                                           @Cast("size_t") long lda,
                                           @Const double[] B,
                                           @Cast("size_t") long ldb,
                                           double[] C,
                                           @Cast("size_t") long ldc );

/**
 * Native binding for {@code cublasXtCtrmm}; complex operands are {@code float2}
 * objects cast to {@code cuComplex} pointers.
 * NOTE(review): by cuBLAS naming this is presumably the single-precision complex TRMM
 * (triangular matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param trans  passed natively as {@code cublasOperation_t}
 * @param diag   passed natively as {@code cublasDiagType_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  passed natively as {@code const cuComplex*}
 * @param A      passed natively as {@code const cuComplex*}
 * @param lda    passed natively as {@code size_t} (presumably the leading dimension of A -- confirm)
 * @param B      passed natively as {@code const cuComplex*}
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param C      passed natively as non-const {@code cuComplex*} (presumably the result -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtCtrmm( cublasXtContext handle,
                                           @Cast("cublasSideMode_t") int side,
                                           @Cast("cublasFillMode_t") int uplo, 
                                           @Cast("cublasOperation_t") int trans,
                                           @Cast("cublasDiagType_t") int diag,                               
                                           @Cast("size_t") long m,
                                           @Cast("size_t") long n,
                                           @Cast("const cuComplex*") float2 alpha, 
                                           @Cast("const cuComplex*") float2 A,
                                           @Cast("size_t") long lda,
                                           @Cast("const cuComplex*") float2 B,
                                           @Cast("size_t") long ldb,
                                           @Cast("cuComplex*") float2 C,
                                           @Cast("size_t") long ldc );

/**
 * Native binding for {@code cublasXtZtrmm}; complex operands are {@code double2}
 * objects cast to {@code cuDoubleComplex} pointers.
 * NOTE(review): by cuBLAS naming this is presumably the double-precision complex TRMM
 * (triangular matrix-matrix multiply) routine -- confirm against the cuBLAS-XT docs.
 *
 * @param handle cublasXt context handed to the native call
 * @param side   passed natively as {@code cublasSideMode_t}
 * @param uplo   passed natively as {@code cublasFillMode_t}
 * @param trans  passed natively as {@code cublasOperation_t}
 * @param diag   passed natively as {@code cublasDiagType_t}
 * @param m      passed natively as {@code size_t}
 * @param n      passed natively as {@code size_t}
 * @param alpha  passed natively as {@code const cuDoubleComplex*}
 * @param A      passed natively as {@code const cuDoubleComplex*}
 * @param lda    passed natively as {@code size_t} (presumably the leading dimension of A -- confirm)
 * @param B      passed natively as {@code const cuDoubleComplex*}
 * @param ldb    passed natively as {@code size_t} (presumably the leading dimension of B -- confirm)
 * @param C      passed natively as non-const {@code cuDoubleComplex*} (presumably the result -- confirm)
 * @param ldc    passed natively as {@code size_t} (presumably the leading dimension of C -- confirm)
 * @return the native {@code cublasStatus_t} value as an {@code int}
 */
public static native @Cast("cublasStatus_t") int cublasXtZtrmm( cublasXtContext handle,
                                           @Cast("cublasSideMode_t") int side,
                                           @Cast("cublasFillMode_t") int uplo, 
                                           @Cast("cublasOperation_t") int trans,
                                           @Cast("cublasDiagType_t") int diag,                               
                                           @Cast("size_t") long m,
                                           @Cast("size_t") long n,
                                           @Cast("const cuDoubleComplex*") double2 alpha, 
                                           @Cast("const cuDoubleComplex*") double2 A,
                                           @Cast("size_t") long lda,
                                           @Cast("const cuDoubleComplex*") double2 B,
                                           @Cast("size_t") long ldb,
                                           @Cast("cuDoubleComplex*") double2 C,
                                           @Cast("size_t") long ldc );
                                             
                                
// #if defined(__cplusplus)
// #endif /* __cplusplus */


// #endif /* !defined(CUBLAS_XT_H_) */


}
