
    !&hM                     :   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ ej$                  j'                  ej$                  j)                  e            Zej$                  j-                  ed      gZej$                  j-                  ed	      Zd
gZ e j4                         d        Z e j4                         d        Z e j4                         d        Zd Z G d de      Z d Z!d Z" G d de      Z# G d de      Z$y)    N)Path)_build)get_cache_manager)_allocation)	GPUTarget)	GPUDriverincludelibcudac            	         t        j                  d      } | r| gS t        j                  ddg      j	                         }|j                         D cg c]  }d|v s|j                         d    }}|D cg c]!  }t         j                  j                  |      # }}t        j                  d      }|r^|s\|j                  d      D cg c]B  }t         j                  j                  t         j                  j                  |d            sA|D }}d}|r|d	t        |      z  z  }|d
z  }n
|dz  }|dz  }t        d |D              sJ |       |S c c}w c c}w c c}w )NTRITON_LIBCUDA_PATHz/sbin/ldconfigz-plibcuda.so.1LD_LIBRARY_PATH:zlibcuda.so cannot found!
z!Possible files are located at %s.z:Please create a symlink of libcuda.so to any of the files.z<Please make sure GPU is set up and then run "/sbin/ldconfig"z- (requires sudo) to refresh the linker cache.c              3      K   | ]A  }t         j                  j                  t         j                  j                  |d              C yw)r   N)ospathexistsjoin).0r   s     s/var/www/pru.catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/triton/backends/nvidia/driver.py	<genexpr>zlibcuda_dirs.<locals>.<genexpr>)   s,     Sdrww~~bggll4@ASs   AA	)r   getenv
subprocesscheck_outputdecode
splitlinessplitr   dirnamer   r   strany)	env_libcuda_pathlibslinelocslocdirsenv_ld_library_pathdirmsgs	            r   libcuda_dirsr,      sO   yy!67 !!""$4d#;<CCED *.):UnPT>TDJJLUDU,01SBGGOOC 1D1))$564288=sPRPWPWP\P\]`bpPqArss
&C2SY>>KKMM>>SdSSXUXXSK V1 ts   	EE7&EAEEc                  $    t         gt               S N)libdevice_dirr,        r   library_dirsr2   -   s    +LN++r1   c                  ^    ddl m} m}m} dj	                   |         |       g |             S )Nr   machinesystemarchitecture,)platformr5   r6   r7   r   r4   s      r   platform_keyr:   2   s'    6688WY:<>:;;r1   c           	         t        j                  | t               z   j                  d            j	                         }t        |      }t        j                  d      j                  d      d   }|j                  | d|       }|t        j                         5 }t        j                  j                  |d      }t        |d      5 }|j!                  |        d d d        t#        |||t%               t&        t(              }	t        |	d      5 }|j+                  |j-                         | d| d	      }d d d        d d d        d
d l}
|
j0                  j3                  ||      }|
j0                  j5                  |      }|j6                  j9                  |       |S # 1 sw Y   xY w# 1 sw Y   uxY w# 1 sw Y   yxY w)Nzutf-8
EXT_SUFFIX.r   zmain.cwrbT)binaryr   )hashlibsha256r:   encode	hexdigestr   	sysconfigget_config_varr   get_filetempfileTemporaryDirectoryr   r   r   openwriter   r2   include_dir	librariesputreadimportlib.utilutilspec_from_file_locationmodule_from_specloaderexec_module)srcnamekeycacheext
cache_pathtmpdirsrc_pathfso	importlibspecmods                r   compile_module_from_srcrc   8   s~   
..#.66w?
@
J
J
LCc"E

"
"<
0
6
6s
;B
?C4&#0J((* 	Ofww||FH5Hh$ hYWBb$ O1"YYqvvxD63%YN
O	O >>11$
CD
..
)
)$
/CKKC J O O	O 	Os<   -F7	F4F7(F+7F7F(	$F7+F4	0F77G c                   $     e Zd Z fdZd Z xZS )	CudaUtilsc                 d    t        | d      st        t        |   |       | _        | j                  S )Ninstance)hasattrsuperre   __new__rg   )cls	__class__s    r   rj   zCudaUtils.__new__S   s*    sJ' C8=CL||r1   c                 Z   t        t        t        j                  j	                  t
        d            j                         d      }|j                  | _        |j                  | _        |j                  | _	        |j                  | _
        |j                  | _        |j                  | _        y )Nzdriver.c
cuda_utils)rc   r   r   r   r   r    	read_textload_binaryget_device_propertiescuOccupancyMaxActiveClustersset_printf_fifo_sizefill_1d_tma_descriptorfill_2d_tma_descriptor)selfrb   s     r   __init__zCudaUtils.__init__X   s~    %d277<<+L&M&W&W&Y[gh??%(%>%>",/,L,L)$'$<$<!&)&@&@#&)&@&@#r1   )__name__
__module____qualname__rj   rw   __classcell__rl   s   @r   re   re   Q   s    
Ar1   re   c                 ~    | d   dk(  ryi dddddd	d
ddddddddddddddddddddddddd|    S )Nr   *CUdeviceptri1int32_ti8int8_ti16int16_ti32i64int64_tu1uint32_tu8uint8_tu16uint16_tu32u64uint64_tfp16floatbf16fp32f32fp64double	nvTmaDescCUtensorMapr0   )tys    r   	ty_to_cppr   g   s    	!u|ih 	y 	y	
 	y 	j 	i 	z 	z 	z 	 	 	 	w 	  	]!" 	#
 
r1   c                 :   fdfdfddj                  |j                         D cg c]
  } |       c}      }d|z   }dj                  t        |j                                     }t        t	        t
        |j                  d                  }t        |      D ci c]  \  }}||
 }}}t        |      dkD  r)ddj                  d	 |j                         D              z   nd}dj                  d
 |j                         D              }g }	|j                         D ]W  \  }}|d   dk(  r|	j                  d| d       $|dk(  r|	j                  d|        >|dk7  sD|	j                  d|        Y t        t        |            }
d}|j                         D cg c]  \  }}|d   dk(  rd| d| d| d| d	 }}}|j                         D cg c]  \  }}|dk(  rd| d| d| d }}}|j                         D cg c]  \  }}|dk7  sd|  }
}}|
j                  d       dt        |      dkD  rd|z   nd ddj                  |
       d|j                  |j                         D cg c]  \  }} |       d | d! c}}       d"| d#| d$|j                  |       d|j                  |       d%t        |	      dkD  rddj                  |	      z   nd d&}|S c c}w c c}}w c c}}w c c}}w c c}}w c c}}w )'Nc                 ^    t        | t              rdj                  t        |             S | S )Nr8   )
isinstancetupler   map)sig_serialize_signatures    r   r   z+make_launcher.<locals>._serialize_signature   s)    c5!88C 4c:;;
r1   c                     t        | t              r!dj                  t        |             }d| dS | d   dk(  ry| dv ryt	        |       S )Nr8   []r   r~   z	PyObject*	constexprr   r   r   r   r   r   )r   val_extracted_types     r   r   z&make_launcher.<locals>._extracted_type   sT    b% ((334Cse1:a5C<++}r1   c                     t        | t              r!dj                  t        |             }d| dS | d   dk(  ry| dv rydd	d
dddddddddt	        |          S )N ()r   r~   Or   r^   dlbhiLBHIK)r   r   longr   r   r   r   r   r   r   r   r   )r   r   	format_ofs     r   r   z make_launcher.<locals>.format_of   s    b% ''#i,-Cse1:a5C<++
 B- 	r1   r   iiiKKpOOOOOr8   r   z, c              3   ,   K   | ]  \  }}d |   yw)z&_argNr0   r   r   r   s      r   r   z make_launcher.<locals>.<genexpr>   s      LB5 Ls   c              3   N   K   | ]  \  }}|d k7  st        |       d|   yw)r   z argN)r   r   s      r   r   z make_launcher.<locals>.<genexpr>   s,     h2VX\gVgYr]O4s3hs   %%r~   ptr_infoz.dev_ptrr   z*tma_ptrr   _argz
  zDevicePtrInfo ptr_infoz = getPointer(_argz); if (!ptr_infoz.valid) return NULL;zCUtensorMap* tma_ptrz = getTmaDesc(_argz); if (!tma_ptrz) return NULL;z&argz&global_scratchaB  
#include "cuda.h"
#include <stdbool.h>
#include <Python.h>
#include <dlfcn.h>

static inline void gpuAssert(CUresult code, const char *file, int line)
{
   if (code != CUDA_SUCCESS)
   {
      const char* prefix = "Triton Error [CUDA]: ";
      const char* str;
      cuGetErrorString(code, &str);
      char err[1024] = {0};
      strcat(err, prefix);
      strcat(err, str);
      PyGILState_STATE gil_state;
      gil_state = PyGILState_Ensure();
      PyErr_SetString(PyExc_RuntimeError, err);
      PyGILState_Release(gil_state);
   }
}

#define CUDA_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

typedef CUresult (*cuLaunchKernelEx_t)(const CUlaunchConfig* config, CUfunction f, void** kernelParams, void** extra);

static cuLaunchKernelEx_t getLaunchKernelExHandle() {
  // Open the shared library
  void* handle = dlopen("libcuda.so.1", RTLD_LAZY);
  if (!handle) {
    PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1");
    return NULL;
  }
  // Clear any existing error
  dlerror();
  cuLaunchKernelEx_t cuLaunchKernelExHandle = (cuLaunchKernelEx_t)dlsym(handle, "cuLaunchKernelEx");
  // Check for errors
  const char *dlsym_error = dlerror();
  if (dlsym_error) {
    PyErr_SetString(PyExc_RuntimeError, "Failed to retrieve cuLaunchKernelEx from libcuda.so.1");
    return NULL;
  }
  return cuLaunchKernelExHandle;
}

static void _launch(int gridX, int gridY, int gridZ, int num_warps, int num_ctas, int launch_cooperative_grid, int clusterDimX, int clusterDimY, int clusterDimZ, int shared_memory, CUstream stream, CUfunction function, CUdeviceptr global_scratchz) {
  void *params[] = { aO   };
  if (gridX*gridY*gridZ > 0) {
    if ((num_ctas == 1) && (0 == launch_cooperative_grid)) {
      CUDA_CHECK(cuLaunchKernel(function, gridX, gridY, gridZ, 32*num_warps, 1, 1, shared_memory, stream, params, 0));
    } else if ((num_ctas == 1) && (0 != launch_cooperative_grid)) {
      CUlaunchAttribute launchAttr[1];
      CUlaunchAttribute coopAttr = { .id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE, .value = 1};
      launchAttr[0] = coopAttr;

      CUlaunchConfig config;
      config.gridDimX = gridX;
      config.gridDimY = gridY;
      config.gridDimZ = gridZ;
      config.blockDimX = 32 * num_warps;
      config.blockDimY = 1;
      config.blockDimZ = 1;
      config.sharedMemBytes = shared_memory;
      config.hStream = stream;
      config.attrs = launchAttr;
      config.numAttrs = 1;

      static cuLaunchKernelEx_t cuLaunchKernelExHandle = NULL;
      if (cuLaunchKernelExHandle == NULL) {
        cuLaunchKernelExHandle = getLaunchKernelExHandle();
      }
      CUDA_CHECK(cuLaunchKernelExHandle(&config, function, params, 0));

    } else {
      CUlaunchAttribute launchAttr[3];
      launchAttr[0].id = CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION;
      launchAttr[0].value.clusterDim.x = clusterDimX;
      launchAttr[0].value.clusterDim.y = clusterDimY;
      launchAttr[0].value.clusterDim.z = clusterDimZ;
      launchAttr[1].id = CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE;
      launchAttr[1].value.clusterSchedulingPolicyPreference = CU_CLUSTER_SCHEDULING_POLICY_SPREAD;

      unsigned numAttrs = 2;
      if (0 != launch_cooperative_grid) {
        CUlaunchAttribute coopAttr = { .id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE, .value = 1};
        launchAttr[2] = coopAttr;
        numAttrs = 3;
      }

      CUlaunchConfig config;
      config.gridDimX = gridX * clusterDimX;
      config.gridDimY = gridY * clusterDimY;
      config.gridDimZ = gridZ * clusterDimZ;
      config.blockDimX = 32 * num_warps;
      config.blockDimY = 1;
      config.blockDimZ = 1;
      config.sharedMemBytes = shared_memory;
      config.hStream = stream;
      config.attrs = launchAttr;
      config.numAttrs = numAttrs;
      static cuLaunchKernelEx_t cuLaunchKernelExHandle = NULL;
      if (cuLaunchKernelExHandle == NULL) {
        cuLaunchKernelExHandle = getLaunchKernelExHandle();
      }
      CUDA_CHECK(cuLaunchKernelExHandle(&config, function, params, 0));
    }
  }
}

typedef struct _DevicePtrInfo {
    CUdeviceptr dev_ptr;
    bool valid;
} DevicePtrInfo;

static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {
  DevicePtrInfo ptr_info;
  ptr_info.dev_ptr = 0;
  ptr_info.valid = true;
  if (PyLong_Check(obj)) {
    ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(obj);
    return ptr_info;
  }
  if (obj == Py_None) {
    // valid nullptr
    return ptr_info;
  }
  PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
  if(ptr){
    PyObject *empty_tuple = PyTuple_New(0);
    PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
    Py_DECREF(empty_tuple);
    Py_DECREF(ptr);
    if (!PyLong_Check(ret)) {
      PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
      ptr_info.valid = false;
      return ptr_info;
    }
    ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
    if(!ptr_info.dev_ptr)
      return ptr_info;
    uint64_t dev_ptr;
    int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
    if (status == CUDA_ERROR_INVALID_VALUE) {
        PyErr_Format(PyExc_ValueError,
                     "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
        ptr_info.valid = false;
    } else if (status != CUDA_SUCCESS) {
        CUDA_CHECK(status);  // Catch any other cuda API errors
        ptr_info.valid = false;
    }
    ptr_info.dev_ptr = dev_ptr;
    Py_DECREF(ret);  // Thanks ChatGPT!
    return ptr_info;
  }
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
  ptr_info.valid = false;
  return ptr_info;
}

static inline CUtensorMap* getTmaDesc(PyObject *obj) {
  if (sizeof(CUtensorMap*) != 8) {
    PyErr_SetString(PyExc_SystemError, "getTmaDesc() requires 64-bit compilation");
    return NULL;
  }

  PyObject *method_handle = PyObject_GetAttrString(obj, "tma_desc_cpu_ptr");
  if (!method_handle) {
    PyErr_SetString(PyExc_TypeError, "tma_desc_cpu_ptr() method does not exist");
    return NULL;
  }

  PyObject *empty_tuple = PyTuple_New(0);
  if (!empty_tuple) {
    Py_DECREF(method_handle);
    PyErr_SetString(PyExc_SystemError, "Internal Python error!");
    return NULL;
  }
  PyObject *method_ret = PyObject_Call(method_handle, empty_tuple, NULL);
  Py_DECREF(empty_tuple);
  Py_DECREF(method_handle);
  if (!method_ret) {
    PyErr_SetString(PyExc_SystemError, "Internal Python error!");
    return NULL;
  }

  if (!PyLong_Check(method_ret)) {
    PyErr_SetString(PyExc_TypeError, "tma_desc_cpu_ptr() must return 64-bit int");
    Py_DECREF(method_ret);
    return NULL;
  }

  uint64_t ptr_as_uint = PyLong_AsUnsignedLongLong(method_ret);
  Py_DECREF(method_ret);
  if (!ptr_as_uint) {
    PyErr_SetString(PyExc_ValueError, "received NULL ptr from tma_desc_cpu_ptr()");
    return NULL;
  }
  if (ptr_as_uint % 64 != 0) {
    PyErr_SetString(PyExc_ValueError, "tma_desc_cpu_ptr() must be 64-byte aligned");
    return NULL;
  }

  return (CUtensorMap*)(ptr_as_uint);
}

static void ensureCudaContext() {
  CUcontext pctx;
  CUDA_CHECK(cuCtxGetCurrent(&pctx));
  if (!pctx) {
    // Ensure device context.
    CUdevice device;
    CUDA_CHECK(cuDeviceGet(&device, 0));
    CUDA_CHECK(cuDevicePrimaryCtxRetain(&pctx, device));
    CUDA_CHECK(cuCtxSetCurrent(pctx));
  }
}

static PyObject* launch(PyObject* self, PyObject* args) {
  // ensure cuda context is valid before calling any CUDA APIs, e.g. before getPointer calls cuPointerGetAttributes
  ensureCudaContext();

  int gridX, gridY, gridZ;
  uint64_t _stream;
  uint64_t _function;
  int launch_cooperative_grid;
  PyObject *launch_enter_hook = NULL;
  PyObject *launch_exit_hook = NULL;
  PyObject *kernel_metadata = NULL;
  PyObject *launch_metadata = NULL;
  PyObject *global_scratch_obj = NULL;
  z _arg;z
  if(!PyArg_ParseTuple(args, "a*  ", &gridX, &gridY, &gridZ,
                                           &_stream, &_function, &launch_cooperative_grid, &global_scratch_obj,
                                           &kernel_metadata, &launch_metadata,
                                           &launch_enter_hook, &launch_exit_hookat  )) {
    return NULL;
  }

  int num_warps, num_ctas, shared_memory, clusterDimX, clusterDimY, clusterDimZ;
  if (!PyArg_ParseTuple(kernel_metadata, "iiiiii", &num_warps, &num_ctas, &shared_memory, &clusterDimX, &clusterDimY, &clusterDimZ)) {
    PyErr_SetString(PyExc_TypeError, "kernel_metadata must be a tuple");
    return NULL;
  }

  // extract launch metadata
  if (launch_enter_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_enter_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
  }

  CUdeviceptr global_scratch = 0;
  if (global_scratch_obj != Py_None) {
    DevicePtrInfo global_scratch_info = getPointer(global_scratch_obj, -1);
    if (!global_scratch_info.valid) {
      return NULL;
    }
    global_scratch = global_scratch_info.dev_ptr;
  }

  // raise exception asap
  z
  Py_BEGIN_ALLOW_THREADS;
  _launch(gridX, gridY, gridZ, num_warps, num_ctas, launch_cooperative_grid, clusterDimX, clusterDimY, clusterDimZ, shared_memory, (CUstream)_stream, (CUfunction)_function, global_scratcha0  );
  Py_END_ALLOW_THREADS;
  if (PyErr_Occurred()) {
    return NULL;
  }

  if(launch_exit_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_exit_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;

  }

  Py_RETURN_NONE;
}

static PyMethodDef ModuleMethods[] = {
  {"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"},
  {NULL, NULL, 0, NULL} // sentinel
};

static struct PyModuleDef ModuleDef = {
  PyModuleDef_HEAD_INIT,
  "__triton_launcher",
  NULL, //documentation
  -1, //size
  ModuleMethods
};

PyMODINIT_FUNC PyInit___triton_launcher(void) {
  PyObject *m = PyModule_Create(&ModuleDef);
  if(m == NULL) {
    return NULL;
  }
  PyModule_AddFunctions(m, ModuleMethods);
  return m;
}
)r   valuesr   listfilterboolr   	enumeratelenitemsappendrange)	constants	signaturer   args_formatformatr   s	args_list	arg_declsinternal_args_listparamsnewline	ptr_decls	tma_declsrV   r   r   r   s                  @@@r   make_launcherr   ~   s   
, ''93C3C3EFR9R=FGK[(F193C3C3EFGIVD)//#"678I"+I"67$!QA7I7PST]P^abPbtyy L)//:K LLLhjI 		hARhhI" 22a5C<%%8&<=;%%n5;%%QCj12 3y>"F G __&Ara5C< !#5aS1#=MaSPdeI  foetetev\a\]_a qc!3A3oaSWI  '0oo&7MUQ2;LQCjMFM
MM#$.v\ KN  OX  KY  \]  K]  w{  ~G  wG  ce  vf fyy() x*p <<	@QRuq"OB'(aS2RST U  &x (Q R[P[ \: <<	 
<<	 | eh  i{  e|  @  e@  }A  DH  DM  DM  N`  Da  }a  FH  |I 'IWrCf	 Ji
 G 8$
 NR Ss)   K:*K?.!L%LL$LLc                       e Zd Zd Zd Zy)CudaLauncherc                    t        d      rj                  n	t               }fd}|j                         D ci c]  \  }} ||      | }}}j                  j                         D ci c]  \  }}||
 }}}t        ||      t        d      }|j                  | _        |j                  | _        |j                  | _	        |j                  | _
        y c c}}w c c}}w )Nr   c                 t    t        | t              r&j                  j                  j	                  |       fS | S r.   )r   r!   fn	arg_namesindex)xrV   s    r   <lambda>z'CudaLauncher.__init__.<locals>.<lambda>   s-    Z3=OSVV--33A69 UV r1   __triton_launcher)rh   r   dictr   r   r   rc   launchglobal_scratch_sizeglobal_scratch_alignlaunch_cooperative_grid)	rv   rV   metadatar   arg_idxidxvaluer   rb   s	    `       r   rw   zCudaLauncher.__init__  s    %,S+%>CMMDF	V;D??;LMZS%WS\5(M	M25--2E2E2GHJCS%ZH	HIy1%c+>?jj#+#?#? $,$A$A!'/'G'G$ NHs   C/C"c           	          | j                   dkD  r9||z  |z  }|| j                   z  }t        j                  || j                  |      }	nd }	 | j                  |||||| j
                  |	g|  y Nr   )r   r   
_allocatorr   r   r   )
rv   gridXgridYgridZstreamfunctionargs	grid_size
alloc_sizeglobal_scratchs
             r   __call__zCudaLauncher.__call__
  su    ##a'-I"T%=%==J(33J@Y@Y[abN!NE5%4;W;WYgojnor1   N)rx   ry   rz   rw   r   r0   r1   r   r   r     s    
Hpr1   r   c                   R     e Zd Z fdZd Zd Zd Zed        Zd Z	d Z
d Z xZS )	
CudaDriverc                 V    t               | _        t        | _        t        |           y r.   )re   utilsr   launcher_clsri   rw   )rv   rl   s    r   rw   zCudaDriver.__init__  s    [
(r1   c                 ~    | j                         }| j                  |      }|d   dz  |d   z   }d}t        d||      S )Nr   
          r   )get_current_deviceget_device_capabilityr   )rv   device
capability	warp_sizes       r   get_current_targetzCudaDriver.get_current_target  sK    ((*//7
]R'*Q-7
	Y77r1   c                 J    dd l }|j                  d| j                               S )Nr   r   )torchr  r  rv   r
  s     r   get_active_torch_devicez"CudaDriver.get_active_torch_device"  s    ||FD$;$;$=>>r1   c                 "    dd l }|j                  S r   )r
  r   r  s     r   get_device_interfacezCudaDriver.get_device_interface&  s    zzr1   c                      	 dd l } | j                  j                         xr | j                  j                  d u S # t
        $ r Y yw xY w)Nr   F)r
  r   is_availableversionhipImportError)r
  s    r   	is_activezCudaDriver.is_active*  sC    	::**,L%--2C2Ct2KL 		s   7: 	AAc                     ddl m} |S )Nr   )do_bench)triton.testingr  )rv   r  s     r   get_benchmarkerzCudaDriver.get_benchmarker2  s
    +r1   c                 b    dd l }d}|j                  t        |dz        |j                  d      S )Nr   i      r   )dtyper  )r
  emptyint)rv   r
  
cache_sizes      r   get_empty_cache_for_benchmarkz(CudaDriver.get_empty_cache_for_benchmark6  s.    
 '
{{3zQ/uyy{PPr1   c                 $    |j                          y r.   )zero_)rv   rY   s     r   clear_cachezCudaDriver.clear_cache?  s    r1   )rx   ry   rz   rw   r  r  r  staticmethodr  r  r  r"  r{   r|   s   @r   r   r     s;    
8?  Qr1   r   )%	functoolsr   rE   rA   r   rH   pathlibr   triton.runtime.buildr   triton.runtime.cacher   triton.runtimer   triton.backends.compilerr   triton.backends.driverr   r   r    realpath__file__r   rL   r/   rM   	lru_cacher,   r2   r:   rc   objectre   r   r   r   r   r0   r1   r   <module>r/     s    	      ' 2 & . ,
''//"''**84
5ww||GY/0We,H	  0 , , < <
2A A,
.{|p6 p0, ,r1   