
    %	&h.                         d dl Z d dlmZmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZmZmZmZmZ dd	lmZmZ  e       rd dlZ ej(                  e      Z G d
 de      Zy)    N)TYPE_CHECKINGOptional)version   )HfQuantizer   )PreTrainedModel)is_auto_gptq_availableis_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinc                        e Zd ZdZdZg dZdZdef fdZd Z	ddZ
d	 ZddZddZedd
ed   fd       ZddZ xZS )GptqHfQuantizerz
    Quantizer of the GPTQ method - the quantizer supports calibration of the model through the
    `auto_gptq` or `gptqmodel` package. Quantization is done under the hood for users if they load a non-prequantized model.
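
    Illustrative usage (a minimal sketch; the checkpoint id below is a placeholder, and
    quantization additionally requires `optimum` plus `auto_gptq` or `gptqmodel` to be
    installed):

    ```python
    from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

    model_id = "facebook/opt-125m"  # placeholder; any causal-LM checkpoint works
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Quantize a non-prequantized model on the fly: passing a GPTQConfig makes
    # `from_pretrained` route through this quantizer and run calibration on `dataset`.
    gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=gptq_config)

    # Loading an already-quantized GPTQ checkpoint needs no config: the quantizer is
    # selected from the `quantization_config` saved with the checkpoint.
    # model = AutoModelForCausalLM.from_pretrained("<gptq-quantized-checkpoint>", device_map="auto")
    ```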
    F)optimum	auto_gptq	gptqmodelNquantization_configc                     t        |   |fi | t               st        d      ddlm} |j                  | j                  j                               | _	        y )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       |/var/www/pru.catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/quantizers/quantizer_gptq.pyr   zGptqHfQuantizer.__init__-   sM    ,77#%ghh.!.!8!89Q9Q9a9a9c!d    c                    t               st        d      t               rt               rt        j                  d       t               xrH t        j                  t        j                  j                  d            t        j                  d      kD  xs
 t               }|s)t        j                  j                         st        d      t               st               st        d      t               rSt        j                  t        j                  j                  d            t        j                  d      k  rt        d      t               rt        j                  t        j                  j                  d	            t        j                  d
      k  sHt        j                  t        j                  j                  d            t        j                  d      k  rt        d      y y )Nr   z4Detected gptqmodel and auto-gptq, will use gptqmodelz	auto-gptqz0.4.2z2GPU is required to quantize or run quantize model.z|Loading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) or auto-gptq (`pip install auto-gptq`) library. r   zYou need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq` or use gptqmodel by `pip install gptqmodel>=1.4.3`.r   z1.4.3r   1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r   r   r
   r   loggerwarningr   parse	importlibmetadatatorchcudais_availableRuntimeError)r!   argsr"   gptq_supports_cpus       r$   validate_environmentz$GptqHfQuantizer.validate_environment6   s   #%ghh!#(>(@NNQR #$ `i0088EFW^I__& $% 	 !)@)@)BSTT(*.D.F O  $%'--	8J8J8R8RS^8_*`cjcpcpd
 +
  ^  $%MM),,44[ABW]]SZE[[}}Y//77	BCgmmT]F^^jkk _ &r%   c                     |'t         j                  }t        j                  d       |S |t         j                  k7  rt        j                  d       |S )NzRLoading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.zRWe suggest you to set `torch_dtype=torch.float16` for better efficiency with GPTQ.)r-   float16r(   info)r!   torch_dtypes     r$   update_torch_dtypez"GptqHfQuantizer.update_torch_dtypeR   sG    --KKKlm  EMM)KKlmr%   c                     |dt        j                  d      i}t               s"|ddt        j                  d      ifv r|ddik(   |S )N cpur   )r-   devicer   )r!   
device_maps     r$   update_device_mapz!GptqHfQuantizer.update_device_mapZ   sN    ell512J%'J52u||TYGZB[:\,\2q'!r%   modelr	   c                 h   |j                   j                  dk7  rt        d      | j                  rt	        j
                  t        j                  j	                  d            t	        j
                  d      k  r| j                  j                  |      }y  | j                  j                  |fi |}y y )N	input_idsz%We can only quantize pure text model.r   r'   )
r#   main_input_namer0   pre_quantizedr   r*   r+   r,   r    convert_modelr!   r?   r"   s      r$   $_process_model_before_weight_loadingz4GptqHfQuantizer._process_model_before_weight_loadingb   s    ??**k9FGG}}Y//77	BCw}}U^G__..<<UC<..<<UMfM r%   c                    | j                   r| j                  j                  |      }y | j                  j                  |j
                  | j                  _        | j                  j                  || j                  j                         t        j                  | j                  j                               |j                  _        y N)rC   r    post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigrE   s      r$   #_process_model_after_weight_loadingz3GptqHfQuantizer._process_model_after_weight_loadingm   s    **::5AE''1195:5G5G((2""11%9Q9Q9[9[\/9/C/CDDZDZDbDbDd/eELL,r%   c                      yNT )r!   r?   s     r$   is_trainablezGptqHfQuantizer.is_trainablew   s    r%   c                      yrQ   rR   )r!   safe_serializations     r$   is_serializablezGptqHfQuantizer.is_serializable{   s    r%   )r7   torch.dtypereturnrW   )r?   r	   rH   )__name__
__module____qualname____doc__requires_calibrationrequired_packagesr    r   r   r3   r8   r>   rF   rO   propertyr   rS   rV   __classcell__)r#   s   @r$   r   r   #   sm    
 !=e,C el8	Nf (+<"=  r%   r   )r+   typingr   r   	packagingr   baser   modeling_utilsr	   utilsr
   r   r   r   r   utils.quantization_configr   r   r-   
get_loggerrY   r(   r   rR   r%   r$   <module>rh      sO     *   0 u u K 			H	%Yk Yr%   