
    %	&h                        d dl mZmZmZ d dlZd dlZd dlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ d
dlmZmZmZmZmZmZmZmZmZ d
dlmZ ddlmZ  ej>                  e       Z! G d de      Z" G d de      Z# G d de      Z$ G d de      Z% G d de      Z& G d de      Z' G d de      Z( G d de      Z)y)    )CallableOptionalTupleN)nn   )Cache)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONS)Unpack)logging   )	LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                        e Zd Z fdZ xZS )Qwen2MLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     }/var/www/pru.catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/qwen2/modular_qwen2.pyr    zQwen2MLP.__init__   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r    __classcell__r)   s   @r*   r   r      s    Y Yr+   r   c                   2    e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   de	ej                     de	e
   de	ej                     d	ee   d
eej                  e	ej                     e	eej                        f   fdZ xZS )Qwen2Attentionr(   	layer_idxc                    t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j
                  | j                  z  |j                  d      | _        y )NTr   F)r   r    r   r!   r"   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projr'   r(   r3   r)   s      r*   r    zQwen2Attention.__init__'   s    +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii : :T]] JFL^L^ejkr+   hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionkwargsreturnc                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}d }| j                  j                  rPt        | j                  dd       9| j                  | j                  j                  k\  r| j                  j                  }t        }| j                  j                   dk7  r^| j                  j                   dk(  r(|j#                  dd	      rt$        j'                  d
       nt(        | j                  j                      } || |	|
||f| j*                  sdn| j,                  | j.                  |d|\  }} |j0                  g |d j3                         }| j5                  |      }||fS )Nr   r   )sincosrA   sliding_windoweagersdpaoutput_attentionsFz`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.g        )dropoutscalingrH   )shaper6   r7   view	transposer9   r:   r   updater3   r(   use_sliding_windowgetattrmax_window_layersrH   r   _attn_implementationgetloggerwarning_oncer
   trainingattention_dropoutrM   reshape
contiguousr;   )r'   r=   r>   r?   r@   rA   rB   input_shapehidden_shapequery_states
key_statesvalue_statesrG   rF   cache_kwargsrH   attention_interfaceattn_outputattn_weightss                      r*   forwardzQwen2Attention.forward.   s/    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j%#&snUL'5'<'<ZW[WeWegs't$JKK**%5t<H$++"?"??![[77N(?;;++w6{{//69fjjI\^c>d##L
 '>dkk>^>^&_#$7
%
  $}}C$2H2HLL)
%
 
%
!\ *k));;;;FFHkk+.L((r+   )NN)r,   r-   r.   r   intr    torchTensorr   r   r   
LongTensorr   r	   rf   r/   r0   s   @r*   r2   r2   &   s    l{ ls l +/598)||8) #5<<#=>8) !.	8)
 !8) !!1!128) -.8) 
u||Xell3XeELL>Q5RR	S8)r+   r2   c                   (     e Zd Zdedef fdZ xZS )Qwen2DecoderLayerr(   r3   c                     t         |           t        ||      | _        t	        |      | _        |j                  r4|j                  dk7  r$t        j                  d|j                   d       y y y )N)r(   r3   flash_attention_2z=Sliding Window Attention is enabled but not implemented for `z)`; unexpected results may be encountered.)
r   r    r2   	self_attnr   mlprH   rU   rW   rX   r<   s      r*   r    zQwen2DecoderLayer.__init__j   sp    'vKF#  V%@%@DW%WOPVPkPkOl m9 9 &X r+   )r,   r-   r.   r   rg   r    r/   r0   s   @r*   rl   rl   i   s    { s  r+   rl   c                       e Zd Zy)
Qwen2ModelNr,   r-   r.    r+   r*   rr   rr   u       r+   rr   c                       e Zd Zy)Qwen2ForCausalLMNrs   rt   r+   r*   rw   rw   y   ru   r+   rw   c                       e Zd Zy)Qwen2ForSequenceClassificationNrs   rt   r+   r*   ry   ry   }   ru   r+   ry   c                       e Zd Zy)Qwen2ForTokenClassificationNrs   rt   r+   r*   r{   r{      ru   r+   r{   c                       e Zd Zy)Qwen2ForQuestionAnsweringNrs   rt   r+   r*   r}   r}      ru   r+   r}   )*typingr   r   r   rh   torch.utils.checkpointr   cache_utilsr   modeling_flash_attention_utilsr	   modeling_utilsr
   processing_utilsr   utilsr   llama.modeling_llamar   r   r   r   r   r   r   r   r   mistral.modeling_mistralr   configuration_qwen2r   
get_loggerr,   rW   r   r2   rl   rr   rw   ry   r{   r}   rt   r+   r*   <module>r      s    , ,      B 5 & 
 
 
 4 , 
		H	%Yx Y@)^ @)F	) 		 		' 		%C 		"= 		 9 	r+   
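

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): the generated
# Qwen2 classes are exposed through the public transformers API. The tiny
# config values below are arbitrary assumptions for a quick smoke test, not
# Qwen2 defaults.
#
#   from transformers import Qwen2Config, Qwen2ForCausalLM
#   import torch
#
#   config = Qwen2Config(
#       vocab_size=1000,
#       hidden_size=64,
#       intermediate_size=128,
#       num_hidden_layers=2,
#       num_attention_heads=4,
#       num_key_value_heads=2,
#   )
#   model = Qwen2ForCausalLM(config)
#   input_ids = torch.randint(0, config.vocab_size, (1, 8))
#   logits = model(input_ids=input_ids).logits  # shape: (1, 8, vocab_size)
# ---------------------------------------------------------------------------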