
    %	&h:3                        d Z ddlmZmZmZ ddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ  ej<                  e      Z dZ!dZ" G d dejF                        Z$d"dZ% G d dejF                        Z& G d de      Z' G d de      Z( G d dee(      Z) G d de      Z* G d d e      Z+g d!Z,y)#zPyTorch Phi-3 model.    )CallableOptionalTupleN)nn   )ACT2FN)Cache)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONS)Unpack)logging   )MistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralPreTrainedModeleager_attention_forwardrotate_half   )
Phi3Configz microsoft/Phi-3-mini-4k-instructr   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )Phi3MLPc                 *   t         |           || _        t        j                  |j
                  d|j                  z  d      | _        t        j                  |j                  |j
                  d      | _        t        |j                     | _        y )Nr   Fbias)super__init__configr   Linearhidden_sizeintermediate_sizegate_up_proj	down_projr   
hidden_actactivation_fn)selfr   	__class__s     {/var/www/pru.catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/phi3/modular_phi3.pyr   zPhi3MLP.__init__1   sp    IIf&8&8!f>V>V:V]bc6#;#;V=O=OV[\#F$5$56    hidden_statesreturnc                     | j                  |      }|j                  dd      \  }}|| j                  |      z  }| j                  |      S )Nr   dim)r#   chunkr&   r$   )r'   r+   	up_statesgates       r)   forwardzPhi3MLP.forward9   sL    %%m4	#//!/4i 2 24 88	~~i((r*   )__name__
__module____qualname__r   torchFloatTensorr4   __classcell__r(   s   @r)   r   r   0   s'    7)U%6%6 )5;L;L )r*   r   c                 `   |j                  |      }|j                  |      }|j                  d   }| dd|f   | d|df   }}|dd|f   |d|df   }


def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
    """Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising the query and key tensors rotated using the Rotary Position Embedding.
    """
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)

    rotary_dim = cos.shape[-1]
    q_rot, q_pass = q[..., :rotary_dim], q[..., rotary_dim:]
    k_rot, k_pass = k[..., :rotary_dim], k[..., rotary_dim:]

    q_embed = torch.cat([(q_rot * cos) + (rotate_half(q_rot) * sin), q_pass], dim=-1)
    k_embed = torch.cat([(k_rot * cos) + (rotate_half(k_rot) * sin), k_pass], dim=-1)
    return q_embed, k_embed
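

# --- Illustrative sketch (not part of the original module) -------------------
# Phi-3 supports partial rotary embeddings: only the first `rotary_dim` features
# of each head (taken from the last dimension of `cos`) are rotated, the rest
# pass through untouched. A minimal, hedged check of that behaviour; the shapes
# and the function name are hypothetical.
def _demo_partial_rotary():
    batch, heads, seq, head_dim, rotary_dim = 1, 2, 5, 8, 4

    q = torch.randn(batch, heads, seq, head_dim)
    k = torch.randn(batch, heads, seq, head_dim)
    # cos/sin are broadcast over the head dimension via `unsqueeze_dim=1`.
    angles = torch.randn(batch, seq, rotary_dim)
    cos, sin = angles.cos(), angles.sin()

    q_embed, k_embed = apply_rotary_pos_emb(q, k, cos, sin)

    # Features beyond `rotary_dim` are copied through unchanged.
    assert torch.equal(q_embed[..., rotary_dim:], q[..., rotary_dim:])
    assert torch.equal(k_embed[..., rotary_dim:], k[..., rotary_dim:])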
ej                  ej                  f   deej                     dee   d	eej                     d
ee   de
ej                  eej                     ee
ej                        f   fdZ xZS )Phi3Attentionz=Multi-headed attention from 'Attention Is All You Need' paperr   	layer_idxc                 |   t         |           || _        || _        t	        |d|j
                  |j                  z        | _        |j                  |j                  z  | _	        |j                  | _        | j                  dz  | _
        |j                  | _        d| _        |j                  | j                  z  d|j                  | j                  z  z  z   }t        j                  |j                  | j                  z  |j
                  d      | _        t        j                  |j
                  |d      | _        y )Nhead_dimg      Tr   Fr   )r   r   r   rP   getattrr!   num_attention_headsrR   num_key_value_headsnum_key_value_groupsscalingattention_dropout	is_causalr   r    o_projqkv_proj)r'   r   rP   op_sizer(   s       r)   r   zPhi3Attention.__init__e   s    "
F4F4F&JdJd4de$*$>$>&B\B\$\!#)#=#= }}d*!'!9!9,,t}}<qFD^D^aeananDn?ooii : :T]] JFL^L^ejk		&"4"4gEJr*   r+   position_embeddingsattention_maskpast_key_valuecache_positionkwargsr,   c           
         |j                   d d }g |d| j                  }| j                  |      }	| j                  j                  | j                  z  }
|	dd |
f   }|	d|
|
| j
                  | j                  z  z   f   }|	d|
| j
                  | j                  z  z   d f   }|j                  |      j                  dd      }|j                  |      j                  dd      }|j                  |      j                  dd      }|\  }}t        ||||      \  }}|'|||d}|j                  ||| j                  |      \  }}t        }| j                  j                  dk7  r^| j                  j                  dk(  r(|j                  dd	      rt        j                  d
       nt         | j                  j                     } || ||||f| j"                  sdn| j$                  | j&                  t)        | j                  dd       d|\  }} |j*                  g |d j-                         }| j/                  |      }||fS )Nr.   .r   r   )rC   rB   r`   eagersdpaoutput_attentionsFz`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.g        sliding_window)dropoutrW   rf   )r>   rR   r[   r   rT   rU   view	transposerM   updaterP   r   _attn_implementationgetloggerwarning_oncer   trainingrX   rW   rS   reshape
contiguousrZ   )r'   r+   r]   r^   r_   r`   ra   input_shapehidden_shapeqkv	query_posquery_states
key_statesvalue_statesrB   rC   cache_kwargsattention_interfaceattn_outputattn_weightss                       r)   r4   zPhi3Attention.forwardt   s[    $))#2.88b8$--8mmM*KK33dmmC	3

?+i)d6N6NQUQ^Q^6^*^^^_
3	D,D,Dt}},T T VVW#((6@@AF__\2<<QB
#((6@@AF&S#7jRUWZ#[ j%#&snUL'5'<'<ZW[WeWegs't$J(?;;++w6{{//69fjjI\^c>d##L
 '>dkk>^>^&_#$7
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.L((r*   )N)NN)r5   r6   r7   __doc__r   r   intr   r8   Tensorr   r	   


class Phi3DecoderLayer(MistralDecoderLayer):
    def __init__(self, config: Phi3Config, layer_idx: int):
        super().__init__(config, layer_idx)
        self.config = config
        self.self_attn = Phi3Attention(config=config, layer_idx=layer_idx)
        self.mlp = Phi3MLP(config)

        self.resid_attn_dropout = nn.Dropout(config.resid_pdrop)
        self.resid_mlp_dropout = nn.Dropout(config.resid_pdrop)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        cache_position: Optional[torch.LongTensor] = None,
        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        **kwargs,
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        """
        Args:
            hidden_states (`torch.FloatTensor`):
                input to the layer of shape `(batch, seq_len, embed_dim)`
            attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
                `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
            position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
                Indices of positions of each input sequence tokens in the position embeddings. Selected in the range
                `[0, config.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
            past_key_value (`Cache`, *optional*): cached past key and value projection states
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
            cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
                Indices depicting the position of the input sequence tokens in the sequence
            kwargs (`dict`, *optional*):
                Arbitrary kwargs to be ignored, used for FSDP and other methods that inject code
                into the model
        """

        residual = hidden_states

        hidden_states = self.input_layernorm(hidden_states)

        # Self Attention
        hidden_states, self_attn_weights = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = residual + self.resid_attn_dropout(hidden_states)  # main diff with Llama

        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = residual + self.resid_mlp_dropout(hidden_states)  # main diff with Llama

        outputs = (hidden_states,)
        if output_attentions:
            outputs += (self_attn_weights,)

        return outputs


class Phi3PreTrainedModel(MistralPreTrainedModel):
    _version = "0.0.5"


class Phi3ForCausalLM(MistralForCausalLM):
    def prepare_inputs_for_generation(
        self,
        input_ids,
        past_key_values=None,
        attention_mask=None,
        inputs_embeds=None,
        cache_position=None,
        position_ids=None,
        use_cache=True,
        logits_to_keep=None,
        **kwargs,
    ):
        # Overwritten -- this model may need to switch between short and long rope, invalidating the cache in the
        # process

        # When the input length first reaches the long/short factor switching point, enforce re-computing the cache.
        # This slows down this single token position, but is better than the failure that would otherwise occur.
        if (
            past_key_values
            and self.config.rope_scaling
            and input_ids.shape[1] >= self.config.original_max_position_embeddings + 1
        ):
            past_length = cache_position[0]
            if past_length <= self.config.original_max_position_embeddings:
                past_key_values = None

        model_inputs = super().prepare_inputs_for_generation(
            input_ids=input_ids,
            past_key_values=past_key_values,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            cache_position=cache_position,
            position_ids=position_ids,
            use_cache=use_cache,
            logits_to_keep=logits_to_keep,
            **kwargs,
        )
        return model_inputs


class Phi3ForSequenceClassification(MistralForSequenceClassification):
    pass


class Phi3ForTokenClassification(MistralForTokenClassification):
    pass


__all__ = [
    "Phi3PreTrainedModel",
    "Phi3Model",  # noqa: F822
    "Phi3ForCausalLM",
    "Phi3ForSequenceClassification",
    "Phi3ForTokenClassification",
]
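

# --- Illustrative sketch (not part of the original module) -------------------
# `Phi3ForCausalLM.prepare_inputs_for_generation` drops the KV cache exactly once,
# at the step where the prompt length first crosses `original_max_position_embeddings`,
# because long-context checkpoints switch to a different set of RoPE scaling factors
# there. A minimal, hedged sketch of that condition with made-up numbers; the function
# name is hypothetical.
def _demo_rope_switch_invalidates_cache(seq_len, past_length, original_max_position_embeddings=4096):
    has_cache_and_scaling = True  # stands in for `past_key_values and self.config.rope_scaling`
    crossed_switch_point = seq_len >= original_max_position_embeddings + 1
    cache_built_with_short_factors = past_length <= original_max_position_embeddings
    return has_cache_and_scaling and crossed_switch_point and cache_built_with_short_factors


# The cache is rebuilt only at the crossing step, not on later steps:
# _demo_rope_switch_invalidates_cache(seq_len=4097, past_length=4096)  -> True
# _demo_rope_switch_invalidates_cache(seq_len=4098, past_length=4097)  -> False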
   modeling_utilsr   processing_utilsr   utilsr   mistral.modeling_mistralr   r   r   r   r   r   r   configuration_phi3r   
get_loggerr5   rm   _CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCModuler   rM   rO   r   r   r   r   r   __all__r   r*   r)   <module>r      s      , ,    !   B 5 &    + 
		H	%8 )bii )$@H)BII H)VF* FR0 &(*= &R	$D 		!> 	r*   
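

# --- Illustrative usage sketch (not part of the original module) -------------
# How this model is typically driven through the public Auto classes. Running it
# downloads the checkpoint named in `_CHECKPOINT_FOR_DOC`, so it is kept as a
# hedged, documentation-only sketch rather than code executed at import time.
#
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#
#   tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
#   model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
#
#   inputs = tokenizer("Hello, Phi-3!", return_tensors="pt")
#   outputs = model.generate(**inputs, max_new_tokens=20)
#   print(tokenizer.decode(outputs[0], skip_special_tokens=True))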