
    %	&h7                        d dl Z d dlmZ d dlZd dlmZ d dlZddlmZ ddl	m
Z
mZmZ ddlmZ ddlmZmZmZmZmZ d	d
lmZ  ej.                  e      Z G d dej4                        Z G d de      Z G d de      Zd Zd"dZ G d de      Z  G d de      Z! G d de      Z" G d de"e      Z# G d de
      Z$ G d de      Z% G d d e      Z&g d!Z'y)#    N)Optional   )logging   )GemmaForCausalLMGemmaForSequenceClassificationGemmaForTokenClassification)GraniteAttention)LlamaDecoderLayerLlamaMLP
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbedding   )HeliumConfigc                   ,     e Zd Zd fd	Zd Zd Z xZS )HeliumRMSNormc                     t         |           t        j                  t	        j
                  |            | _        || _        y N)super__init__nn	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      /var/www/pru.catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/helium/modular_helium.pyr   zHeliumRMSNorm.__init__.   s/    ll5::k#:; #    c                 \   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  j                  t        j                        |z  j                  |      S )Nr   T)keepdim)	dtypetor   float32powmeanrsqrtr   r   )r   hidden_statesinput_dtypevariances       r"   forwardzHeliumRMSNorm.forward3   s    #))%((7 $$Q',,R,>%Ht?T?T4T(UUu}}-=AA+NNr#   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)tupler   shaper   )r   s    r"   
extra_reprzHeliumRMSNorm.extra_repr:   s*    ))*+6$2G2G1HIIr#   )gư>)__name__
__module____qualname__r   r0   r4   __classcell__r!   s   @r"   r   r   -   s    $
OJr#   r   c                       e Zd Zy)HeliumRotaryEmbeddingNr5   r6   r7    r#   r"   r;   r;   >       r#   r;   c                       e Zd Zy)	HeliumMLPNr<   r=   r#   r"   r@   r@   B   r>   r#   r@   c                 |    | ddddf   }| ddddf   }t        j                  | |fd      j                  d      S )	z*Rotates half the hidden dims of the input..r   Nr   r   r%   dim)r   stackflatten)xx1x2s      r"   rotate_halfrJ   F   sJ    	
319B	
319B;;Ryb)11"55r#   c                 F   |j                  |      }|j                  |      }|dd|j                  d   dz  f   j                  dd      }|dd|j                  d   dz  f   j                  dd      }| |z  t        |       |z  z   }||z  t        |      |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    .Nr%   r   rB   )	unsqueezer3   repeat_interleaverJ   )qkcossinposition_idsunsqueeze_dimq_embedk_embeds           r"   apply_rotary_pos_embrV   M   s    ( --
&C
--
&C c'SYYr]a'''
(
:
:1"
:
EC
c'SYYr]a'''
(
:
:1"
:
EC3w;q>C/0G3w;q>C/0GGr#   c                   0     e Zd Zddedee   f fdZ xZS )HeliumAttentionconfig	layer_idxc                     t         |   ||       t        j                  |j                  |j                  d      | _        dt        j                  | j                        z  | _	        y )NF)biasr   )
r   r   r   Linearr   o_projmathsqrthead_dimscalingr   rY   rZ   r!   s      r"   r   zHeliumAttention.__init__o   sK    +ii 2 2F4F4FUS499T]]33r#   r   r5   r6   r7   r   r   intr   r8   r9   s   @r"   rX   rX   n   s    4| 4 4 4r#   rX   c                   0     e Zd Zddedee   f fdZ xZS )HeliumDecoderLayerrY   rZ   c                     t         |           t        |      | _        t	        |j
                  |j                        | _        t	        |j
                  |j                        | _        y )Nr    )	r   r   r@   mlpr   r   rms_norm_epsinput_layernormpost_attention_layernormrc   s      r"   r   zHeliumDecoderLayer.__init__v   sP    V$,V-?-?VEXEXY(5f6H6HfNaNa(b%r#   r   rd   r9   s   @r"   rg   rg   u   s#    c| c c cr#   rg   c                       e Zd Zy)HeliumPreTrainedModelNr<   r=   r#   r"   ro   ro   ~   r>   r#   ro   c                   $     e Zd Zdef fdZ xZS )HeliumModelrY   c           	      R   t         |   |       t        j                  t	        |j
                        D cg c]  }t        ||       c}      | _        t        |j                  |j                        | _        t        |      | _        d| _        | j                          y c c}w )Nri   F)r   r   r   
ModuleListrangenum_hidden_layersrg   layersr   r   rk   normr;   
rotary_embgradient_checkpointing	post_initrc   s      r"   r   zHeliumModel.__init__   s     mmDI&JbJbDcdy	2d
 "&"4"4&:M:MN	/7&+# 	 es   B$r5   r6   r7   r   r   r8   r9   s   @r"   rq   rq      s    
| 
 
r#   rq   c                   $     e Zd Zdef fdZ xZS )HeliumForCausalLMrY   c                 d    t         |   |       t        |      | _        | j	                          y r   r   r   rq   modelrz   r   rY   r!   s     r"   r   zHeliumForCausalLM.__init__   &      (
r#   r{   r9   s   @r"   r}   r}          |  r#   r}   c                   $     e Zd Zdef fdZ xZS )HeliumForSequenceClassificationrY   c                 d    t         |   |       t        |      | _        | j	                          y r   r   r   s     r"   r   z(HeliumForSequenceClassification.__init__   r   r#   r{   r9   s   @r"   r   r      r   r#   r   c                   $     e Zd Zdef fdZ xZS )HeliumForTokenClassificationrY   c                 d    t         |   |       t        |      | _        | j	                          y r   r   r   s     r"   r   z%HeliumForTokenClassification.__init__   r   r#   r{   r9   s   @r"   r   r      r   r#   r   )ro   rq   r}   r   r   )Nr   )(r_   typingr   r   torch.nnr   torch.utils.checkpointutilsr   gemma.modeling_gemmar   r   r	   granite.modeling_graniter
   llama.modeling_llamar   r   r   r   r   configuration_heliumr   
get_loggerr5   loggerModuler   r;   r@   rJ   rV   rX   rg   ro   rq   r}   r   r   __all__r=   r#   r"   <module>r      s           
  / 
		H	%JBII J"	0 		 	6B4& 4c* c	0 	' ( &D #> r#   