
    %	&hw                     z   d dl Z d dlmZmZmZ d dlZd dlmZ d dlmc m	Z
 ddlmZ ddlmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZmZmZmZ d	d
lmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddl'm(Z(  ejR                  e*      Z+dZ,dZ-g dZ.dZ/dZ0dZ1d d gZ2dZ3dZ4 G d de%      Z5 G d de      Z6 G d dejn                        Z8 G d de      Z9 G d dejn                        Z: G d d ejn                        Z; G d! d"ejn                        Z< G d# d$ejn                        Z= G d% d&ejn                        Z> G d' d(ee&      Z?d)Z@d*ZAeZB ed+e@       G d, d-e$             ZC ed.e@       G d/ d0e!             ZD ed1e@       G d2 d3e"             ZE ed4e@       G d5 d6e              ZF ed7e@       G d8 d9e#             ZGg d:ZHy);    N)OptionalTupleUnion   )is_deepspeed_zero3_enabled)is_fsdp_managed_module)BaseModelOutputCausalLMOutputSequenceClassifierOutputTokenClassifierOutputWav2Vec2BaseModelOutputXVectorOutput)PreTrainedModel)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging   )	Wav2Vec2FeatureProjectionWav2Vec2FeedForward#Wav2Vec2ForAudioFrameClassificationWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ForXVectorWav2Vec2ModelWav2Vec2PositionalConvEmbeddingWav2Vec2PreTrainedModel   )WavLMConfigr   z1patrickvonplaten/wavlm-libri-clean-100h-base-plus)r   i$  i   zZ'mister quilter is the aposle of the middle classes and we are glad to welcome his gospel'gQ)@zmicrosoft/wavlm-base-plus-sdzmicrosoft/wavlm-base-plus-svg
ףp=
?c                       e Zd Zy)WavLMPositionalConvEmbeddingN__name__
__module____qualname__     }/var/www/pru.catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/wavlm/modular_wavlm.pyr!   r!   3       r'   r!   c                       e Zd Zy)WavLMFeatureProjectionNr"   r&   r'   r(   r+   r+   7   r)   r'   r+   c                       e Zd ZdZ	 	 	 	 ddedededededef fdZ	 	 	 	 dd	ej                  d
e
ej                     de
ej                     dedeej                  e
ej                     e
eej                        f   f
dZd	ej                  d
eej                  ej                   f   dej                  dedej                  ej                  ff
dZdededej                  fdZdej                  dej                  fdZ xZS )WavLMAttentionz=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsdropoutnum_bucketsmax_distancehas_relative_position_biasc                    t         |           || _        || _        || _        ||z  | _        | j
                  |z  | j                  k7  rt        d| j                   d| d      | j
                  dz  | _        t        j                  ||      | _
        t        j                  ||      | _        t        j                  ||      | _        t        j                  ||      | _        || _        || _        t        j                   t#        j$                  d| j                  dd            | _        t        j                  | j
                  d      | _        |r0t        j*                  | j                  | j                        | _        y y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      r      )super__init__r.   r/   r0   head_dim
ValueErrorscalingnnLineark_projv_projq_projout_projr1   r2   	Parametertorchonesgru_rel_pos_constgru_rel_pos_linear	Embeddingrel_attn_embed)selfr.   r/   r0   r1   r2   r3   	__class__s          r(   r7   zWavLMAttention.__init__>   s7    	""!Y.MMI%$..8MdnnM]$YKr3  }}d*ii	95ii	95ii	95		)Y7&(!#ejjDNNAq.Q!R"$))DMM1"=%"$,,t/?/?"PD &r'   hidden_statesattention_maskposition_biasoutput_attentionsreturnc                     |j                         \  }}}|S| j                  ||      }|j                  d      j                  |ddd      j	                  || j
                  z  ||      }|j	                  |j                  dd | j
                  dfz         }	|	j                  dddd      }	| j                  |	      }
|
j	                  |	j                  dd dz         j                  d      }
t        j                  |
      j                  dd      \  }}||| j                  z  d	z
  z  d
z   }|j	                  || j
                  z  dd      |z  }|j	                  d||f      }| j                  ||||      \  }}|||fS )z'Attention layer with relative attentionNr   r   r   r   )r      dim      ?g       @)sizecompute_bias	unsqueezerepeatviewr/   shapepermuterE   sumrB   sigmoidchunkrD   torch_multi_head_self_attention)rH   rJ   rK   rL   rM   indexbsztgt_len_gated_hidden_statesrelative_position_projgate_agate_bgate_outputgated_position_biasattn_outputattn_weightss                    r(   forwardzWavLMAttention.forwardb   s    (,,.Wa   --gw?M''*11#q!Q?DDS4>>EY[bdkl  ,001D1DSb1IT^^]_L`1`a199!Q1E "&!8!89L!M!7!<!<=P=V=VWZXZ=[^d=d!e!i!ijl!m '=>DDQBDO)?)? ?# EFL *..sT^^/CRKm[166GW7MN$($H$H>+>@Q%
!\ L-77r'   ri   c                 X   |j                  dd      x}x}}||j                  d      nd}dx}	}
d}t        j                  |||| j                  | j
                  t        j                  dg      t        j                  | j                  j                  | j                  j                  | j                  j                  f      |	|
|| j                  | j                  j                  | j                  j                  | j                   |||d| j                  j                  | j                  j                  | j                  j                        \  }}|j                  dd      }|C|dddf   j#                  |j$                  dd | j
                  fz   |j$                  dd z         }||fS )zCsimple wrapper around torch's multi_head_attention_forward functionr   r   NFT)use_separate_proj_weightq_proj_weightk_proj_weightv_proj_weight)	transposeneFmulti_head_attention_forwardr.   r/   rB   emptycatr?   biasr=   r>   r0   r@   weighttrainingbroadcast_torZ   )rH   rJ   rK   ri   rM   querykeyvaluekey_padding_maskbias_kbias_vadd_zero_attnrj   rk   s                 r(   r_   z.WavLMAttention.torch_multi_head_self_attention   s    ,55a;;;e3A3M>,,Q/SW  %&$B$BNNNNKKIIt{{'')9)94;;;K;KLMLLMM  MMMM%)++,,++,,++,,+%
!\2 "++Aq1# (40==""2A&$..)::\=O=OPQPR=SSL L((r'   query_length
key_lengthc                    t        j                  |t         j                        d d d f   }t        j                  |t         j                        d d d f   }||z
  }| j                  |      }|j	                  | j
                  j                  j                        }| j                  |      }|j                  g d      }|S )N)dtype)r   r   r   )	rB   arangelong_relative_positions_buckettorG   ry   devicer[   )rH   r   r   context_positionmemory_positionrelative_positionrelative_position_bucketvaluess           r(   rV   zWavLMAttention.compute_bias   s     <<EJJG4P,,zDT1WM+.>>#'#B#BCT#U #;#>#>t?R?R?Y?Y?`?`#a $$%=>	*r'   relative_positionsc                 $   | j                   dz  }|dkD  j                  t        j                        |z  }t        j                  |      }|dz  }||k  }t        j
                  |j                         |z        }|t        j
                  | j                  |z        z  }|||z
  z  }||z   j                  t        j                        }t        j                  |t        j                  ||dz
              }|t        j                  |||      z  }|S )Nr   r   r   )r1   r   rB   r   abslogfloatmathr2   min	full_likewhere)rH   r   r1   relative_buckets	max_exactis_smallrelative_positions_if_largerelative_position_if_larges           r(   r   z)WavLMAttention._relative_positions_bucket   s   &&!+.266uzzB[P"YY'9:1$	%	1&+ii0B0H0H0JY0V&W#&ADHHTM^M^ajMjDk&k#&A[S\E\&]#&/2M&M%Q%QRWR\R\%]"%*YY&8RT_bcTc(d&
" 	EKK2DF`aar'   )        i@  i   TNNFr   )r#   r$   r%   __doc__intr   boolr7   rB   Tensorr   r   rl   FloatTensorr   
LongTensor
BoolTensorr_   rV   r   __classcell__rI   s   @r(   r-   r-   ;   s   G +/"Q"Q "Q 	"Q
 "Q "Q %)"QN 2604"''8||'8 !.'8  -	'8
  '8 
u||Xell3XeELL>Q5RR	S'8R5)((5) e..0@0@@A5) #..	5)
  5) 

U..	/5)n # %BSBS  U=N=N  SXSdSd  r'   r-   c                       e Zd Zy)WavLMFeedForwardNr"   r&   r'   r(   r   r      r)   r'   r   c                   2     e Zd Zddedef fdZddZ xZS )WavLMEncoderLayerconfigr3   c                    t         |           t        |j                  |j                  |j
                  |j                  |j                  |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        t!        |      | _        t        j                  |j                  |j                        | _        y N)r.   r/   r0   r1   r2   r3   epsr6   r7   r-   hidden_sizenum_attention_headsattention_dropoutr1   max_bucket_distance	attentionr;   Dropouthidden_dropoutr0   	LayerNormlayer_norm_eps
layer_normr   feed_forwardfinal_layer_normrH   r   r3   rI   s      r(   r7   zWavLMEncoderLayer.__init__       '((00,,**33'A
 zz&"7"78,,v'9'9v?T?TU,V4 "V-?-?VEZEZ [r'   c                     |}| j                  |||||      \  }}}| j                  |      }||z   }| j                  |      }|| j                  |      z   }| j	                  |      }||f}|r||fz  }|S )NrK   rL   rM   r`   )r   r0   r   r   r   )	rH   rJ   rK   rL   rM   r`   attn_residualrk   outputss	            r(   rl   zWavLMEncoderLayer.forward   s    %59^^)'/ 6D 6
2|] ]3%56%(9(9-(HH--m< -0&Gr'   Tr   r#   r$   r%   r   r   r7   rl   r   r   s   @r(   r   r      s    \{ \ \r'   r   c                   2     e Zd Zddedef fdZddZ xZS ) WavLMEncoderLayerStableLayerNormr   r3   c                    t         |           t        |j                  |j                  |j
                  |j                  |j                  |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        t!        |      | _        t        j                  |j                  |j                        | _        y r   r   r   s      r(   r7   z)WavLMEncoderLayerStableLayerNorm.__init__  r   r'   c                     |}| j                  |      }| j                  ||||      \  }}}| j                  |      }||z   }|| j                  | j	                  |            z   }||f}|r||fz  }|S )N)rK   rL   rM   )r   r   r0   r   r   )rH   rJ   rK   rL   rM   r   rk   r   s           r(   rl   z(WavLMEncoderLayerStableLayerNorm.forward  s    %659^^)'/	 6D 6
2|] ]3%5%(9(9$:O:OP]:^(__ -0&Gr'   r   )NNFr   r   s   @r(   r   r     s    \{ \ \r'   r   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )WavLMEncoderc           
         t         |           || _        t        |      | _        t        j                  |j                  |j                        | _	        t        j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        ||dk(         c}      | _        d| _        y c c}w Nr   r   )r3   F)r6   r7   r   r!   pos_conv_embedr;   r   r   r   r   r   r   r0   
ModuleListrangenum_hidden_layersr   layersgradient_checkpointingrH   r   irI   s      r(   r7   zWavLMEncoder.__init__4  s    :6B,,v'9'9v?T?TUzz&"7"78mmUZ[a[s[sUtuPQv16Ku
 ',# v   !Cc                    |rdnd }|rdnd }|5|j                  d      j                  dd|j                  d         }d|| <   | j                  |      }	||	z   }| j	                  |      }| j                  |      }t               xs t        |       }
d }t        | j                        D ]  \  }}|r||fz   }t        j                  g       }| j                  xr  |dkD  xr || j                  j                  k  }|r|
rM| j                  r,| j                  r | j!                  |j"                  ||||      }n ||||||      }|d d \  }}|rd}|s|d   fz   } |r||fz   }|st%        d |||fD              S t'        |||	      S )
Nr&   rP   r   r   r   r   NNNc              3   &   K   | ]	  }||  y wNr&   .0vs     r(   	<genexpr>z'WavLMEncoder.forward.<locals>.<genexpr>~       mq_`_lm   last_hidden_staterJ   
attentions)rW   rX   rZ   r   r   r0   r   r   	enumerater   rB   randrz   r   	layerdropr   _gradient_checkpointing_func__call__tupler	   rH   rJ   rK   rM   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsexpand_attention_maskposition_embeddingssynced_gpusrL   r   layerdropout_probabilityskip_the_layerlayer_outputss                    r(   rl   zWavLMEncoder.forward?  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M001"11-@%(;;6]302R6LT6R!$++. !	PHAu#$58H$H! #(**R.!]]fq1uf:MPTP[P[PePe:eN![..4==$($E$E%&%)%M %*%'5&3*;%M 0=Ra/@,} 2 &9]1=M<O&O#C!	PF   1]4D Dm]4EGZ$[mmm++*
 	
r'   NFFTr#   r$   r%   r7   rl   r   r   s   @r(   r   r   3  s    	, "D
r'   r   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )WavLMEncoderStableLayerNormc           
         t         |           || _        t        |      | _        t        j                  |j                  |j                        | _	        t        j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        ||dk(         c}      | _        d| _        y c c}w r   )r6   r7   r   r!   r   r;   r   r   r   r   r   r   r0   r   r   r   r   r   r   r   s      r(   r7   z$WavLMEncoderStableLayerNorm.__init__  s    :6B,,v'9'9v?T?TUzz&"7"78mm v778 1UVZ[U[]
 ',#r   c                    |rdnd }|rdnd }|5|j                  d      j                  dd|j                  d         }d|| <   | j                  |      }	||	z   }| j	                  |      }t               xs t        |       }
d }t        | j                        D ]  \  }}|r||fz   }t        j                  g       }| j                  xr  |dkD  xr || j                  j                  k  }|r|
rL| j                  r,| j                  r | j                  |j                   ||||      }n |||||      }|d d \  }}|rd}|s|d   fz   } | j#                  |      }|r||fz   }|st%        d |||fD              S t'        |||	      S )
Nr&   rP   r   r   r   )rK   rM   rL   r   c              3   &   K   | ]	  }||  y wr   r&   r   s     r(   r   z6WavLMEncoderStableLayerNorm.forward.<locals>.<genexpr>  r   r   r   )rW   rX   rZ   r   r0   r   r   r   r   rB   r   rz   r   r   r   r   r   r   r   r	   r   s                    r(   rl   z#WavLMEncoderStableLayerNorm.forward  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M001"11-@%(;;]302R6LT6R!$++.  	PHAu#$58H$H! #(**R.!]]fq1uf:MPTP[P[PePe:eN![ ..4==$($E$E%&%)%M %*%'5*;&3	%M 0=Ra/@,} 2 &9]1=M<O&O#A 	PD 6 1]4D Dm]4EGZ$[mmm+;LYl
 	
r'   r   r   r   s   @r(   r   r     s    ," "B
r'   r   c                   8     e Zd ZdZ fdZed        Zd Z xZS )WavLMGumbelVectorQuantizerz
    Vector quantization using gumbel softmax. See [CATEGORICAL REPARAMETERIZATION WITH
    GUMBEL-SOFTMAX](https://arxiv.org/pdf/1611.01144.pdf) for more information.
    c                 0   t         |           |j                  | _        |j                  | _        |j                  | j                  z  dk7  r&t        d|j                   d| j                   d      t        j                  t        j                  d| j                  | j
                  z  |j                  | j                  z              | _        t        j                  |j                  d   | j                  | j
                  z        | _        d| _        y )Nr   z`config.codevector_dim z5 must be divisible by `config.num_codevector_groups` z for concatenation.r   rP   r   )r6   r7   num_codevector_groups
num_groupsnum_codevectors_per_groupnum_varscodevector_dimr9   r;   rA   rB   r   codevectorsr<   conv_dimweight_projtemperature)rH   r   rI   s     r(   r7   z#WavLMGumbelVectorQuantizer.__init__  s     6688  4??2a7)&*?*?)@ A66:oo5F G%%  <<a4==!@&BWBW[_[j[jBjk
 99V__R%8$//DMM:YZ r'   c           	          | j                  d      }t        j                  t        j                  |t        j                  |dz         z  d             j                         }|S )Nr   rR   gHz>rP   )meanrB   expr\   r   )probsmarginal_probs
perplexitys      r(   _compute_perplexityz.WavLMGumbelVectorQuantizer._compute_perplexity  sR    *YY		.599^VZEZ;[*[ac ddeiik
r'   c                    |j                   \  }}}| j                  |      }|j                  ||z  | j                  z  d      }| j                  rt
        j                  j                  |j                         | j                  d      }|j                  |      }t        j                  |j                  ||z  | j                  d      j                         d      }| j                  |      }n}|j                  d      } |j                  |j                    j!                  d|j                  dd      d      }|j                  ||z  | j                  d      }| j                  |      }|j                  ||z  d      }|j#                  d      | j$                  z  }	|	j                  ||z  | j                  | j&                  d      }
|
j)                  d      j                  ||d      }
|
|fS )NrP   T)tauhardrR   r   rT   )rZ   r	  rY   r  rz   r;   
functionalgumbel_softmaxr   r
  type_asrB   softmaxr  argmax	new_zerosscatter_rW   r  r  r\   )rH   rJ   
batch_sizesequence_lengthr   codevector_probscodevector_soft_distr  codevector_idxcodevectors_per_groupr  s              r(   rl   z"WavLMGumbelVectorQuantizer.forward  s   3@3F3F0
O[ ((7%**:+G$//+Y[]^==!}};;M<O<O<QW[WgWgnr;s/77F $)=="":#?RTU[[]ce$  112FGJ +11b19N6}668K8KLUUN''A.   044Z/5QSWSbSbdfg112BCJ+00o1MrR 0 : :2 >AQAQ Q+00o1Mt`d`m`moqr!oob)..z?BOJ&&r'   )	r#   r$   r%   r   r7   staticmethodr  rl   r   r   s   @r(   r   r     s&    
*  
"'r'   r   c                   @    e Zd ZdZeZdZdZdZdZ	dZ
d Zd Zd Zd	 Zy
)WavLMPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    wavlminput_valuesTFc           
      z   t        |t              r|j                  j                  j                  j                  dd       |j                  j                  j                  j                          t        j                  j                  |j                         yt        |t              rt        j                  j                  |j                  j                  ddt        j                  d|j                  j                   d   |j                  j"                  z  z        z         t        j                  j%                  |j                  j                  d       yt        |t&              rt        j                  d|j(                  j*                  z        }t        j                  j                  |j(                  j                  | |       t        j                  j                  |j(                  j                  | |       yt        |t        j,                        rm|j                  j                  j                  d| j.                  j0                         |j                  %|j                  j                  j                          yyt        |t        j2                  t        j4                  f      rJ|j                  j                  j                          |j                  j                  j7                  d       yt        |t        j8                        rt        j                  j;                  |j                         |j                  jt        j                  |j<                  |j"                  |j                   d   z  z        }t        j                  j                  |j                  | |       yyy)	zInitialize the weightsr   r   )r  stdr   r   )abNrT   )
isinstancer   r	  ry   datanormal_rx   zero_r;   inituniform_r  r!   convr   sqrtkernel_sizein_channels	constant_r+   
projectionin_featuresr<   r   initializer_ranger   	GroupNormfill_Conv1dkaiming_normal_groups)rH   moduleks      r(   _init_weightsz"WavLMPreTrainedModel._init_weights-  s    f89%%**222C##((..0GGV//0 <=GGOO""		!v{{'>'>q'AFKKD[D['["\]]  
 GGfkk..2 67		!f//;;;<AGGV..55!qAGGV..33rQ?		*MM&&CT[[5R5R&S{{&  &&( 'r|| <=KK""$MM$$S)		*GG##FMM2{{&IIfmmv/A/AFDVDVWXDY/YZ[  a 8 ' +r'   c                     t        d      NzNot needed for WavLMAttributeErrorrH   s    r(   _get_adaptersz"WavLMPreTrainedModel._get_adaptersN      344r'   c                     t        d      rC  rD  rF  s    r(   init_adapter_layersz(WavLMPreTrainedModel.init_adapter_layersQ  rH  r'   c                     t        d      rC  rD  rF  s    r(   load_adapterz!WavLMPreTrainedModel.load_adapterT  rH  r'   N)r#   r$   r%   r   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_2_supports_sdparA  rG  rJ  rL  r&   r'   r(   r%  r%     s?    
 L$O&*#"N9B555r'   r%  a  
    WavLM was proposed in [WavLM: Unified Speech Representation Learning with Labeled and Unlabeled
    Data](https://arxiv.org/abs/2110.13900) by Sanyuan Chen, Chengyi Wang, Zhengyang Chen, Yu Wu, Shujie Liu, Zhuo
    Chen, Jinyu Li, Naoyuki Kanda, Takuya Yoshioka, Xiong Xiao, Jian Wu, Long Zhou, Shuo Ren, Yanmin Qian, Yao Qian,
    Jian Wu, Michael Zeng, Xiangzhan Yu, Furu Wei.

    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving etc.).

    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`WavLMConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
aI  
    Args:
        input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Float values of input raw speech waveform. Values can be obtained by loading a `.flac` or `.wav` audio file
            into an array of type `List[float]` or a `numpy.ndarray`, *e.g.* via the soundfile library (`pip install
            soundfile`). To prepare the array into `input_values`, the [`AutoProcessor`] should be used for padding and
            conversion into a tensor of type `torch.FloatTensor`. See [`Wav2Vec2Processor.__call__`] for details.
        attention_mask (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid performing convolution and attention on padding token indices. Mask values selected in `[0,
            1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)

            <Tip warning={true}>

            `attention_mask` should only be passed if the corresponding processor has `config.return_attention_mask ==
            True`. For all models whose processor has `config.return_attention_mask == False`, `attention_mask` should
            **not** be passed to avoid degraded performance when doing batched inference. For such models
            `input_values` should simply be padded with 0 and passed without `attention_mask`. Be aware that these
            models also yield slightly different results depending on whether `input_values` is padded or not.

            </Tip>

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z_The bare WavLM Model transformer outputting raw hidden-states without any specific head on top.c                   T     e Zd Z ee       eeeede	       fd              Z
 xZS )
WavLMModelaudio
checkpointoutput_typerM  modalityexpected_outputc                 "    t        |   di |S Nr&   r6   rl   rH   super_kwargsrI   s     r(   rl   zWavLMModel.forward  s     w...r'   )r#   r$   r%   r   WAVLM_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCWavLMBaseModelOutput_CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPErl   r   r   s   @r(   rT  rT    s:    
 ++AB&($./ C/r'   rT  zcWavLM Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).c                   T     e Zd Z ee       eeeee	e
       fd              Z xZS )WavLMForCTC)rW  rX  rM  rZ  expected_lossc                 $    t        |   di | y r\  r]  r^  s     r(   rl   zWavLMForCTC.forward       	','r'   )r#   r$   r%   r   r`  r   ra  r
   rc  _CTC_EXPECTED_OUTPUT_CTC_EXPECTED_LOSSrl   r   r   s   @r(   rf  rf    s:    
 ++AB&"$,(( C(r'   rf  z
    WavLM Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like
    SUPERB Keyword Spotting.
    c                   R     e Zd Z ee       eeeed       fd              Z	 xZ
S )WavLMForSequenceClassificationrU  )rW  rX  rM  rY  c                 $    t        |   di | y r\  r]  r^  s     r(   rl   z&WavLMForSequenceClassification.forward  s     	','r'   )r#   r$   r%   r   r`  r   ra  r   rc  rl   r   r   s   @r(   rm  rm    s7     ++AB&,$	( C(r'   rm  za
    WavLM Model with a frame classification head on top for tasks like Speaker Diarization.
    c                   T     e Zd Z ee       eeeede	       fd              Z
 xZS ) WavLMForAudioFrameClassificationrU  rV  c                 $    t        |   di | y r\  r]  r^  s     r(   rl   z(WavLMForAudioFrameClassification.forward  ri  r'   )r#   r$   r%   r   r`  r   _FRAME_CLASS_CHECKPOINTr   rc  _FRAME_EXPECTED_OUTPUTrl   r   r   s   @r(   rp  rp    s:     ++AB*)$.( C(r'   rp  zi
    WavLM Model with an XVector feature extraction head on top for tasks like Speaker Verification.
    c                   V     e Zd Z	  ee       eeeede	       fd              Z
 xZS )WavLMForXVectorrU  rV  c                 $    t        |   di | y r\  r]  r^  s     r(   rl   zWavLMForXVector.forward  ri  r'   )r#   r$   r%   r   r`  r   _XVECTOR_CHECKPOINTr   rc  _XVECTOR_EXPECTED_OUTPUTrl   r   r   s   @r(   ru  ru    s=     	*+AB&!$0( C(r'   ru  )rp  rf  rm  ru  rT  r%  )Ir   typingr   r   r   rB   torch.nnr;   torch.nn.functionalr  rt   integrations.deepspeedr   integrations.fsdpr   modeling_outputsr	   r
   r   r   r   r   modeling_utilsr   utilsr   r   r   r   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   configuration_wavlmr   
get_loggerr#   loggerrc  ra  rd  rj  rk  rr  rs  rw  rx  r!   r+   Moduler-   r   r   r   r   r   r   r%  WAVLM_START_DOCSTRINGr`  rb  rT  rf  rm  rp  ru  __all__r&   r'   r(   <module>r     s(    ) )     @ 7  . u u
 
 
 - 
		H	%I & s  8 Q 4  	#B 		6 	c RYY c L	* 	&		 &R"ryy "JP
299 P
fQ
")) Q
hC' C'L55?,C 55p &" H /  e
/ 
/	
/ m
(. 
(	
(  	(%F 	(	(  	
('J 
(
(  	(( ((r'   