"""PyTorch TextNet model."""

from typing import Any, List, Optional, Tuple, Union

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from transformers import PreTrainedModel, add_start_docstrings
from transformers.activations import ACT2CLS
from transformers.modeling_outputs import (
    BackboneOutput,
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from transformers.models.textnet.configuration_textnet import TextNetConfig
from transformers.utils import (
    add_code_sample_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from transformers.utils.backbone_utils import BackboneMixin


logger = logging.get_logger(__name__)

_CONFIG_FOR_DOC = "TextNetConfig"
_CHECKPOINT_FOR_DOC = "czczup/textnet-base"
_EXPECTED_OUTPUT_SHAPE = [1, 512, 20, 27]


class TextNetConvLayer(nn.Module):
    def __init__(self, config: TextNetConfig):
        super().__init__()

        self.kernel_size = config.stem_kernel_size
        self.stride = config.stem_stride
        self.activation_function = config.stem_act_func

        padding = (
            (config.stem_kernel_size[0] // 2, config.stem_kernel_size[1] // 2)
            if isinstance(config.stem_kernel_size, tuple)
            else config.stem_kernel_size // 2
        )

        self.conv = nn.Conv2d(
            config.stem_num_channels,
            config.stem_out_channels,
            kernel_size=config.stem_kernel_size,
            stride=config.stem_stride,
            padding=padding,
            bias=False,
        )
        self.batch_norm = nn.BatchNorm2d(config.stem_out_channels, config.batch_norm_eps)

        self.activation = nn.Identity()
        if self.activation_function is not None:
            self.activation = ACT2CLS[self.activation_function]()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.conv(hidden_states)
        hidden_states = self.batch_norm(hidden_states)
        return self.activation(hidden_states)
S )TextNetRepConvLayera  
    This layer supports re-parameterization by combining multiple convolutional branches
    (e.g., main convolution, vertical, horizontal, and identity branches) during training.
    At inference time, these branches can be collapsed into a single convolution for
    efficiency, as per the re-parameterization paradigm.

    The "Rep" in the name stands for "re-parameterization" (introduced by RepVGG).
    r   in_channelsout_channelsr!   r"   c                 t   t         	|           || _        || _        || _        || _        |d   dz
  dz  |d   dz
  dz  f}t        j                         | _        t        j                  |||||d      | _
        t        j                  ||j                        | _        |d   dz
  dz  df}d|d   dz
  dz  f}|d   dk7  rLt        j                  |||d   df||d      | _        t        j                  ||j                        | _        nd\  | _        | _        |d   dk7  rLt        j                  ||d|d   f||d      | _        t        j                  ||j                        | _        nd\  | _        | _        ||k(  r,|dk(  r't        j                  ||j                        | _        y d | _        y )Nr   r   r    F)rH   rI   r!   r"   r#   r$   )num_featuresepsNN)r%   r&   num_channelsrI   r!   r"   r-   ReLUr*   r.   	main_convr2   r3   main_batch_normvertical_convvertical_batch_normhorizontal_convhorizontal_batch_normrbr_identity)
r7   r   rH   rI   r!   r"   r#   vertical_paddinghorizontal_paddingr8   s
            r9   r&   zTextNetRepConvLayer.__init__^   s   '(&NQ&1,{1~/Aa.GH#%779 #%#
  "~~<VMbMbc(^a/A5q9+a.1"4!:;q>Q!#')(^Q/("D (*~~<U[UjUj'kD$;E8D 8q>Q#%99')A/*$D  *,\W]WlWl)mD&?I<D $"< {*v{ NN9N9NO 	  	r:   r;   r<   c                 x   | j                  |      }| j                  |      }| j                  '| j                  |      }| j                  |      }||z   }| j                  '| j	                  |      }| j                  |      }||z   }| j                  | j                  |      }||z   }| j                  |      S r>   )rP   rQ   rR   rS   rT   rU   rV   r*   )r7   r;   main_outputsvertical_outputshorizontal_outputsid_outs         r9   r?   zTextNetRepConvLayer.forward   s    ~~m4++L9 )#11-@#778HI'*::L +!%!5!5m!D!%!;!;<N!O'*<<L(&&}5F'&0L''55r:   )r@   rA   rB   __doc__r   intr&   rC   r   r?   rD   rE   s   @r9   rG   rG   T   sN    7
} 7
3 7
c 7
`c 7
mp 7
r6U\\ 6ell 6r:   rG   c                   .     e Zd Zdedef fdZd Z xZS )TextNetStager   depthc                 p   t         |           |j                  |   }|j                  |   }t	        |      }|j
                  |   }|j
                  |dz      }|g|g|dz
  z  z   }|g|z  }	g }
t        ||	||      D ]  }|
j                  t        |g|         t        j                  |
      | _        y )Nr   )r%   r&   conv_layer_kernel_sizesconv_layer_strideslenhidden_sizeszipappendrG   r-   
ModuleListstage)r7   r   rb   r!   r"   
num_layersstage_in_channel_sizestage_out_channel_sizerH   rI   rk   stage_configr8   s               r9   r&   zTextNetStage.__init__   s    44U;**51%
 & 3 3E :!'!4!4UQY!?,-1G0HJYZN0[[./*<\;O 	ELLL,VClCD	E]]5)
r:   c                 8    | j                   D ]
  } ||      } |S r>   )rk   )r7   hidden_stateblocks      r9   r?   zTextNetStage.forward   s%    ZZ 	/E .L	/r:   )r@   rA   rB   r   r_   r&   r?   rD   rE   s   @r9   ra   ra      s    *} *S *"r:   ra   c            	       b     e Zd Zdef fdZ	 	 ddej                  dee   dee   de	fdZ
 xZS )	TextNetEncoderr   c                     t         |           g }t        |j                        }t	        |      D ]  }|j                  t        ||              t        j                  |      | _	        y r>   )
r%   r&   rf   rd   rangeri   ra   r-   rj   stages)r7   r   rw   
num_stagesstage_ixr8   s        r9   r&   zTextNetEncoder.__init__   s\    778
j) 	:HMM,vx89	: mmF+r:   rq   output_hidden_statesreturn_dictr<   c                     |g}| j                   D ]  } ||      }|j                  |        |s|f}|r||fz   S |S t        ||      S )N)last_hidden_stater;   )rw   ri   r   )r7   rq   rz   r{   r;   rk   outputs          r9   r?   zTextNetEncoder.forward   se     &[[ 	/E .L  .	/ "_F0D6],,P&P-\ijjr:   rM   )r@   rA   rB   r   r&   rC   r   r   boolr   r?   rD   rE   s   @r9   rt   rt      sS    ,} , 04&*	kllk 'tnk d^	k
 
(kr:   rt   aI  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage and
    behavior.

    Parameters:
        config ([`TextNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

TEXTNET_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`TextNetImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


class TextNetPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    textnetpixel_valuesc                    t        |t        j                  t        j                  f      rm|j                  j
                  j                  d| j                  j                         |j                  %|j                  j
                  j                          y y t        |t        j                        rW|j                  j
                  j                  d       |j                  %|j                  j
                  j                          y y y )Ng        )meanstdg      ?)r+   r-   Linearr.   weightdatanormal_r   initializer_ranger$   zero_r2   fill_)r7   modules     r9   _init_weightsz$TextNetPreTrainedModel._init_weights  s    fryy"))45MM&&CT[[5R5R&S{{&  &&( '/MM$$S){{&  &&( ' 0r:   N)	r@   rA   rB   r^   r   config_classbase_model_prefixmain_input_namer    r:   r9   r   r      s    
 !L!$O)r:   r   zPThe bare Textnet model outputting raw features without any specific head on top.c                        e Zd Z fdZ ee       eeee	de
      	 d	dedee   dee   deeeee   f   ee   ef   fd              Z xZS )
TextNetModelc                     t         |   |       t        |      | _        t	        |      | _        t        j                  d      | _        | j                          y )N)r    r    )
r%   r&   r   stemrt   encoderr-   AdaptiveAvgPool2dpooler	post_initr7   r   r8   s     r9   r&   zTextNetModel.__init__  sD     $V,	%f-**62r:   vision)
checkpointoutput_typer   modalityexpected_outputr   rz   r{   r<   c                 :   ||n| j                   j                  }||n| j                   j                  }| j                  |      }| j	                  |||      }|d   }| j                  |      }|s||f}|r	||d   fz   S |S t        |||r
|d         S d       S )Nrz   r{   r   r   )r}   pooler_outputr;   )r   use_return_dictrz   r   r   r   r   )	r7   r   rz   r{   rq   encoder_outputsr}   pooled_outputr~   s	            r9   r?   zTextNetModel.forward  s     &1%<k$++B]B]$8$D $++JjJj 	 yy.,,/CQ\ ' 
 ,A.$56'7F5I6_Q/11UvU7/'0D/!,
 	
 KO
 	
r:   rM   )r@   rA   rB   r&   r   TEXTNET_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   r   r   r   r   r   r   r?   rD   rE   s   @r9   r   r     s    
 ++CD&<$. pt
"
:B4.
^fgk^l
	uS$s)^$eCj2ZZ	[
 E
r:   r   z
    TextNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                        e Zd Z fdZ ee       eee      	 	 	 	 d	de	e
j                     de	e
j                     de	e   de	e   def
d              Z xZS )
TextNetForImageClassificationc                    t         |   |       |j                  | _        t        |      | _        t        j                  d      | _        t        j                         | _	        |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                         | _        t        j                  | j                  | j                  g      | _        | j!                          y )N)r   r   r   )r%   r&   
num_labelsr   r   r-   r   avg_poolFlattenflattenr   rg   r5   fcrj   
classifierr   r   s     r9   r&   z&TextNetForImageClassification.__init__I  s      ++#F+,,V4zz|KQK\K\_`K`"))F//3V5F5FGfhfqfqfs --(EF 	r:   r   r   r   labelsrz   r{   r<   c                 .   ||n| j                   j                  }| j                  |||      }|d   }| j                  D ]
  } ||      } | j	                  |      }d}	|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }
| j                  dk(  r& |
|j                         |j                               }	n |
||      }	n| j                   j
                  dk(  r=t               }
 |
|j                  d| j                        |j                  d            }	n,| j                   j
                  dk(  rt               }
 |
||      }	|s|f|d	d z   }|	|	f|z   S |S t!        |	||j"                  
      S )a~  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), if
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Returns:

        Examples:
        ```python
        >>> import torch
        >>> import requests
        >>> from transformers import TextNetForImageClassification, TextNetImageProcessor
        >>> from PIL import Image

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = TextNetImageProcessor.from_pretrained("czczup/textnet-base")
        >>> model = TextNetForImageClassification.from_pretrained("czczup/textnet-base")

        >>> inputs = processor(images=image, return_tensors="pt")
        >>> with torch.no_grad():
        ...     outputs = model(**inputs)
        >>> outputs.logits.shape
        torch.Size([1, 2])
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        outputs = self.textnet(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        last_hidden_state = outputs[0]
        for layer in self.classifier:
            last_hidden_state = layer(last_hidden_state)
        logits = self.fc(last_hidden_state)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return (loss,) + output if loss is not None else output

        return ImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states)


@add_start_docstrings(
    """
    TextNet backbone, to be used with frameworks like DETR and MaskFormer.
    """,
    TEXTNET_START_DOCSTRING,
)
class TextNetBackbone(TextNetPreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.textnet = TextNetModel(config)
        self.num_features = config.hidden_sizes

        # initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(TEXTNET_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=BackboneOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self, pixel_values: Tensor, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None
    ) -> Union[Tuple[Tuple], BackboneOutput]:
        """
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> import requests
        >>> from PIL import Image
        >>> from transformers import AutoImageProcessor, AutoBackbone

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("czczup/textnet-base")
        >>> model = AutoBackbone.from_pretrained("czczup/textnet-base")

        >>> inputs = processor(image, return_tensors="pt")
        >>> with torch.no_grad():
        ...     outputs = model(**inputs)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        outputs = self.textnet(pixel_values, output_hidden_states=True, return_dict=return_dict)

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        feature_maps = ()
        for idx, stage in enumerate(self.stage_names):
            if stage in self.out_features:
                feature_maps += (hidden_states[idx],)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                hidden_states = outputs.hidden_states if return_dict else outputs[1]
                output += (hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=None,
        )


__all__ = ["TextNetBackbone", "TextNetModel", "TextNetPreTrainedModel", "TextNetForImageClassification"]