
    %	&h                     p    d Z ddlmZ ddlmZ ddlmZmZ  ej                  e	      Z
 G d de      ZdgZy)	zLlava model configuration   )PretrainedConfig)logging   )CONFIG_MAPPING
AutoConfigc                   F     e Zd ZdZdZeedZdZ	 	 	 	 	 	 	 	 d fd	Z xZ	S )LlavaConfigaI
  
    This is the configuration class to store the configuration of a [`LlavaForConditionalGeneration`]. It is used to instantiate a
    Llava model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the Llava-9B.

    e.g. [llava-hf/llava-9b](https://huggingface.co/llava-hf/llava-9b)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vision_config (`Union[AutoConfig, dict]`, *optional*, defaults to `CLIPVisionConfig`):
            The config object or dictionary of the vision backbone.
        text_config (`Union[AutoConfig, dict]`, *optional*, defaults to `LlamaConfig`):
            The config object or dictionary of the text backbone.
        image_token_index (`int`, *optional*, defaults to 32000):
            The image token index to encode the image prompt.
        projector_hidden_act (`str`, *optional*, defaults to `"gelu"`):
            The activation function used by the multimodal projector.
        vision_feature_select_strategy (`str`, *optional*, defaults to `"default"`):
            The feature selection strategy used to select the vision feature from the vision backbone.
            Can be one of `"default"` or `"full"`.
        vision_feature_layer (`Union[int, List[int]]`, *optional*, defaults to -2):
            The index of the layer to select the vision feature. If multiple indices are provided,
            the vision feature of the corresponding indices will be concatenated to form the
            vision features.
        image_seq_length (`int`, *optional*, defaults to 576):
            Sequence length of one image embedding.
        multimodal_projector_bias (`bool`, *optional*, defaults to `True`):
            Whether to use bias in the multimodal projector.

    Example:

    ```python
    >>> from transformers import LlavaForConditionalGeneration, LlavaConfig, CLIPVisionConfig, LlamaConfig

    >>> # Initializing a CLIP-vision config
    >>> vision_config = CLIPVisionConfig()

    >>> # Initializing a Llama config
    >>> text_config = LlamaConfig()

    >>> # Initializing a Llava llava-1.5-7b style configuration
    >>> configuration = LlavaConfig(vision_config, text_config)

    >>> # Initializing a model from the llava-1.5-7b style configuration
    >>> model = LlavaForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
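
    >>> # The backbone configs may also be given as plain dicts; any field left
    >>> # unset falls back to the defaults of the resolved backbone config class
    >>> configuration = LlavaConfig(
    ...     vision_config={"model_type": "clip_vision_model", "image_size": 336},
    ...     text_config={"model_type": "llama"},
    ... )

    >>> # vision_feature_layer can also be a list of layer indices whose hidden
    >>> # states are concatenated to form the vision features
    >>> configuration = LlavaConfig(vision_config, text_config, vision_feature_layer=[-2, -5])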
    ```"""

    model_type = "llava"
    sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
    is_composition = True

    def __init__(
        self,
        vision_config=None,
        text_config=None,
        image_token_index=32000,
        projector_hidden_act="gelu",
        vision_feature_select_strategy="default",
        vision_feature_layer=-2,
        image_seq_length=576,
        multimodal_projector_bias=True,
        **kwargs,
    ):
        self.image_token_index = image_token_index
        self.projector_hidden_act = projector_hidden_act
        self.image_seq_length = image_seq_length

        if vision_feature_select_strategy not in ["default", "full"]:
            raise ValueError(
                "vision_feature_select_strategy should be one of 'default', 'full'. "
                f"Got: {vision_feature_select_strategy}"
            )

        self.vision_feature_select_strategy = vision_feature_select_strategy
        self.vision_feature_layer = vision_feature_layer

        # Resolve the vision backbone: accept a config dict (defaulting its
        # model_type to CLIP), pass through a config object, or fall back to
        # the CLIP ViT-L/14-336 defaults used by Llava-1.5.
        if isinstance(vision_config, dict):
            vision_config["model_type"] = vision_config.get("model_type", "clip_vision_model")
            vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
        elif vision_config is None:
            vision_config = CONFIG_MAPPING["clip_vision_model"](
                intermediate_size=4096,
                hidden_size=1024,
                patch_size=14,
                image_size=336,
                num_hidden_layers=24,
                num_attention_heads=16,
                vocab_size=32000,
                projection_dim=768,
            )

        self.vision_config = vision_config

        # Resolve the text backbone the same way, defaulting to Llama.
        if isinstance(text_config, dict):
            text_config["model_type"] = text_config.get("model_type", "llama")
            text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
        elif text_config is None:
            text_config = CONFIG_MAPPING["llama"]()

        self.text_config = text_config
        self.multimodal_projector_bias = multimodal_projector_bias

        super().__init__(**kwargs)

__all__ = ["LlavaConfig"]