
    %	&h                        U d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z(  e       rddl)m*Z* ndZ* ejV                  e,      Z-er e       Z.ee/ee
e/   e
e/   f   f   e0d<   n1 eg d e       rdnd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd  e       rd!nd e       rd"ndffd#d$d e       rdndffd% e       rd&nddffd'd(d) e       rd*nd e       rd+ndffd,d- e       rd.ndffd/d0d1d2d e       rdndffd3d4 e       rd5ndffd6d e       rd7ndffd8d9 e       rd:ndffd;d e       rdndffd<d= e       rd>nd e       rd?ndffd@dA e       rdnd e       rdndffdBd e       rdndffdCd9 e       rd:ndffdDdE e       rdFndffdGdE e       rdFndffdHdI e       rdJnd e       rdKndffdLdM e       rdNndffdOd e       rdndffdPd e       rdndffdQd e       rdndffdRdS e       rdTndffdU e       rdVnd e       rdWndffdXdYdZd[d9 e       rd:ndffd\d4 e       rd5ndffd]d^ e       rd_ndffd` e       rdand e       rdbndffdc e       rdnd e       rdndffdd e       rdnd e       rdndffdedf e       rdgndffdhdi e       rdjndffdkdl e       rdmndffdnd4 e       rd5ndffdod e       rdndffdp e       rdqnddffdrdsd e       rdtndffdud e       rdvndffdw e       rdxnddffdydzd{ e       rd|ndffd}d~d e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdndffdd e       rdtndffdd e       rdtndffd e       rdnddffdd4 e       rd5ndffdd4 e       rd5ndffdd4 e       rd5ndffdd e       rdvndffddd4 e       rd5ndffddd e       rdndffddE e       rdFndffdd e       rdtndffdd e       rdndffddd9 e       rd:ndffdd e       rdndffdd e       rdndffdd e       rdndffdd4 e       rd5ndffdd4 e       rd5ndffd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdnd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffd e       rdnd e       rdndffdÑdd e       rdndffd e       rdnddffdd e       rdvndffdd e       rdvndffd e       rdnddffd e       rdnd e       rdndffd e       rdnd e       rdndffdd9 e       rd:ndffdd e       rdndffdՑd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdndffd e       rdnddffdd e       rdndffdd e       rdtndffdd e       rdtndffdd e       rdtndffdd e       rdndffdd e       rdvndffdd9 e       rd:ndffd e       rdnd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffddd e       rdtndffdd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rdndffdd e       rdvndffdd e       rdvndffdd e       rdvndffddE e       rdFndffddE e       rdFndffdd e       rdndffdd4 e       rd5ndffddE e       rdFndffd dE e       rdFndffdd e       rdndffd e       rd-nd e       rd.ndffd e       rd-nd e       rd.ndffdd e       rdnd e       rdndffddM e       rdNndffdd e       rdndffdd e       rdndffd	d
d e       rdndffdd e       rdtndffd e       rdnddffddd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffdd e       rdndffddd e       rdndffd e       rdnd e       rdndffd e       rdnd e       rd ndffd! e       rd"nd e       rd#ndffd$d% e       rd&ndffd'd9 e       rd:ndffd(d9 e       rd:ndffd)d*d+ e       rd,ndffd-d e       rdvndffd. e       rd/nd e       rd0ndffd1 e       rd/nd e       rd0ndffd2 e       rdnd e       rdndffd3 e       rd4nddffd5 e       rdnd e       rdndffd6 e       rd7nddffd8d9 e       rd:nddffd;d<d= e       rd>ndffd?d e       rdvndffd@d4 e       rd5ndffdA e       rdnd e       rdndffdB e       rdnd e       rdndffdCdDdEdFd e       rdndffdG e       rdHnd e       rdIndffdJ e       rdnd e       rdndffdKd e       rdndffdLd e       rdndffdMd e       rdndffdNd e       rdndffdOdPdQdRdSdTdU e       rdVndffdWdE e       rdFndffdX e       rdYnd e       rdZndffd[d\ e       rd]nddffd^ e       rdnd e       rdndffd_ e       rdnd e       rdndffd` e       rdand e       rdbndffdc e       rdnd e       rdndffdd e       rdnd e       rdndffde e       rdnd e       rdndffdf e       rdnd e       rdndff      Z. e"e$e.      Z1 e$jd                         D  ci c]  \  } }|| 
 c}} Z3dge/fdhZ4	 	 	 	 	 	 	 	 dudiee/ejj                  f   dje
ee/ejj                  f      dke6dle
e6   dme
e	e/e/f      dne
ee6e/f      doe
e/   dpe6dqe/fdrZ7 G ds dt      Z8yc c}} w (v  zAuto Tokenizer class.    N)OrderedDict)TYPE_CHECKINGDictOptionalTupleUnion   )PretrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)PreTrainedTokenizer)TOKENIZER_CONFIG_FILE)cached_fileextract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)PreTrainedTokenizerFastTOKENIZER_MAPPING_NAMESalbertAlbertTokenizerAlbertTokenizerFastalignBertTokenizerBertTokenizerFastariaLlamaTokenizerLlamaTokenizerFast
aya_visionCohereTokenizerFastbark)bart)BartTokenizerBartTokenizerFastbarthezBarthezTokenizerBarthezTokenizerFast)bartpho)BartphoTokenizerNbertzbert-generationBertGenerationTokenizer)zbert-japanese)BertJapaneseTokenizerN)bertweet)BertweetTokenizerNbig_birdBigBirdTokenizerBigBirdTokenizerFastbigbird_pegasusPegasusTokenizerPegasusTokenizerFast)biogpt)BioGptTokenizerN)
blenderbot)BlenderbotTokenizerBlenderbotTokenizerFast)zblenderbot-small)BlenderbotSmallTokenizerNblipzblip-2GPT2TokenizerGPT2TokenizerFastbloomBloomTokenizerFastbridgetowerRobertaTokenizerRobertaTokenizerFastbros)byt5)ByT5TokenizerN	camembertCamembertTokenizerCamembertTokenizerFast)canine)CanineTokenizerN	chameleonchinese_clipclapclipCLIPTokenizerCLIPTokenizerFastclipseg)clvp)ClvpTokenizerN
code_llamaCodeLlamaTokenizerCodeLlamaTokenizerFastcodegenCodeGenTokenizerCodeGenTokenizerFastcoherecohere2colpaliconvbertConvBertTokenizerConvBertTokenizerFastcpmCpmTokenizerCpmTokenizerFast)cpmant)CpmAntTokenizerN)ctrl)CTRLTokenizerN)zdata2vec-audioWav2Vec2CTCTokenizerNzdata2vec-textdbrxdebertaDebertaTokenizerDebertaTokenizerFastz
deberta-v2DebertaV2TokenizerDebertaV2TokenizerFastdeepseek_v3	diffllama
distilbertDistilBertTokenizerDistilBertTokenizerFastdprDPRQuestionEncoderTokenizerDPRQuestionEncoderTokenizerFastelectraElectraTokenizerElectraTokenizerFastemu3ernieernie_mErnieMTokenizer)esm)EsmTokenizerNfalconr   falcon_mambaGPTNeoXTokenizerFastfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubert)FlaubertTokenizerNfnetFNetTokenizerFNetTokenizerFast)fsmt)FSMTTokenizerNfunnelFunnelTokenizerFunnelTokenizerFastgemmaGemmaTokenizerGemmaTokenizerFastgemma2gemma3gemma3_textgitglmglm4zgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japanese)GPTNeoXJapaneseTokenizerNgptj)zgptsan-japanese)GPTSanJapaneseTokenizerNzgrounding-dinogroupvitheliumherbertHerbertTokenizerHerbertTokenizerFast)hubertrr   ibertideficsidefics2idefics3instructblipinstructblipvideojambajetmoe)jukebox)JukeboxTokenizerNzkosmos-2XLMRobertaTokenizerXLMRobertaTokenizerFastlayoutlmLayoutLMTokenizerLayoutLMTokenizerFast
layoutlmv2LayoutLMv2TokenizerLayoutLMv2TokenizerFast
layoutlmv3LayoutLMv3TokenizerLayoutLMv3TokenizerFast	layoutxlmLayoutXLMTokenizerLayoutXLMTokenizerFastledLEDTokenizerLEDTokenizerFastliltllamallama4llama4_textllava
llava_nextllava_next_videollava_onevision
longformerLongformerTokenizerLongformerTokenizerFastlongt5T5TokenizerT5TokenizerFast)luke)LukeTokenizerNlxmertLxmertTokenizerLxmertTokenizerFastm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermbartMBartTokenizerMBartTokenizerFastmbart50MBart50TokenizerMBart50TokenizerFastmegazmegatron-bert)zmgp-str)MgpstrTokenizerNmistralmixtralmllamamlukeMLukeTokenizer
mobilebertMobileBertTokenizerMobileBertTokenizerFast
modernbert	moonshinemoshimpnetMPNetTokenizerMPNetTokenizerFastmptmramt5MT5TokenizerMT5TokenizerFastmusicgenmusicgen_melodymvpMvpTokenizerMvpTokenizerFast)myt5)MyT5TokenizerNnemotronnezhanllbNllbTokenizerNllbTokenizerFastznllb-moenystromformerolmoolmo2olmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizerOpenAIGPTTokenizerFastoptowlv2owlvit	paligemmapegasus	pegasus_x)	perceiver)PerceiverTokenizerN	persimmonphiphi3phimoe)phobert)PhobertTokenizerN
pix2structpixtralplbartPLBartTokenizer)
prophetnet)ProphetNetTokenizerNqdqbertqwen2Qwen2TokenizerQwen2TokenizerFast
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3	qwen3_moe)rag)RagTokenizerNrealmRealmTokenizerRealmTokenizerFastrecurrent_gemmareformerReformerTokenizerReformerTokenizerFastrembertRemBertTokenizerRemBertTokenizerFast	retribertRetriBertTokenizerRetriBertTokenizerFastrobertazroberta-prelayernorm)roc_bert)RoCBertTokenizerNroformerRoFormerTokenizerRoFormerTokenizerFastrwkvseamless_m4tSeamlessM4TTokenizerSeamlessM4TTokenizerFastseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2speech_to_textSpeech2TextTokenizer)speech_to_text_2)Speech2Text2TokenizerNspeecht5SpeechT5Tokenizer)splinter)SplinterTokenizerSplinterTokenizerFastsqueezebertSqueezeBertTokenizerSqueezeBertTokenizerFaststablelm
starcoder2switch_transformerst5)tapas)TapasTokenizerN)tapex)TapexTokenizerN)z
transfo-xl)TransfoXLTokenizerNtvpudopUdopTokenizerUdopTokenizerFastumt5video_llavaviltvipllavavisual_bert)vits)VitsTokenizerN)wav2vec2rr   )zwav2vec2-bertrr   )zwav2vec2-conformerrr   )wav2vec2_phoneme)Wav2Vec2PhonemeCTCTokenizerNwhisperWhisperTokenizerWhisperTokenizerFastxclipxglmXGLMTokenizerXGLMTokenizerFast)xlm)XLMTokenizerNzxlm-prophetnetXLMProphetNetTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerXLNetTokenizerFastxmodyosozambazamba2
class_namec                    | dk(  rt         S t        j                         D ]<  \  }}| |v st        |      }t	        j
                  d| d      }	 t        ||       c S  t        j                  j                         D ]"  \  }}|D ]  }t        |dd       | k(  s|c c S  $ t	        j
                  d      }t        ||       rt        ||       S y # t        $ r Y w xY w)Nr   .ztransformers.models__name__transformers)r   r    itemsr   	importlibimport_modulegetattrAttributeErrorTOKENIZER_MAPPING_extra_contenthasattr)r  module_name
tokenizersmoduleconfig	tokenizermain_modules          /var/www/pru.catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.pytokenizer_class_from_namer    s    ..&&#:#@#@#B Z#3K@K,,q->@UVFvz22 0>>DDF !
# 	!Iy*d3zA  	!! )).9K{J'{J// " s   C	CCpretrained_model_name_or_path	cache_dirforce_downloadresume_downloadproxiestokenrevisionlocal_files_only	subfolderc	                    |	j                  dd      }
|
)t        j                  dt               |t	        d      |
}|	j                  dd      }t        | t        ||||||||ddd|      }|t        j                  d       i S t        ||      }t        |d	
      5 }t        j                  |      }ddd       |d<   |S # 1 sw Y   xY w)a	  
    Loads the tokenizer configuration from a pretrained model tokenizer configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        resume_download:
            Deprecated and ignored. All downloads are now resumed by default when possible.
            Will be removed in v5 of Transformers.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `Dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`._commit_hashF)r  r  r  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  z\Could not locate the tokenizer configuration file, will try to use the model config instead.zutf-8)encoding)popwarningswarnFutureWarning
ValueErrorgetr   r   loggerinfor   openjsonload)r  r  r  r  r  r  r  r  r  kwargsr  commit_hashresolved_config_filereaderresults                  r  get_tokenizer_configr    s    R ZZ 0$7N! A	
 uvv**^T2K&%%'))..305   #rs	%&:KHK	"W	5 #6"#(F>M# #s   CCc                   N    e Zd ZdZd Ze ee      d               Ze	dd       Z
y)AutoTokenizera  
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                     t        d      )Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)EnvironmentError)selfs    r  __init__zAutoTokenizer.__init__-  s    _
 	
    c           
      	   |j                  dd      }|<t        j                  dt               |j	                  dd      t        d      ||d<   |j                  dd      }d|d<   |j                  d	d      }|j                  d
d      }|j                  dd      }|j	                  dd      }	|d}
t        j	                  |d      }|:t        d| ddj                  d t        j                         D               d      |\  }}|r#|t        |      }
nt        j                  d       |
t        |      }
|
t        d| d       |
j                  |g|i |S t        |fi |}d|v r|d   |d<   |j	                  d      }d}d|v r4t        |d   t        t         f      r|d   }n|d   j	                  dd      }|t        |t"              sM|	r3t%        ||	fi |}t'        |d      d   }t)        j*                  d'i |}nt)        j                  |fd|i|}|j,                  }t/        |d      rd|j0                  v r|j0                  d   }|du}t3        |      t4        v xs% |duxr t        |      duxs t        |dz         du}t7        ||||      }|rz|rx|r|d   |d   }n|d   }t9        ||fi |}
|j                  dd      }t:        j<                  j?                  |      r|
jA                           |
j                  |g|d|i|S |[d}
|r!|jC                  d      s| d}t        |      }
|
|}t        |      }
|
t        d d       |
j                  |g|i |S t        |tD              rzt3        |jF                        t3        |jH                        urDt        j                  d |jH                  jJ                   d!|jF                  jJ                   d"       |jH                  }tM        t3        |      jN                        }|Tt4        t3        |         \  }}|r|s| |j                  |g|i |S | |j                  |g|i |S t        d#      t        d$|jJ                   d%dj                  d& t4        j                         D               d      )(a]  
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PretrainedConfig`], *optional*)
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            use_fast (`bool`, *optional*, defaults to `True`):
                Use a [fast Rust-based tokenizer](https://huggingface.co/docs/tokenizers/index) if it is supported for
                a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
                is returned instead.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
        ```r  Nr  r  r  r  T
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3       K   | ]  }|  y wN .0cs     r  	<genexpr>z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s      Kq Ks   r  zt`use_fast` is set to `True` but the tokenizer class does not have a fast version.  Falling back to the slow version.zTokenizer class z is not currently imported.r  tokenizer_classauto_mapr  F)return_tensorsFastr   r   code_revisionz- does not exist or is not currently imported.z The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.zzThis tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer.z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   4   K   | ]  }|j                     y wr  )r  r  s     r  r  z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s     4bAQZZ4bs   r  )(r  r  r  r  r  r  r    joinkeysr  r  warningfrom_pretrainedr  
isinstancetuplelistr
   r   r   r   	for_modelr  r  r  typer  r   r   ospathisdirregister_for_auto_classendswithr   decoderencoder	__class__r   r  )clsr  inputsr  r  r  r  r  r  r  r  tokenizer_class_tupletokenizer_class_nametokenizer_fast_class_nametokenizer_configconfig_tokenizer_classtokenizer_auto_map	gguf_pathconfig_dicthas_remote_codehas_local_code	class_ref_tokenizer_class_candidate
model_typetokenizer_class_pytokenizer_class_fasts                              r  r  zAutoTokenizer.from_pretrained3  s   Z  $4d;%MM E zz'4(4 l  -F7OHd+#|::j$/$4d;"JJ':DAJJ{D1	 %"O$;$?$?PT$U!$, .~.>>qyy K,C,H,H,J KKLAO 
 ?T; ";,8&?@Y&ZONN= &";<P"Q& #34H3IId!eff2?223PdSYd]cdd 00MXQWX--%5n%EF>"!1!5!56G!H!))*:6F%5j%A"%5j%A%E%EoW[%\" ")f&67 +,I9 _X^ _I"6yQV"WX`"aK'11@K@F'775IZ^dF &,%;%;"vz*&///Q%+___%E",D8f):: 
"$. )*@AM Z,-Cf-LMUYY	 	 6<no
 0.q1=.q1	.q1	;IGdohnoO

?D1Aww}}:;7792?22-06J[_e  $/"O 6 ? ? G/E.Fd,K)";<U"V&,B)";<U"V& &'@&AAno  3?223PdSYd]cdd f23FNN#4+??6v~~7O7O6P Q%%+^^%=%=$> ?22 ^^F/V0E0EF
!7Hf7V4 4#5G5O;+;;<Ym\bmflmm%1=-==>[o^dohnoo$: 
 /0@0@/A B++/994bIZI_I_Ia4b+b*ccdf
 	
r  Nc                    ||t        d      |t        |t              rt        d      |t        |t              rt        d      |=|;t        |t              r+|j                  |k7  rt        d|j                   d| d      | t
        j                  v rt
        |    \  }}||}||}t
        j                  | ||f|       y)	a  
        Register a new tokenizer in this mapping.


        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            slow_tokenizer_class ([`PretrainedTokenizer`], *optional*):
                The slow tokenizer to register.
            fast_tokenizer_class ([`PretrainedTokenizerFast`], *optional*):
                The fast tokenizer to register.
        NzKYou need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_classz:You passed a fast tokenizer in the `slow_tokenizer_class`.z:You passed a slow tokenizer in the `fast_tokenizer_class`.zThe fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not consistent with the slow tokenizer class you passed (fast tokenizer has z and you passed z!. Fix one of those so they match!)exist_ok)r  
issubclassr   r   slow_tokenizer_classr  r  register)config_classr  fast_tokenizer_classr  existing_slowexisting_fasts         r  r  zAutoTokenizer.register  s     ',@,Hjkk+
;OQh0iYZZ+
;OQd0eYZZ !,$0/1HI$99=QQ['<<==MNbMc d!!  ,;;;+<\+J(M=#+'4$#+'4$""<2FH\1]hp"qr  )NNF)r  
__module____qualname____doc__r  classmethodr   r    r  staticmethodr  r  r  r  r  r  %  sH    
 &'>?\
 @ \
| )r )rr  r  )NFNNNNF )9r  r  r  r  r  collectionsr   typingr   r   r   r   r   configuration_utilsr
   dynamic_module_utilsr   r   modeling_gguf_pytorch_utilsr   tokenization_utilsr   tokenization_utils_baser   utilsr   r   r   r   r   r   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_fastr   
get_loggerr  r  r    str__annotations__r  r  CONFIG_TO_TYPEr  PathLikeboolr  r  )kvs   00r  <module>r     s      	  # > > 3 \ ? 5 <  3 *  B" 
		H	% VaUb[eHSM8C=4P.Q)QRb)T		
)C)E%4-D-F)DT		
 ?V?X(;^bcdT		
 &@W@Y(<_cdeT		
 D;R;T"7Z^_`T		
 o>U>W':]abcT		
 =T		
 *D*F&D.E.G*TT		
( 4)T		
* o>U>W':]abc+T		
, >X>Z!:`dfj kl-T		
. ?/T		
0 61T		
4 *D*F&D.E.G*T3T		
@ !3OfOh5Knr stAT		
B 2CT		
D OET		
F EGT		
H o>U>W':]abcIT		
J @W@Y)<_cdeKT		
L t5L5N1TXYZMT		
N /KbKd1GjnopOT		
P o>U>W':]abcQT		
R .ST		
V ,F,H(d0G0I,tUT		
b 2cT		
f (B(D$$,C,E(4eT		
r oF]F_/BeijksT		
v &.E.G*TuT		
D #+B+D'$CT		
R #+B+D'$QT		
^ ._T		
b ,F,H(d0G0I,taT		
n +G^G`-CfjkloT		
p 7N7P3VZ[\qT		
r 8O8Q4W[\]sT		
t )CZC\+?bfghuT		
v -JaJc/FimnowT		
z &@&BN*A*C&yT		
F 2GT		
H .IT		
J ?KT		
L 1MdMf3IlpqrMT		
N o>U>W':]abcOT		
P +G^G`-CfjklQT		
T ,F,H(d0G0I,tST		
b (B(D$$,C,E(4aT		
p (B(D$$,C,E(4oT		
| 1PgPi3Lostu}T		
@ 19P9R5X\T		
L +G^G`-CfjklMT		
N o>U>W':]abcOT		
P ?V?X(;^bcdQT		
R .H.J*PTVZ[\ST		
T ,UT		
V ;R;T7Z^_`WT		
X d>U>W$:]abcYT		
\ (4G4I0tUYZ[T		
b 6cT		
d o>U>W':]abceT		
f .gT		
h )D[D]+@cghiiT		
l (B(D$$,C,E(4kT		
z (B(D$$,C,E(4yT		
H (B(D$$,C,E(4GT		
V (B(D$$,C,E(4UT		
b _=T=V&9\`abcT		
d T8O8Q4W[\]eT		
f d9P9R5X\]^gT		
h .H.J*PTVZ[\iT		
j o>U>W':]abckT		
l _E\E^.AdhijmT		
n AXAZ*=`defoT		
p $:Q:S 6Y]^_qT		
r FsT		
t o>U>W':]abcuT		
v CwT		
x H_Ha1DgklmyT		
z /BYB[+>aefg{T		
| ;R;T7Z^_`}T		
~ +G^G`-CfjklT		
@ 7AT		
B )E\E^+AdhijCT		
D 7N7P3VZ[\ET		
F *D[D],@cghiGT		
H *D[D],@cghiIT		
J oF]F_/BeijkKT		
L !?KbKd4Gjn"opMT		
P (B(D$$,C,E(4OT		
^ (B(D$$,C,E(4]T		
j 4kT		
n -G-I)t1H1J-PTmT		
z -JaJc/Fimno{T		
| 1PgPi3Lostu}T		
~ 1PgPi3LostuT		
@ /MdMf1IlpqrAT		
B ^;R;T%7Z^_`CT		
D +JaJc-FimnoET		
H (B(D$$,C,E(4GT		
V (B(D$$,C,E(4UT		
d (B(D$$,C,E(4cT		
p 'AXAZ)=`defqT		
r ,F]F_.BeijksT		
t  "2LcLe4Hko!pquT		
v !1KbKd3Gjn opwT		
x 1PgPi3LostuyT		
| %?%AMt)@)B%{T		
H .IT		
J )D[D]+@cghiKT		
L .H.J*PTVZ[\MT		
N t7N7P3VZ[\OT		
P 8O8Q4W[\]QT		
R -G-I)tUYZ[ST		
V (B(D$$,C,E(4UT		
d *D*F&D.E.G*TcT		
p (D[D]*@cghiqT		
r G^G`0CfjklsT		
t 3uT		
x (B(D$$,C,E(4wT		
F	 (B(D$$,C,E(4E	T		
R	 (BYB[*>aefgS	T		
T	 +E+G'TSWXYU	T		
V	 1PgPi3LostuW	T		
X	 D?V?X";^bcdY	T		
Z	 4>U>W!:]abc[	T		
\	 t:Q:S6Y]^_]	T		
^	 'AXAZ)=`def_	T		
`	 T5L5N1TXYZa	T		
b	 'CZC\)?bfghc	T		
f	 &@&BN*A*C&e	T		
r	 ->U>W):]abcs	T		
t	 E\E^0Adh iju	T		
v	 ^;R;T%7Z^_`w	T		
x	 .y	T		
z	 $=T=V 9\`ab{	T		
|	 ?V?X(;^bcd}	T		
@
 'A'CO+B+D'$	T		
N
 'A'CO+B+D'$M
T		
\
  )C)E%4-D-F)D[
T		
h
 d6M6O2UYZ[i
T		
j
 t7N7P3VZ[\k
T		
l
 t7N7P3VZ[\m
T		
p
  9P9R"5X\]o
T		
v
 ?CZC\,?bfghw
T		
z
 %CZC\'?bfgy
T		
@ _=T=V&9\`abAT		
B ?V?X(;^bcdCT		
D @W@Y)<_cdeET		
F +E\E^-AdhijGT		
J *D*F&D.E.G*TIT		
X *D*F&D.E.G*TWT		
deT		
t (B(D$$,C,E(4sT		
@ 'CZC\)?bfghAT		
B &@W@Y(<_cdeCT		
D (BYB[*>aefgET		
F 4GT		
H M@W@Y+<_cdeIT		
J <S<U8[_`aKT		
L -G-I)tUYZ[MT		
N :OT		
P AXAZ*=`defQT		
T $,C,E(4ST		
` ,F]F_.BeijkaT		
b -G^G`/CfjklcT		
f $,C,E(4eT		
r *D[D],@cghisT		
v $,C,E(4uT		
D $,C,E(4CT		
P ,QT		
R 'AXAZ)=`defST		
V "(B(D$$,C,E(4UT		
d +E+G'T/F/H+dcT		
r *D*F&D.E.G*TqT		
~ /MdMf1IlpqrT		
@ +G^G`-CfjklAT		
D '#?V?X%;^bcCT		
J 5KT		
L -JaJc/FimnoMT		
N d6M6O2UYZ[OT		
R .H.J*PT2I2K.QUQT		
` ".H.J*PT2I2K.QU_T		
n (B(D$$,C,E(4mT		
z -G-I)tUYZ[{T		
~ (B(D$$,C,E(4}T		
J :T:V 6\`bfghKT		
L BMT		
N 1K1M-SWY]^_OT		
P IQT		
T 'G^G`)CfjkST		
Z $:Q:S 6Y]^_[T		
\ OD[D]-@cghi]T		
` &%?%AMt)@)B%_T		
n %?%AMt)@)B%mT		
z 0{T		
| 0}T		
~ 9T		
@ _=T=V&9\`abAT		
D 'A'CO+B+D'$CT		
R %?%AMt)@)B%QT		
^ -G^G`/Cfjkl_T		
` o>U>W':]abcaT		
b *D[D],@cghicT		
d _E\E^.AdhijeT		
f .gT		
h 9iT		
j >kT		
l CmT		
n HoT		
p +G^G`-CfjklqT		
r ?V?X(;^bcdsT		
v 'A'CO+B+D'$uT		
B ,CT		
D <V<X 8^bdhijET		
H -G-I)t1H1J-PTGT		
V !-G-I)t1H1J-PTUT		
d (B(D$$,C,E(4cT		
r -G-I)t1H1J-PTqT		
@ )C)E%4-D-F)DT		
N (B(D$$,C,E(4MT		
\ (B(D$$,C,E(4[T		
V	p %%9;RS #=#7#=#=#?@41a!Q$@# < 48 &*(,(,""l#(bkk)9#:lc2;;./0l l d^	l
 d38n%l E$)$%l sml l l^Xr Xr] As   z	