auto

mindnlp.transformers.models.auto.auto_factory.get_values(model_mapping)

Collects the values of the 'model_mapping' dictionary into a flat list, expanding any list or tuple values into their individual elements.

PARAMETER DESCRIPTION
model_mapping

A dictionary that maps keys to values. The values can be either a single object or a list/tuple of objects.

TYPE: dict

RETURNS DESCRIPTION
list

A list containing all the values from the 'model_mapping' dictionary. If a value is a list or tuple, its elements are included in the final list.

Source code in mindnlp/transformers/models/auto/auto_factory.py
def get_values(model_mapping):
    """
    Collects the values of the 'model_mapping' dictionary into a flat list, expanding any list or tuple values into their individual elements.

    Args:
        model_mapping (dict): A dictionary that maps keys to values. The values can be either a single object or a list/tuple of objects.

    Returns:
        list: A list containing all the values from the 'model_mapping' dictionary. If a value is a list or tuple, its elements are included in the final list.

    Raises:
        None.

    """
    result = []
    for model in model_mapping.values():
        if isinstance(model, (list, tuple)):
            result += list(model)
        else:
            result.append(model)

    return result
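
For illustration, a minimal sketch of the flattening behavior (the input dictionary here is made up; any dict whose values are single objects or lists/tuples of objects behaves the same way):

```python
>>> from mindnlp.transformers.models.auto.auto_factory import get_values
>>> mapping = {"a": 1, "b": (2, 3), "c": [4, 5]}
>>> get_values(mapping)
[1, 2, 3, 4, 5]
```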

mindnlp.transformers.models.auto.configuration_auto.ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = _LazyLoadAllMappings(CONFIG_ARCHIVE_MAP_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.configuration_auto.CONFIG_MAPPING = _LazyConfigMapping(CONFIG_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.configuration_auto.MODEL_NAMES_MAPPING = OrderedDict([('albert', 'ALBERT'), ('align', 'ALIGN'), ('altclip', 'AltCLIP'), ('audio-spectrogram-transformer', 'Audio Spectrogram Transformer'), ('autoformer', 'Autoformer'), ('bark', 'Bark'), ('bart', 'BART'), ('barthez', 'BARThez'), ('bartpho', 'BARTpho'), ('beit', 'BEiT'), ('bert', 'BERT'), ('bert-generation', 'Bert Generation'), ('bert-japanese', 'BertJapanese'), ('bertweet', 'BERTweet'), ('bge-m3', 'BgeM3'), ('big_bird', 'BigBird'), ('bigbird_pegasus', 'BigBird-Pegasus'), ('biogpt', 'BioGpt'), ('bit', 'BiT'), ('blenderbot', 'Blenderbot'), ('blenderbot-small', 'BlenderbotSmall'), ('blip', 'BLIP'), ('blip-2', 'BLIP-2'), ('bloom', 'BLOOM'), ('bort', 'BORT'), ('bridgetower', 'BridgeTower'), ('bros', 'BROS'), ('byt5', 'ByT5'), ('camembert', 'CamemBERT'), ('canine', 'CANINE'), ('chinese_clip', 'Chinese-CLIP'), ('chatglm', 'ChatGLM'), ('clap', 'CLAP'), ('clip', 'CLIP'), ('clip_vision_model', 'CLIPVisionModel'), ('clipseg', 'CLIPSeg'), ('clipseg_vision_model', 'CLIPSegVisionModel'), ('code_llama', 'CodeLlama'), ('codegen', 'CodeGen'), ('cohere', 'Cohere'), ('conditional_detr', 'Conditional DETR'), ('cogvlm', 'CogVLM'), ('convbert', 'ConvBERT'), ('convnext', 'ConvNeXT'), ('convnextv2', 'ConvNeXTV2'), ('cpm', 'CPM'), ('cpmant', 'CPM-Ant'), ('cpmbee', 'CPM-Bee'), ('ctrl', 'CTRL'), ('cvt', 'CvT'), ('data2vec-audio', 'Data2VecAudio'), ('data2vec-text', 'Data2VecText'), ('data2vec-vision', 'Data2VecVision'), ('deberta', 'DeBERTa'), ('deberta-v2', 'DeBERTa-v2'), ('decision_transformer', 'Decision Transformer'), ('deformable_detr', 'Deformable DETR'), ('deepseek_v2', 'Deepseek_v2'), ('deit', 'DeiT'), ('deplot', 'DePlot'), ('deta', 'DETA'), ('detr', 'DETR'), ('dialogpt', 'DialoGPT'), ('dinat', 'DiNAT'), ('dinov2', 'DINOv2'), ('distilbert', 'DistilBERT'), ('donut', 'Donut'), ('donut-swin', 'DonutSwin'), ('dit', 'DiT'), ('dpr', 'DPR'), ('dpt', 'DPT'), ('efficientformer', 'EfficientFormer'), ('efficientnet', 'EfficientNet'), ('electra', 'ELECTRA'), ('encodec', 'EnCodec'), ('encoder-decoder', 'Encoder decoder'), ('ernie', 'ERNIE'), ('ernie_m', 'ErnieM'), ('esm', 'ESM'), ('falcon', 'Falcon'), ('fastspeech2_conformer', 'FastSpeech2ConformerModel'), ('flan-t5', 'FLAN-T5'), ('flan-ul2', 'FLAN-UL2'), ('flaubert', 'FlauBERT'), ('flava', 'FLAVA'), ('fnet', 'FNet'), ('focalnet', 'FocalNet'), ('fsmt', 'FairSeq Machine-Translation'), ('funnel', 'Funnel Transformer'), ('fuyu', 'Fuyu'), ('gemma', 'Gemma'), ('git', 'GIT'), ('glpn', 'GLPN'), ('gpt-sw3', 'GPT-Sw3'), ('gpt2', 'OpenAI GPT-2'), ('gpt_bigcode', 'GPTBigCode'), ('gpt_neo', 'GPT Neo'), ('gpt_neox', 'GPT NeoX'), ('gpt_neox_japanese', 'GPT NeoX Japanese'), ('gpt_pangu', 'GPTPangu'), ('gptj', 'GPT-J'), ('gptsan-japanese', 'GPTSAN-japanese'), ('graphormer', 'Graphormer'), ('groupvit', 'GroupViT'), ('herbert', 'HerBERT'), ('hubert', 'Hubert'), ('ibert', 'I-BERT'), ('idefics', 'IDEFICS'), ('imagegpt', 'ImageGPT'), ('informer', 'Informer'), ('instructblip', 'InstructBLIP'), ('jukebox', 'Jukebox'), ('jetmoe', 'JetMoE'), ('kosmos-2', 'KOSMOS-2'), ('layoutlm', 'LayoutLM'), ('layoutlmv2', 'LayoutLMv2'), ('layoutlmv3', 'LayoutLMv3'), ('layoutxlm', 'LayoutXLM'), ('led', 'LED'), ('levit', 'LeViT'), ('lilt', 'LiLT'), ('llama', 'LLaMA'), ('llama2', 'Llama2'), ('llava', 'LLaVa'), ('llava_next', 'LLaVA-NeXT'), ('longformer', 'Longformer'), ('longt5', 'LongT5'), ('luke', 'LUKE'), ('lxmert', 'LXMERT'), ('m2m_100', 'M2M100'), ('mamba', 'Mamba'), ('marian', 'Marian'), 
('markuplm', 'MarkupLM'), ('mask2former', 'Mask2Former'), ('maskformer', 'MaskFormer'), ('maskformer-swin', 'MaskFormerSwin'), ('matcha', 'MatCha'), ('mbart', 'mBART'), ('mbart50', 'mBART-50'), ('mctct', 'M-CTC-T'), ('mega', 'MEGA'), ('megatron-bert', 'Megatron-BERT'), ('megatron_gpt2', 'Megatron-GPT2'), ('mgp-str', 'MGP-STR'), ('minicpm', 'MiniCPM'), ('mistral', 'Mistral'), ('mixtral', 'Mixtral'), ('mluke', 'mLUKE'), ('mms', 'MMS'), ('mobilebert', 'MobileBERT'), ('mobilenet_v1', 'MobileNetV1'), ('mobilenet_v2', 'MobileNetV2'), ('mobilevit', 'MobileViT'), ('mobilevitv2', 'MobileViTV2'), ('mpnet', 'MPNet'), ('mpt', 'MPT'), ('mra', 'MRA'), ('mt5', 'MT5'), ('musicgen', 'MusicGen'), ('musicgen_melody', 'MusicGen Melody'), ('mvp', 'MVP'), ('nat', 'NAT'), ('nezha', 'Nezha'), ('nllb', 'NLLB'), ('nllb-moe', 'NLLB-MOE'), ('nougat', 'Nougat'), ('nystromformer', 'Nyströmformer'), ('olmo', 'OLMo'), ('openelm', 'OpenELM'), ('oneformer', 'OneFormer'), ('open-llama', 'OpenLlama'), ('openai-gpt', 'OpenAI GPT'), ('opt', 'OPT'), ('owlv2', 'OWLv2'), ('owlvit', 'OWL-ViT'), ('pegasus', 'Pegasus'), ('pegasus_x', 'PEGASUS-X'), ('perceiver', 'Perceiver'), ('persimmon', 'Persimmon'), ('phi', 'Phi'), ('phi3', 'Phi3'), ('phobert', 'PhoBERT'), ('pix2struct', 'Pix2Struct'), ('plbart', 'PLBart'), ('poolformer', 'PoolFormer'), ('pop2piano', 'Pop2Piano'), ('prophetnet', 'ProphetNet'), ('pvt', 'PVT'), ('qdqbert', 'QDQBert'), ('qwen2', 'Qwen2'), ('qwen2_moe', 'Qwen2MoE'), ('rag', 'RAG'), ('realm', 'REALM'), ('reformer', 'Reformer'), ('regnet', 'RegNet'), ('rembert', 'RemBERT'), ('resnet', 'ResNet'), ('roberta', 'RoBERTa'), ('roberta-prelayernorm', 'RoBERTa-PreLayerNorm'), ('roc_bert', 'RoCBert'), ('roformer', 'RoFormer'), ('rwkv', 'RWKV'), ('sam', 'SAM'), ('seamless_m4t', 'SeamlessM4T'), ('segformer', 'SegFormer'), ('sew', 'SEW'), ('sew-d', 'SEW-D'), ('speech-encoder-decoder', 'Speech Encoder decoder'), ('speech_to_text', 'Speech2Text'), ('speech_to_text_2', 'Speech2Text2'), ('speecht5', 'SpeechT5'), ('splinter', 'Splinter'), ('squeezebert', 'SqueezeBERT'), ('stablelm', 'StableLm'), ('starcoder2', 'Starcoder2'), ('swiftformer', 'SwiftFormer'), ('swin', 'Swin Transformer'), ('swin2sr', 'Swin2SR'), ('swinv2', 'Swin Transformer V2'), ('switch_transformers', 'SwitchTransformers'), ('t5', 'T5'), ('t5v1.1', 'T5v1.1'), ('table-transformer', 'Table Transformer'), ('tapas', 'TAPAS'), ('tapex', 'TAPEX'), ('time_series_transformer', 'Time Series Transformer'), ('timesformer', 'TimeSformer'), ('timm_backbone', 'TimmBackbone'), ('trajectory_transformer', 'Trajectory Transformer'), ('transfo-xl', 'Transformer-XL'), ('trocr', 'TrOCR'), ('tvlt', 'TVLT'), ('ul2', 'UL2'), ('udop', 'UDOP'), ('umt5', 'UMT5'), ('unispeech', 'UniSpeech'), ('unispeech-sat', 'UniSpeechSat'), ('univnet', 'UnivNet'), ('upernet', 'UPerNet'), ('van', 'VAN'), ('videomae', 'VideoMAE'), ('vilt', 'ViLT'), ('vipllava', 'VipLlava'), ('vision-encoder-decoder', 'Vision Encoder decoder'), ('vision-text-dual-encoder', 'VisionTextDualEncoder'), ('visual_bert', 'VisualBERT'), ('vit', 'ViT'), ('vit_hybrid', 'ViT Hybrid'), ('vit_mae', 'ViTMAE'), ('vit_msn', 'ViTMSN'), ('vitdet', 'VitDet'), ('vitmatte', 'ViTMatte'), ('vits', 'VITS'), ('vivit', 'ViViT'), ('wav2vec2', 'Wav2Vec2'), ('wav2vec2-bert', 'Wav2Vec2-BERT'), ('wav2vec2-conformer', 'Wav2Vec2-Conformer'), ('wav2vec2_phoneme', 'Wav2Vec2Phoneme'), ('wavlm', 'WavLM'), ('whisper', 'Whisper'), ('xclip', 'X-CLIP'), ('xglm', 'XGLM'), ('xlm', 'XLM'), ('xlm-prophetnet', 'XLM-ProphetNet'), ('xlm-roberta', 'XLM-RoBERTa'), 
('xlm-roberta-xl', 'XLM-RoBERTa-XL'), ('xlm-v', 'XLM-V'), ('xlnet', 'XLNet'), ('xls_r', 'XLS-R'), ('xlsr_wav2vec2', 'XLSR-Wav2Vec2'), ('xmod', 'X-MOD'), ('yolos', 'YOLOS'), ('yoso', 'YOSO')]) module-attribute
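
As a rough sketch of how these module attributes are typically used (CONFIG_MAPPING lazily imports a configuration class the first time its key is accessed; the lookups below assume the mappings shipped with mindnlp):

```python
>>> from mindnlp.transformers.models.auto.configuration_auto import (
...     CONFIG_MAPPING, MODEL_NAMES_MAPPING
... )
>>> MODEL_NAMES_MAPPING["bert"]  # model type -> human-readable name
'BERT'
>>> config_class = CONFIG_MAPPING["bert"]  # lazily imports BertConfig
>>> config = config_class()  # default BERT configuration
```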

mindnlp.transformers.models.auto.configuration_auto.AutoConfig

This is a generic configuration class that will be instantiated as one of the configuration classes of the library when created with the [~AutoConfig.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp/transformers/models/auto/configuration_auto.py
class AutoConfig:
    r"""
    This is a generic configuration class that will be instantiated as one of the configuration classes of the library
    when created with the [`~AutoConfig.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        """
        Initialize AutoConfig.

        Args:
            self: The instance of the AutoConfig class, passed automatically when the method is called.

        Returns:
            None.

        Raises:
            EnvironmentError:
                Raised whenever `__init__()` is called directly, with the message
                'AutoConfig is designed to be instantiated using the
                `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.'
        """
        raise EnvironmentError(
            "AutoConfig is designed to be instantiated "
            "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    def for_model(cls, model_type: str, *args, **kwargs):
        """
                This class method 'for_model' in the 'AutoConfig' class is used to instantiate a configuration class based on the provided model type.

        Args:
            cls (class): The class itself, automatically passed as the first parameter.
            model_type (str): A string representing the type of the model for which the configuration class needs to be instantiated.
                It must be a key within the CONFIG_MAPPING dictionary.

        Returns:
            PretrainedConfig: An instance of the configuration class registered for `model_type`,
                constructed from `*args` and `**kwargs`.

        Raises:
            ValueError:
                Raised when the provided 'model_type' is not recognized or is not found as a key in the CONFIG_MAPPING dictionary.
                The exception message indicates the unrecognized model identifier and lists all valid model identifiers
                available in the CONFIG_MAPPING dictionary.
        """
        if model_type in CONFIG_MAPPING:
            config_class = CONFIG_MAPPING[model_type]
            return config_class(*args, **kwargs)
        raise ValueError(
            f"Unrecognized model identifier: {model_type}. Should contain one of {', '.join(CONFIG_MAPPING.keys())}"
        )

    @classmethod
    @replace_list_option_in_docstrings()
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the configuration classes of the library from a pretrained model configuration.

        The configuration class to instantiate is selected based on the `model_type` property of the config object that
        is loaded, or when it's missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                - A string, the *model id* of a pretrained model configuration hosted inside a model repo on
                hf-mirror.com. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - A path to a *directory* containing a configuration file saved using the
                [`~PretrainedConfig.save_pretrained`] method, or the [`~PreTrainedModel.save_pretrained`] method,
                e.g., `./my_model_directory/`.
                - A path or url to a saved configuration JSON *file*, e.g.,
                `./my_model_directory/configuration.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files,
                overriding the cached versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete incompletely received files. Will attempt to resume the download if such a
                file exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final configuration object.
                If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
                dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
                part of `kwargs` which has not been used to update `config` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                The values in kwargs of any keys which are configuration attributes will be used to override the loaded
                values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                by the `return_unused_kwargs` keyword parameter.

        Example:
            ```python
            >>> from transformers import AutoConfig
            ...
            >>> # Download configuration from hf-mirror.com and cache.
            >>> config = AutoConfig.from_pretrained("bert-base-uncased")
            ...
            >>> # Download configuration from hf-mirror.com (user-uploaded) and cache.
            >>> config = AutoConfig.from_pretrained("dbmdz/bert-base-german-cased")
            ...
            >>> # If configuration file is in a directory (e.g., was saved using *save_pretrained('./test/saved_model/')*).
            >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/")
            ...
            >>> # Load a specific configuration file.
            >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/my_configuration.json")
            ...
            >>> # Change some config attributes when loading a pretrained config.
            >>> config = AutoConfig.from_pretrained("bert-base-uncased", output_attentions=True, foo=False)
            >>> config.output_attentions
            True
            >>> config, unused_kwargs = AutoConfig.from_pretrained(
            ...     "bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
            ... )
            >>> config.output_attentions
            True
            >>> unused_kwargs
            {'foo': False}
            ```
        """
        kwargs["name_or_path"] = pretrained_model_name_or_path

        config_dict, unused_kwargs = PretrainedConfig.get_config_dict(
            pretrained_model_name_or_path, **kwargs
        )
        if "model_type" in config_dict:
            config_class = CONFIG_MAPPING[config_dict["model_type"]]
            return config_class.from_dict(config_dict, **unused_kwargs)
        # Fallback: use pattern matching on the string.
        # We go from longer names to shorter names to catch roberta before bert (for instance)
        for pattern in sorted(CONFIG_MAPPING.keys(), key=len, reverse=True):
            if pattern in str(pretrained_model_name_or_path).lower():
                return CONFIG_MAPPING[pattern].from_dict(config_dict, **unused_kwargs)

        raise ValueError(
            f"Unrecognized model in {pretrained_model_name_or_path}. "
            f"Should have a `model_type` key in its {CONFIG_NAME}, or contain one of the following strings "
            f"in its name: {', '.join(CONFIG_MAPPING.keys())}"
        )

    @staticmethod
    def register(model_type, config, exist_ok=False):
        """
        Register a new configuration for this class.

        Args:
            model_type (`str`): The model type like "bert" or "gpt".
            config ([`PretrainedConfig`]): The config to register.
        """
        if issubclass(config, PretrainedConfig) and config.model_type != model_type:
            raise ValueError(
                "The config you are passing has a `model_type` attribute that is not consistent with the model type "
                f"you passed (config has {config.model_type} and you passed {model_type}. Fix one of those so they "
                "match!"
            )
        CONFIG_MAPPING.register(model_type, config, exist_ok=exist_ok)

mindnlp.transformers.models.auto.configuration_auto.AutoConfig.__init__()

Initialize AutoConfig.

PARAMETER DESCRIPTION
self

The instance of the AutoConfig class, passed automatically when the method is called.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

Raised whenever __init__() is called directly, with the message 'AutoConfig is designed to be instantiated using the AutoConfig.from_pretrained(pretrained_model_name_or_path) method.'

Source code in mindnlp/transformers/models/auto/configuration_auto.py
def __init__(self):
    """
    Initialize AutoConfig.

    Args:
        self: The instance of the AutoConfig class, passed automatically when the method is called.

    Returns:
        None.

    Raises:
        EnvironmentError:
            Raised whenever `__init__()` is called directly, with the message
            'AutoConfig is designed to be instantiated using the
            `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.'
    """
    raise EnvironmentError(
        "AutoConfig is designed to be instantiated "
        "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method."
    )
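
A short sketch of this guard in action (assuming `AutoConfig` is re-exported from `mindnlp.transformers`, as in the examples above):

```python
>>> from mindnlp.transformers import AutoConfig
>>> try:
...     AutoConfig()  # direct instantiation is forbidden
... except EnvironmentError as err:
...     print(err)
AutoConfig is designed to be instantiated using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.
```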

mindnlp.transformers.models.auto.configuration_auto.AutoConfig.for_model(model_type, *args, **kwargs) classmethod

Instantiate a configuration class for the given model type.
PARAMETER DESCRIPTION
cls

The class itself, automatically passed as the first parameter.

TYPE: class

model_type

A string representing the type of the model for which the configuration class needs to be instantiated. It must be a key within the CONFIG_MAPPING dictionary.

TYPE: str

RETURNS DESCRIPTION
PretrainedConfig

An instance of the configuration class registered for model_type, constructed from *args and **kwargs.

RAISES DESCRIPTION
ValueError

Raised when the provided 'model_type' is not recognized or is not found as a key in the CONFIG_MAPPING dictionary. The exception message indicates the unrecognized model identifier and lists all valid model identifiers available in the CONFIG_MAPPING dictionary.

Source code in mindnlp/transformers/models/auto/configuration_auto.py
@classmethod
def for_model(cls, model_type: str, *args, **kwargs):
    """
    Instantiate a configuration class for the given model type.

    Args:
        cls (class): The class itself, automatically passed as the first parameter.
        model_type (str): A string representing the type of the model for which the configuration class needs to be instantiated.
            It must be a key within the CONFIG_MAPPING dictionary.

    Returns:
        PretrainedConfig: An instance of the configuration class registered for `model_type`,
            constructed from `*args` and `**kwargs`.

    Raises:
        ValueError:
            Raised when the provided 'model_type' is not recognized or is not found as a key in the CONFIG_MAPPING dictionary.
            The exception message indicates the unrecognized model identifier and lists all valid model identifiers
            available in the CONFIG_MAPPING dictionary.
    """
    if model_type in CONFIG_MAPPING:
        config_class = CONFIG_MAPPING[model_type]
        return config_class(*args, **kwargs)
    raise ValueError(
        f"Unrecognized model identifier: {model_type}. Should contain one of {', '.join(CONFIG_MAPPING.keys())}"
    )
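
For instance, a minimal sketch (`hidden_size` is an illustrative BertConfig attribute; any keyword accepted by the target configuration class can be passed through):

```python
>>> from mindnlp.transformers import AutoConfig
>>> config = AutoConfig.for_model("bert", hidden_size=256)
>>> type(config).__name__
'BertConfig'
>>> config.hidden_size
256
```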

mindnlp.transformers.models.auto.configuration_auto.AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs) classmethod

Instantiate one of the configuration classes of the library from a pretrained model configuration.

The configuration class to instantiate is selected based on the model_type property of the config object that is loaded, or when it's missing, by falling back to using pattern matching on pretrained_model_name_or_path:

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

Can be either:

  • A string, the model id of a pretrained model configuration hosted inside a model repo on hf-mirror.com. Valid model ids can be located at the root-level, like bert-base-uncased, or namespaced under a user or organization name, like dbmdz/bert-base-german-cased.
  • A path to a directory containing a configuration file saved using the [~PretrainedConfig.save_pretrained] method, or the [~PreTrainedModel.save_pretrained] method, e.g., ./my_model_directory/.
  • A path or url to a saved configuration JSON file, e.g., ./my_model_directory/configuration.json.

TYPE: `str` or `os.PathLike`

cache_dir

Path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force the (re-)download of the model weights and configuration files, overriding the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete incompletely received files. Will attempt to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and other artifacts on hf-mirror.com, so revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

return_unused_kwargs

If False, then this function returns just the final configuration object. If True, then this function returns a Tuple(config, unused_kwargs) where unused_kwargs is a dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the part of kwargs which has not been used to update config and is otherwise ignored.

TYPE: `bool`, *optional*, defaults to `False`

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

The values in kwargs of any keys which are configuration attributes will be used to override the loaded values. Behavior concerning key/value pairs whose keys are not configuration attributes is controlled by the return_unused_kwargs keyword parameter.

TYPE: additional keyword arguments, *optional*

Example
>>> from transformers import AutoConfig
...
>>> # Download configuration from hf-mirror.com and cache.
>>> config = AutoConfig.from_pretrained("bert-base-uncased")
...
>>> # Download configuration from hf-mirror.com (user-uploaded) and cache.
>>> config = AutoConfig.from_pretrained("dbmdz/bert-base-german-cased")
...
>>> # If configuration file is in a directory (e.g., was saved using *save_pretrained('./test/saved_model/')*).
>>> config = AutoConfig.from_pretrained("./test/bert_saved_model/")
...
>>> # Load a specific configuration file.
>>> config = AutoConfig.from_pretrained("./test/bert_saved_model/my_configuration.json")
...
>>> # Change some config attributes when loading a pretrained config.
>>> config = AutoConfig.from_pretrained("bert-base-uncased", output_attentions=True, foo=False)
>>> config.output_attentions
True
>>> config, unused_kwargs = AutoConfig.from_pretrained(
...     "bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
... )
>>> config.output_attentions
True
>>> unused_kwargs
{'foo': False}
Source code in mindnlp/transformers/models/auto/configuration_auto.py
@classmethod
@replace_list_option_in_docstrings()
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    r"""
    Instantiate one of the configuration classes of the library from a pretrained model configuration.

    The configuration class to instantiate is selected based on the `model_type` property of the config object that
    is loaded, or when it's missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

    List options

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            Can be either:

            - A string, the *model id* of a pretrained model configuration hosted inside a model repo on
            hf-mirror.com. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
            namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
            - A path to a *directory* containing a configuration file saved using the
            [`~PretrainedConfig.save_pretrained`] method, or the [`~PreTrainedModel.save_pretrained`] method,
            e.g., `./my_model_directory/`.
            - A path or url to a saved configuration JSON *file*, e.g.,
            `./my_model_directory/configuration.json`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force the (re-)download of the model weights and configuration files,
            overriding the cached versions if they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete incompletely received files. Will attempt to resume the download if such a
            file exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
            identifier allowed by git.
        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final configuration object.
            If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
            dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
            part of `kwargs` which has not been used to update `config` and is otherwise ignored.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (additional keyword arguments, *optional*):
            The values in kwargs of any keys which are configuration attributes will be used to override the loaded
            values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
            by the `return_unused_kwargs` keyword parameter.

    Example:
        ```python
        >>> from transformers import AutoConfig
        ...
        >>> # Download configuration from hf-mirror.com and cache.
        >>> config = AutoConfig.from_pretrained("bert-base-uncased")
        ...
        >>> # Download configuration from hf-mirror.com (user-uploaded) and cache.
        >>> config = AutoConfig.from_pretrained("dbmdz/bert-base-german-cased")
        ...
        >>> # If configuration file is in a directory (e.g., was saved using *save_pretrained('./test/saved_model/')*).
        >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/")
        ...
        >>> # Load a specific configuration file.
        >>> config = AutoConfig.from_pretrained("./test/bert_saved_model/my_configuration.json")
        ...
        >>> # Change some config attributes when loading a pretrained config.
        >>> config = AutoConfig.from_pretrained("bert-base-uncased", output_attentions=True, foo=False)
        >>> config.output_attentions
        True
        >>> config, unused_kwargs = AutoConfig.from_pretrained(
        ...     "bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
        ... )
        >>> config.output_attentions
        True
        >>> unused_kwargs
        {'foo': False}
        ```
    """
    kwargs["name_or_path"] = pretrained_model_name_or_path

    config_dict, unused_kwargs = PretrainedConfig.get_config_dict(
        pretrained_model_name_or_path, **kwargs
    )
    if "model_type" in config_dict:
        config_class = CONFIG_MAPPING[config_dict["model_type"]]
        return config_class.from_dict(config_dict, **unused_kwargs)
    # Fallback: use pattern matching on the string.
    # We go from longer names to shorter names to catch roberta before bert (for instance)
    for pattern in sorted(CONFIG_MAPPING.keys(), key=len, reverse=True):
        if pattern in str(pretrained_model_name_or_path).lower():
            return CONFIG_MAPPING[pattern].from_dict(config_dict, **unused_kwargs)

    raise ValueError(
        f"Unrecognized model in {pretrained_model_name_or_path}. "
        f"Should have a `model_type` key in its {CONFIG_NAME}, or contain one of the following strings "
        f"in its name: {', '.join(CONFIG_MAPPING.keys())}"
    )
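
The longest-match-first fallback can be illustrated in isolation (a toy sketch of the sorting trick, not the library's API):

```python
>>> patterns = ["bert", "roberta", "xlm-roberta"]
>>> name = "./checkpoints/xlm-roberta-large"
>>> # Longer patterns are tried first, so "xlm-roberta" wins over "roberta" and "bert".
>>> next(p for p in sorted(patterns, key=len, reverse=True) if p in name.lower())
'xlm-roberta'
```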

mindnlp.transformers.models.auto.configuration_auto.AutoConfig.register(model_type, config, exist_ok=False) staticmethod

Register a new configuration for this class.

PARAMETER DESCRIPTION
model_type

The model type like "bert" or "gpt".

TYPE: `str`

config

The config to register.

TYPE: [`PretrainedConfig`]

Source code in mindnlp/transformers/models/auto/configuration_auto.py
@staticmethod
def register(model_type, config, exist_ok=False):
    """
    Register a new configuration for this class.

    Args:
        model_type (`str`): The model type like "bert" or "gpt".
        config ([`PretrainedConfig`]): The config to register.
    """
    if issubclass(config, PretrainedConfig) and config.model_type != model_type:
        raise ValueError(
            "The config you are passing has a `model_type` attribute that is not consistent with the model type "
            f"you passed (config has {config.model_type} and you passed {model_type}. Fix one of those so they "
            "match!"
        )
    CONFIG_MAPPING.register(model_type, config, exist_ok=exist_ok)
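
A hedged sketch of registering a custom configuration ("my-model" and MyConfig are hypothetical; the sketch assumes `PretrainedConfig` is re-exported from `mindnlp.transformers`):

```python
>>> from mindnlp.transformers import AutoConfig, PretrainedConfig
...
>>> class MyConfig(PretrainedConfig):
...     model_type = "my-model"  # must match the model type passed to register()
...
>>> AutoConfig.register("my-model", MyConfig)
>>> config = AutoConfig.for_model("my-model")  # now resolves to MyConfig
```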

mindnlp.transformers.models.auto.tokenization_auto.TOKENIZER_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TOKENIZER_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.tokenization_auto.AutoTokenizer

This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when created with the [AutoTokenizer.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp/transformers/models/auto/tokenization_auto.py
class AutoTokenizer:
    r"""
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        """
        This method initializes an instance of the AutoTokenizer class.

        Args:
            self: The instance of the AutoTokenizer class.

        Returns:
            None.

        Raises:
            EnvironmentError: If the AutoTokenizer is instantiated directly using the __init__ method,
                an EnvironmentError is raised with the message 'AutoTokenizer is designed to be instantiated using the
                `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.'
        """
        raise EnvironmentError(
            "AutoTokenizer is designed to be instantiated "
            "using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(TOKENIZER_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
        r"""
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                - A string, the *model id* of a predefined tokenizer hosted inside a model repo on hf-mirror.com.
                Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
                user or organization name, like `dbmdz/bert-base-german-cased`.
                - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PretrainedConfig`], *optional*):
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files,
                overriding the cached versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete incompletely received files. Will attempt to resume the download if such a
                file exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on hf-mirror.com (e.g. for
                facebook/rag-token-base), specify it here.
            use_fast (`bool`, *optional*, defaults to `True`):
                Use a [fast Rust-based tokenizer](https://hf-mirror.com/docs/tokenizers/index) if it is supported for
                a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
                is returned instead.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Example:
            ```python
            >>> from transformers import AutoTokenizer
            ...
            >>> # Download vocabulary from hf-mirror.com and cache.
            >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
            ...
            >>> # Download vocabulary from hf-mirror.com (user-uploaded) and cache.
            >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")
            ...
            >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
            >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")
            ...
            >>> # Download vocabulary from hf-mirror.com and define model-specific arguments
            >>> tokenizer = AutoTokenizer.from_pretrained("roberta-base", add_prefix_space=True)
            ```
        """
        use_fast = kwargs.pop("use_fast", True)
        tokenizer_type = kwargs.pop("tokenizer_type", None)
        config = kwargs.pop("config", None)

        # First, let's see whether the tokenizer_type is passed so that we can leverage it
        if tokenizer_type is not None:
            tokenizer_class = None
            tokenizer_class_tuple = TOKENIZER_MAPPING_NAMES.get(tokenizer_type, None)

            if tokenizer_class_tuple is None:
                raise ValueError(
                    f"Passed `tokenizer_type` {tokenizer_type} does not exist. `tokenizer_type` should be one of "
                    f"{', '.join(c for c in TOKENIZER_MAPPING_NAMES.keys())}."
                )

            tokenizer_class_name, tokenizer_fast_class_name = tokenizer_class_tuple

            if use_fast:
                if tokenizer_fast_class_name is not None:
                    tokenizer_class = tokenizer_class_from_name(
                        tokenizer_fast_class_name
                    )
                else:
                    logger.warning(
                        "`use_fast` is set to `True` but the tokenizer class does not have a fast version. "
                        " Falling back to the slow version."
                    )
            if tokenizer_class is None:
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_name)

            if tokenizer_class is None:
                raise ValueError(
                    f"Tokenizer class {tokenizer_class_name} is not currently imported."
                )

            return tokenizer_class.from_pretrained(
                pretrained_model_name_or_path, *inputs, **kwargs
            )

        # Next, let's try to use the tokenizer_config file to get the tokenizer class.
        tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
        if "_commit_hash" in tokenizer_config:
            kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
        config_tokenizer_class = tokenizer_config.get("tokenizer_class")

        # If that did not work, let's try to use the config.
        if config_tokenizer_class is None:
            if config is None or not isinstance(config, PretrainedConfig):
                config = AutoConfig.from_pretrained(
                    pretrained_model_name_or_path, **kwargs
                )
            config_tokenizer_class = config.tokenizer_class

        if config_tokenizer_class is not None:
            tokenizer_class = None
            if use_fast and not config_tokenizer_class.endswith("Fast"):
                tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
            if tokenizer_class is None:
                tokenizer_class_candidate = config_tokenizer_class
                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
            if tokenizer_class is None:
                raise ValueError(
                    f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
                )
            return tokenizer_class.from_pretrained(
                pretrained_model_name_or_path, *inputs, **kwargs
            )

        # Otherwise we have to be creative.
        # if model is an encoder decoder, the encoder tokenizer class is used by default
        if isinstance(config, EncoderDecoderConfig):
            if type(config.decoder) is not type(config.encoder):  # noqa: E721
                logger.warning(
                    f"The encoder model config class: {config.encoder.__class__} is different from the decoder model "
                    f"config class: {config.decoder.__class__}. It is not recommended to use the "
                    "`AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder "
                    "specific tokenizer classes."
                )
            config = config.encoder

        model_type = config_class_to_model_type(type(config).__name__)
        if model_type is not None:
            tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]
            if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
                return tokenizer_class_fast.from_pretrained(
                    pretrained_model_name_or_path, *inputs, **kwargs
                )
            if tokenizer_class_py is not None:
                return tokenizer_class_py.from_pretrained(
                    pretrained_model_name_or_path, *inputs, **kwargs
                )
            raise ValueError(
                "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed "
                "in order to use this tokenizer."
            )

        raise ValueError(
            f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"
            f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING.keys())}."
        )

    def register(
        config_class,
        slow_tokenizer_class=None,
        fast_tokenizer_class=None,
        exist_ok=False,
    ):  # pylint: disable=no-self-argument
        """
        Register a new tokenizer in this mapping.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            slow_tokenizer_class ([`PretrainedTokenizer`], *optional*):
                The slow tokenizer to register.
            fast_tokenizer_class ([`PretrainedTokenizerFast`], *optional*):
                The fast tokenizer to register.
        """
        if slow_tokenizer_class is None and fast_tokenizer_class is None:
            raise ValueError(
                "You need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_class"
            )
        if fast_tokenizer_class is not None and issubclass(
            fast_tokenizer_class, PreTrainedTokenizer
        ):
            raise ValueError(
                "You passed a slow tokenizer in the `fast_tokenizer_class`."
            )

        if (
            slow_tokenizer_class is not None
            and fast_tokenizer_class is not None
            and fast_tokenizer_class.slow_tokenizer_class != slow_tokenizer_class
        ):
            raise ValueError(
                "The fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not "
                "consistent with the slow tokenizer class you passed (fast tokenizer has "
                f"{fast_tokenizer_class.slow_tokenizer_class} and you passed {slow_tokenizer_class}. Fix one of those "
                "so they match!"
            )

        # Avoid resetting a set slow/fast tokenizer if we are passing just the other ones.
        if config_class in TOKENIZER_MAPPING._extra_content:
            existing_slow, existing_fast = TOKENIZER_MAPPING[config_class]
            if slow_tokenizer_class is None:
                slow_tokenizer_class = existing_slow
            if fast_tokenizer_class is None:
                fast_tokenizer_class = existing_fast

        TOKENIZER_MAPPING.register(
            config_class,
            (slow_tokenizer_class, fast_tokenizer_class),
            exist_ok=exist_ok,
        )
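
A hedged sketch of wiring a custom tokenizer into the auto classes (MyConfig and MySlowTokenizer are hypothetical, and the `PreTrainedTokenizer` import path is an assumption):

```python
>>> from mindnlp.transformers import PretrainedConfig, AutoTokenizer
>>> from mindnlp.transformers.tokenization_utils import PreTrainedTokenizer  # path is an assumption
...
>>> class MyConfig(PretrainedConfig):
...     model_type = "my-model"
...
>>> class MySlowTokenizer(PreTrainedTokenizer):
...     pass  # a real tokenizer would implement the vocabulary methods
...
>>> AutoTokenizer.register(MyConfig, slow_tokenizer_class=MySlowTokenizer)
>>> # Tokenizer lookups for MyConfig-based checkpoints now resolve to MySlowTokenizer.
```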

mindnlp.transformers.models.auto.tokenization_auto.AutoTokenizer.__init__()

This method initializes an instance of the AutoTokenizer class.

PARAMETER DESCRIPTION
self

The instance of the AutoTokenizer class.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

If the AutoTokenizer is instantiated directly using the __init__() method, an EnvironmentError is raised with the message 'AutoTokenizer is designed to be instantiated using the AutoTokenizer.from_pretrained(pretrained_model_name_or_path) method.'

Source code in mindnlp/transformers/models/auto/tokenization_auto.py
def __init__(self):
    """
    This method initializes an instance of the AutoTokenizer class.

    Args:
        self: The instance of the AutoTokenizer class.

    Returns:
        None.

    Raises:
        EnvironmentError: If the AutoTokenizer is instantiated directly using the __init__ method,
            an EnvironmentError is raised with the message 'AutoTokenizer is designed to be instantiated using the
            `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.'
    """
    raise EnvironmentError(
        "AutoTokenizer is designed to be instantiated "
        "using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method."
    )

mindnlp.transformers.models.auto.tokenization_auto.AutoTokenizer.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs) classmethod

Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

The tokenizer class to instantiate is selected based on the model_type property of the config object (either passed as an argument or loaded from pretrained_model_name_or_path if possible), or when it's missing, by falling back to using pattern matching on pretrained_model_name_or_path:

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

Can be either:

  • A string, the model id of a predefined tokenizer hosted inside a model repo on hf-mirror.com. Valid model ids can be located at the root-level, like bert-base-uncased, or namespaced under a user or organization name, like dbmdz/bert-base-german-cased.
  • A path to a directory containing vocabulary files required by the tokenizer, for instance saved using the [~PreTrainedTokenizer.save_pretrained] method, e.g., ./my_model_directory/.
  • A path or url to a single saved vocabulary file if and only if the tokenizer only requires a single vocabulary file (like Bert or XLNet), e.g.: ./my_model_directory/vocab.txt. (Not applicable to all derived classes)

TYPE: `str` or `os.PathLike`

inputs

Will be passed along to the Tokenizer __init__() method.

TYPE: additional positional arguments, *optional* DEFAULT: ()

cache_dir

Path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force the (re-)download of the model weights and configuration files, overriding the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete incompletely received files. Will attempt to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and other artifacts on hf-mirror.com, so revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

subfolder

In case the relevant files are located inside a subfolder of the model repo on hf-mirror.com (e.g. for facebook/rag-token-base), specify it here.

TYPE: `str`, *optional*

use_fast

Use a fast Rust-based tokenizer if it is supported for a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer is returned instead.

TYPE: `bool`, *optional*, defaults to `True`

tokenizer_type

Tokenizer type to be loaded.

TYPE: `str`, *optional*

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

Will be passed to the Tokenizer __init__() method. Can be used to set special tokens like bos_token, eos_token, unk_token, sep_token, pad_token, cls_token, mask_token, additional_special_tokens. See parameters in the __init__() for more details.

TYPE: additional keyword arguments, *optional* DEFAULT: {}

Example
>>> from transformers import AutoTokenizer
...
>>> # Download vocabulary from hf-mirror.com and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
...
>>> # Download vocabulary from hf-mirror.com (user-uploaded) and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")
...
>>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
>>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")
...
>>> # Download vocabulary from hf-mirror.com and define model-specific arguments
>>> tokenizer = AutoTokenizer.from_pretrained("roberta-base", add_prefix_space=True)
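
A further, hedged sketch (not taken from the library's docstring): combining `use_fast` with a special-token override passed through `kwargs`. The checkpoint follows the examples above; the `"<pad>"` value is purely illustrative.

```python
from mindnlp.transformers import AutoTokenizer

# Request the slow, Python-based tokenizer explicitly and override a special token.
tokenizer = AutoTokenizer.from_pretrained(
    "bert-base-uncased",
    use_fast=False,      # fall back to the Python implementation
    pad_token="<pad>",   # forwarded to the tokenizer __init__() via **kwargs
)
print(tokenizer.pad_token)  # '<pad>'
```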
Source code in mindnlp/transformers/models/auto/tokenization_auto.py
@classmethod
@replace_list_option_in_docstrings(TOKENIZER_MAPPING_NAMES)
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
    r"""
    Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

    The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
    passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
    falling back to using pattern matching on `pretrained_model_name_or_path`:

    List options

    Params:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            Can be either:

            - A string, the *model id* of a predefined tokenizer hosted inside a model repo on hf-mirror.com.
            Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
            user or organization name, like `dbmdz/bert-base-german-cased`.
            - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
            using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
            - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
            single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
            applicable to all derived classes)
        inputs (additional positional arguments, *optional*):
            Will be passed along to the Tokenizer `__init__()` method.
        config ([`PretrainedConfig`], *optional*):
            The configuration object used to determine the tokenizer class to instantiate.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force a (re-)download of the model weights and configuration files, overriding the
            cached versions if they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete incompletely received files. Will attempt to resume the download if such a
            file exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
            identifier allowed by git.
        subfolder (`str`, *optional*):
            In case the relevant files are located inside a subfolder of the model repo on hf-mirror.com (e.g. for
            facebook/rag-token-base), specify it here.
        use_fast (`bool`, *optional*, defaults to `True`):
            Use a [fast Rust-based tokenizer](https://hf-mirror.com/docs/tokenizers/index) if it is supported for
            a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
            is returned instead.
        tokenizer_type (`str`, *optional*):
            Tokenizer type to be loaded.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (additional keyword arguments, *optional*):
            Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
            `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
            `additional_special_tokens`. See parameters in the `__init__()` for more details.

    Example:
        ```python
        >>> from transformers import AutoTokenizer
        ...
        >>> # Download vocabulary from hf-mirror.com and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        ...
        >>> # Download vocabulary from hf-mirror.com (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")
        ...
        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")
        ...
        >>> # Download vocabulary from hf-mirror.com and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("roberta-base", add_prefix_space=True)
        ```
    """
    use_fast = kwargs.pop("use_fast", True)
    tokenizer_type = kwargs.pop("tokenizer_type", None)
    config = kwargs.pop("config", None)

    # First, let's see whether the tokenizer_type is passed so that we can leverage it
    if tokenizer_type is not None:
        tokenizer_class = None
        tokenizer_class_tuple = TOKENIZER_MAPPING_NAMES.get(tokenizer_type, None)

        if tokenizer_class_tuple is None:
            raise ValueError(
                f"Passed `tokenizer_type` {tokenizer_type} does not exist. `tokenizer_type` should be one of "
                f"{', '.join(c for c in TOKENIZER_MAPPING_NAMES.keys())}."
            )

        tokenizer_class_name, tokenizer_fast_class_name = tokenizer_class_tuple

        if use_fast:
            if tokenizer_fast_class_name is not None:
                tokenizer_class = tokenizer_class_from_name(
                    tokenizer_fast_class_name
                )
            else:
                logger.warning(
                    "`use_fast` is set to `True` but the tokenizer class does not have a fast version. "
                    " Falling back to the slow version."
                )
        if tokenizer_class is None:
            tokenizer_class = tokenizer_class_from_name(tokenizer_class_name)

        if tokenizer_class is None:
            raise ValueError(
                f"Tokenizer class {tokenizer_class_name} is not currently imported."
            )

        return tokenizer_class.from_pretrained(
            pretrained_model_name_or_path, *inputs, **kwargs
        )

    # Next, let's try to use the tokenizer_config file to get the tokenizer class.
    tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
    if "_commit_hash" in tokenizer_config:
        kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
    config_tokenizer_class = tokenizer_config.get("tokenizer_class")

    # If that did not work, let's try to use the config.
    if config_tokenizer_class is None:
        if config is None or not isinstance(config, PretrainedConfig):
            config = AutoConfig.from_pretrained(
                pretrained_model_name_or_path, **kwargs
            )
        config_tokenizer_class = config.tokenizer_class

    if config_tokenizer_class is not None:
        tokenizer_class = None
        if use_fast and not config_tokenizer_class.endswith("Fast"):
            tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
            tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
        if tokenizer_class is None:
            tokenizer_class_candidate = config_tokenizer_class
            tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
        if tokenizer_class is None:
            raise ValueError(
                f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
            )
        return tokenizer_class.from_pretrained(
            pretrained_model_name_or_path, *inputs, **kwargs
        )

    # Otherwise we have to be creative.
    # if model is an encoder decoder, the encoder tokenizer class is used by default
    if isinstance(config, EncoderDecoderConfig):
        if type(config.decoder) is not type(config.encoder):  # noqa: E721
            logger.warning(
                f"The encoder model config class: {config.encoder.__class__} is different from the decoder model "
                f"config class: {config.decoder.__class__}. It is not recommended to use the "
                "`AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder "
                "specific tokenizer classes."
            )
        config = config.encoder

    model_type = config_class_to_model_type(type(config).__name__)
    if model_type is not None:
        tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]
        if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
            return tokenizer_class_fast.from_pretrained(
                pretrained_model_name_or_path, *inputs, **kwargs
            )
        if tokenizer_class_py is not None:
            return tokenizer_class_py.from_pretrained(
                pretrained_model_name_or_path, *inputs, **kwargs
            )
        raise ValueError(
            "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed "
            "in order to use this tokenizer."
        )

    raise ValueError(
        f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"
        f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING.keys())}."
    )
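
To make the resolution order implemented above concrete, here is a hedged usage sketch: passing a ready-made `config` object lets `from_pretrained` skip the internal `AutoConfig` lookup. The `mindnlp.transformers` import path is assumed from this page's module names.

```python
from mindnlp.transformers import AutoConfig, AutoTokenizer

# Load the configuration once; from_pretrained then reads config.tokenizer_class
# (or TOKENIZER_MAPPING) instead of fetching the configuration a second time.
config = AutoConfig.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", config=config)
```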

mindnlp.transformers.models.auto.tokenization_auto.AutoTokenizer.register(config_class, slow_tokenizer_class=None, fast_tokenizer_class=None, exist_ok=False)

Register a new tokenizer in this mapping.

PARAMETER DESCRIPTION
config_class

The configuration corresponding to the model to register.

TYPE: [`PretrainedConfig`]

slow_tokenizer_class

The slow tokenizer to register.

TYPE: [`PretrainedTokenizer`], *optional* DEFAULT: None

fast_tokenizer_class

The fast tokenizer to register.

TYPE: [`PretrainedTokenizerFast`], *optional* DEFAULT: None

Source code in mindnlp/transformers/models/auto/tokenization_auto.py
def register(
    config_class,
    slow_tokenizer_class=None,
    fast_tokenizer_class=None,
    exist_ok=False,
):  # pylint: disable=no-self-argument
    """
    Register a new tokenizer in this mapping.

    Args:
        config_class ([`PretrainedConfig`]):
            The configuration corresponding to the model to register.
        slow_tokenizer_class ([`PretrainedTokenizer`], *optional*):
            The slow tokenizer to register.
        fast_tokenizer_class ([`PretrainedTokenizerFast`], *optional*):
            The fast tokenizer to register.
    """
    if slow_tokenizer_class is None and fast_tokenizer_class is None:
        raise ValueError(
            "You need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_class"
        )
    if fast_tokenizer_class is not None and issubclass(
        fast_tokenizer_class, PreTrainedTokenizer
    ):
        raise ValueError(
            "You passed a slow tokenizer in the `fast_tokenizer_class`."
        )

    if (
        slow_tokenizer_class is not None
        and fast_tokenizer_class is not None
        and fast_tokenizer_class.slow_tokenizer_class != slow_tokenizer_class
    ):
        raise ValueError(
            "The fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not "
            "consistent with the slow tokenizer class you passed (fast tokenizer has "
            f"{fast_tokenizer_class.slow_tokenizer_class} and you passed {slow_tokenizer_class}. Fix one of those "
            "so they match!"
        )

    # Avoid resetting a set slow/fast tokenizer if we are passing just the other ones.
    if config_class in TOKENIZER_MAPPING._extra_content:
        existing_slow, existing_fast = TOKENIZER_MAPPING[config_class]
        if slow_tokenizer_class is None:
            slow_tokenizer_class = existing_slow
        if fast_tokenizer_class is None:
            fast_tokenizer_class = existing_fast

    TOKENIZER_MAPPING.register(
        config_class,
        (slow_tokenizer_class, fast_tokenizer_class),
        exist_ok=exist_ok,
    )
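
A minimal registration sketch under stated assumptions: `MyConfig` and `MyTokenizer` are hypothetical classes, and the `PretrainedConfig`/`PreTrainedTokenizer` import paths are assumed to be exported from `mindnlp.transformers`.

```python
from mindnlp.transformers import AutoTokenizer, PretrainedConfig, PreTrainedTokenizer

class MyConfig(PretrainedConfig):
    """Hypothetical configuration class, shown only for the call signature."""
    model_type = "my-model"

class MyTokenizer(PreTrainedTokenizer):
    """Hypothetical slow tokenizer; a real one implements the vocab methods."""

# Map the config class to a slow tokenizer; fast_tokenizer_class stays None.
AutoTokenizer.register(MyConfig, slow_tokenizer_class=MyTokenizer)
```

In a complete workflow the config class is typically registered as well (via `AutoConfig.register`) so that `AutoTokenizer.from_pretrained` can resolve checkpoints of this model type automatically.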

mindnlp.transformers.models.auto.feature_extraction_auto.FEATURE_EXTRACTOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, FEATURE_EXTRACTOR_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.feature_extraction_auto.AutoFeatureExtractor

This is a generic feature extractor class that will be instantiated as one of the feature extractor classes of the library when created with the [AutoFeatureExtractor.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp/transformers/models/auto/feature_extraction_auto.py
class AutoFeatureExtractor:
    r"""
    This is a generic feature extractor class that will be instantiated as one of the feature extractor classes of the
    library when created with the [`AutoFeatureExtractor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """
    def __init__(self):
        """
        Initializes an instance of the AutoFeatureExtractor class.

        Args:
            self: An instance of the AutoFeatureExtractor class.

        Returns:
            None.

        Raises:
            EnvironmentError: This exception is raised with the message 'AutoFeatureExtractor is designed to be
                instantiated using the `AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)` method.'
        """
        raise EnvironmentError(
            "AutoFeatureExtractor is designed to be instantiated "
            "using the `AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(FEATURE_EXTRACTOR_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the feature extractor classes of the library from a pretrained model vocabulary.

        The feature extractor class to instantiate is selected based on the `model_type` property of the config object
        (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
        missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  hf-mirror.com.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force a (re-)download of the feature extractor files, overriding the cached
                versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete incompletely received files. Attempts to resume the download if such a file
                exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final feature extractor object. If `True`, then this
                function returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
                `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are feature extractor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Example:
            ```python
            >>> from transformers import AutoFeatureExtractor
            ...
            >>> # Download feature extractor from hf-mirror.com and cache.
            >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
            ...
            >>> # If feature extractor files are in a directory (e.g. feature extractor was saved using *save_pretrained('./test/saved_model/')*)
            >>> # feature_extractor = AutoFeatureExtractor.from_pretrained("./test/saved_model/")
            ```
        """
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        config = kwargs.pop("config", None)
        kwargs["_from_auto"] = True

        config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(pretrained_model_name_or_path, **kwargs)
        feature_extractor_class = config_dict.get("feature_extractor_type", None)
        feature_extractor_auto_map = None
        if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
            feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]

        # If we don't find the feature extractor class in the feature extractor config, let's try the model config.
        if feature_extractor_class is None and feature_extractor_auto_map is None:
            if not isinstance(config, PretrainedConfig):
                config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
            # It could be in `config.feature_extractor_type`
            feature_extractor_class = getattr(config, "feature_extractor_type", None)
            if hasattr(config, "auto_map") and "AutoFeatureExtractor" in config.auto_map:
                feature_extractor_auto_map = config.auto_map["AutoFeatureExtractor"]

        if feature_extractor_class is not None:
            feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)

        if feature_extractor_class is not None:
            return feature_extractor_class.from_dict(config_dict, **kwargs)
        # Last try: we use the FEATURE_EXTRACTOR_MAPPING.
        if type(config) in FEATURE_EXTRACTOR_MAPPING:
            feature_extractor_class = FEATURE_EXTRACTOR_MAPPING[type(config)]
            return feature_extractor_class.from_dict(config_dict, **kwargs)

        raise ValueError(
            f"Unrecognized feature extractor in {pretrained_model_name_or_path}. Should have a "
            f"`feature_extractor_type` key in its {FEATURE_EXTRACTOR_NAME} of {CONFIG_NAME}, or one of the following "
            f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in FEATURE_EXTRACTOR_MAPPING_NAMES.keys())}"
        )

    @staticmethod
    def register(config_class, feature_extractor_class, exist_ok=False):
        """
        Register a new feature extractor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            feature_extractor_class ([`FeatureExtractorMixin`]): The feature extractor to register.
        """
        FEATURE_EXTRACTOR_MAPPING.register(config_class, feature_extractor_class, exist_ok=exist_ok)

mindnlp.transformers.models.auto.feature_extraction_auto.AutoFeatureExtractor.__init__()

Initializes an instance of the AutoFeatureExtractor class.

PARAMETER DESCRIPTION
self

An instance of the AutoFeatureExtractor class.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

This exception is raised with the message 'AutoFeatureExtractor is designed to be instantiated using the AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path) method.'

Source code in mindnlp/transformers/models/auto/feature_extraction_auto.py
def __init__(self):
    """
    Initializes an instance of the AutoFeatureExtractor class.

    Args:
        self: An instance of the AutoFeatureExtractor class.

    Returns:
        None.

    Raises:
        EnvironmentError: This exception is raised with the message 'AutoFeatureExtractor is designed to be
            instantiated using the `AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)` method.'
    """
    raise EnvironmentError(
        "AutoFeatureExtractor is designed to be instantiated "
        "using the `AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path)` method."
    )

mindnlp.transformers.models.auto.feature_extraction_auto.AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs) classmethod

Instantiate one of the feature extractor classes of the library from a pretrained model vocabulary.

The feature extractor class to instantiate is selected based on the model_type property of the config object (either passed as an argument or loaded from pretrained_model_name_or_path if possible), or when it's missing, by falling back to using pattern matching on pretrained_model_name_or_path:

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

This can be either:

  • a string, the model id of a pretrained feature_extractor hosted inside a model repo on hf-mirror.com.
  • a path to a directory containing a feature extractor file saved using the [~feature_extraction_utils.FeatureExtractionMixin.save_pretrained] method, e.g., ./my_model_directory/.
  • a path or url to a saved feature extractor JSON file, e.g., ./my_model_directory/preprocessor_config.json.

TYPE: `str` or `os.PathLike`

cache_dir

Path to a directory in which a downloaded pretrained model feature extractor should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force a (re-)download of the feature extractor files, overriding the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete incompletely received files. Attempts to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

token

The token to use as HTTP bearer authorization for remote files. If True, will use the token generated when running huggingface-cli login (stored in ~/.huggingface).

TYPE: `str` or *bool*, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and other artifacts on hf-mirror.com, so revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

return_unused_kwargs

If False, then this function returns just the final feature extractor object. If True, then this function returns a Tuple(feature_extractor, unused_kwargs) where unused_kwargs is a dictionary consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of kwargs which has not been used to update feature_extractor and is otherwise ignored.

TYPE: `bool`, *optional*, defaults to `False`

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

The values in kwargs of any keys which are feature extractor attributes will be used to override the loaded values. Behavior concerning key/value pairs whose keys are not feature extractor attributes is controlled by the return_unused_kwargs keyword parameter.

TYPE: `Dict[str, Any]`, *optional* DEFAULT: {}

Passing token=True is required when you want to use a private model.

Example
>>> from transformers import AutoFeatureExtractor
...
>>> # Download feature extractor from hf-mirror.com and cache.
>>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
...
>>> # If feature extractor files are in a directory (e.g. feature extractor was saved using *save_pretrained('./test/saved_model/')*)
>>> # feature_extractor = AutoFeatureExtractor.from_pretrained("./test/saved_model/")
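
A hedged sketch of the `return_unused_kwargs` behaviour documented above; `foo` is a deliberately unknown key, so it is returned in the unused-kwargs dictionary rather than applied to the feature extractor.

```python
from mindnlp.transformers import AutoFeatureExtractor

# With return_unused_kwargs=True the call returns a (feature_extractor, unused) tuple.
feature_extractor, unused = AutoFeatureExtractor.from_pretrained(
    "facebook/wav2vec2-base-960h",
    return_unused_kwargs=True,
    foo=False,  # not a feature extractor attribute
)
print(unused)  # {'foo': False}
```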
Source code in mindnlp/transformers/models/auto/feature_extraction_auto.py
@classmethod
@replace_list_option_in_docstrings(FEATURE_EXTRACTOR_MAPPING_NAMES)
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    r"""
    Instantiate one of the feature extractor classes of the library from a pretrained model vocabulary.

    The feature extractor class to instantiate is selected based on the `model_type` property of the config object
    (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
    missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

    List options

    Params:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
              hf-mirror.com.
            - a path to a *directory* containing a feature extractor file saved using the
              [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] method, e.g.,
              `./my_model_directory/`.
            - a path or url to a saved feature extractor JSON *file*, e.g.,
              `./my_model_directory/preprocessor_config.json`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force a (re-)download of the feature extractor files, overriding the cached versions
            if they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete incompletely received files. Attempts to resume the download if such a file
            exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
            identifier allowed by git.
        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final feature extractor object. If `True`, then this
            function returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
            consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
            `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (`Dict[str, Any]`, *optional*):
            The values in kwargs of any keys which are feature extractor attributes will be used to override the
            loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
            controlled by the `return_unused_kwargs` keyword parameter.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Example:
        ```python
        >>> from transformers import AutoFeatureExtractor
        ...
        >>> # Download feature extractor from hf-mirror.com and cache.
        >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
        ...
        >>> # If feature extractor files are in a directory (e.g. feature extractor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # feature_extractor = AutoFeatureExtractor.from_pretrained("./test/saved_model/")
        ```
    """
    use_auth_token = kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
            FutureWarning,
        )
        if kwargs.get("token", None) is not None:
            raise ValueError(
                "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
            )
        kwargs["token"] = use_auth_token

    config = kwargs.pop("config", None)
    kwargs["_from_auto"] = True

    config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(pretrained_model_name_or_path, **kwargs)
    feature_extractor_class = config_dict.get("feature_extractor_type", None)
    feature_extractor_auto_map = None
    if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
        feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]

    # If we don't find the feature extractor class in the feature extractor config, let's try the model config.
    if feature_extractor_class is None and feature_extractor_auto_map is None:
        if not isinstance(config, PretrainedConfig):
            config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        # It could be in `config.feature_extractor_type`
        feature_extractor_class = getattr(config, "feature_extractor_type", None)
        if hasattr(config, "auto_map") and "AutoFeatureExtractor" in config.auto_map:
            feature_extractor_auto_map = config.auto_map["AutoFeatureExtractor"]

    if feature_extractor_class is not None:
        feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)

    if feature_extractor_class is not None:
        return feature_extractor_class.from_dict(config_dict, **kwargs)
    # Last try: we use the FEATURE_EXTRACTOR_MAPPING.
    if type(config) in FEATURE_EXTRACTOR_MAPPING:
        feature_extractor_class = FEATURE_EXTRACTOR_MAPPING[type(config)]
        return feature_extractor_class.from_dict(config_dict, **kwargs)

    raise ValueError(
        f"Unrecognized feature extractor in {pretrained_model_name_or_path}. Should have a "
        f"`feature_extractor_type` key in its {FEATURE_EXTRACTOR_NAME} of {CONFIG_NAME}, or one of the following "
        f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in FEATURE_EXTRACTOR_MAPPING_NAMES.keys())}"
    )

mindnlp.transformers.models.auto.feature_extraction_auto.AutoFeatureExtractor.register(config_class, feature_extractor_class, exist_ok=False) staticmethod

Register a new feature extractor for this class.

PARAMETER DESCRIPTION
config_class

The configuration corresponding to the model to register.

TYPE: [`PretrainedConfig`]

feature_extractor_class

The feature extractor to register.

TYPE: [`FeatureExtractorMixin`]

Source code in mindnlp/transformers/models/auto/feature_extraction_auto.py
@staticmethod
def register(config_class, feature_extractor_class, exist_ok=False):
    """
    Register a new feature extractor for this class.

    Args:
        config_class ([`PretrainedConfig`]):
            The configuration corresponding to the model to register.
        feature_extractor_class ([`FeatureExtractorMixin`]): The feature extractor to register.
    """
    FEATURE_EXTRACTOR_MAPPING.register(config_class, feature_extractor_class, exist_ok=exist_ok)
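
For comparison with `AutoTokenizer.register` above, registering a feature extractor takes a single class rather than a slow/fast pair. Both classes below are hypothetical placeholders; a real `MyFeatureExtractor` would subclass `FeatureExtractionMixin`.

```python
from mindnlp.transformers import AutoFeatureExtractor, PretrainedConfig

class MyConfig(PretrainedConfig):
    """Hypothetical configuration class for a custom model."""
    model_type = "my-audio-model"

class MyFeatureExtractor:
    """Placeholder standing in for a FeatureExtractionMixin subclass."""

# One config class maps to exactly one feature extractor class.
AutoFeatureExtractor.register(MyConfig, MyFeatureExtractor)
```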

mindnlp.transformers.models.auto.image_processing_auto.IMAGE_PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, IMAGE_PROCESSOR_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.image_processing_auto.AutoImageProcessor

This is a generic image processor class that will be instantiated as one of the image processor classes of the library when created with the [AutoImageProcessor.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).

Source code in mindnlp/transformers/models/auto/image_processing_auto.py
class AutoImageProcessor:
    r"""
    This is a generic image processor class that will be instantiated as one of the image processor classes of the
    library when created with the [`AutoImageProcessor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """
    def __init__(self):
        """
        Initializes an instance of AutoImageProcessor.

        Args:
            self: The object itself.

        Returns:
            None.

        Raises:
            EnvironmentError:
                Raised when attempting to directly instantiate an AutoImageProcessor object.
                AutoImageProcessor is designed to be instantiated using the
                `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method.
        """
        raise EnvironmentError(
            "AutoImageProcessor is designed to be instantiated "
            "using the `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(IMAGE_PROCESSOR_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the image processor classes of the library from a pretrained model vocabulary.

        The image processor class to instantiate is selected based on the `model_type` property of the config object
        (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
        missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained image_processor hosted inside a model repo on
                  hf-mirror.com.
                - a path to a *directory* containing an image processor file saved using the
                  [`~image_processing_utils.ImageProcessingMixin.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved image processor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model image processor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force a (re-)download of the image processor files, overriding the cached
                versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete incompletely received files. Attempts to resume the download if such a file
                exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final image processor object. If `True`, then this
                function returns a `Tuple(image_processor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not image processor attributes: i.e., the part of
                `kwargs` which has not been used to update `image_processor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are image processor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* image processor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Example:
            ```python
            >>> from transformers import AutoImageProcessor
            ...
            >>> # Download image processor from hf-mirror.com and cache.
            >>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
            ...
            >>> # If image processor files are in a directory (e.g. image processor was saved using *save_pretrained('./test/saved_model/')*)
            >>> # image_processor = AutoImageProcessor.from_pretrained("./test/saved_model/")
            ```
        """
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        config = kwargs.pop("config", None)
        kwargs["_from_auto"] = True

        config_dict, _ = ImageProcessingMixin.get_image_processor_dict(pretrained_model_name_or_path, **kwargs)
        image_processor_class = config_dict.get("image_processor_type", None)
        image_processor_auto_map = None
        if "AutoImageProcessor" in config_dict.get("auto_map", {}):
            image_processor_auto_map = config_dict["auto_map"]["AutoImageProcessor"]

        # If we still don't have the image processor class, check if we're loading from a previous feature extractor config
        # and if so, infer the image processor class from there.
        if image_processor_class is None and image_processor_auto_map is None:
            feature_extractor_class = config_dict.pop("feature_extractor_type", None)
            if feature_extractor_class is not None:
                logger.warning(
                    "Could not find image processor class in the image processor config or the model config. Loading "
                    "based on pattern matching with the model's feature extractor configuration. Please open a "
                    "PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of "
                    "`feature_extractor_type`. This warning will be removed in v4.40."
                )
                image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
            if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
                feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]
                image_processor_auto_map = feature_extractor_auto_map.replace("FeatureExtractor", "ImageProcessor")
                logger.warning(
                    "Could not find image processor auto map in the image processor config or the model config. "
                    "Loading based on pattern matching with the model's feature extractor configuration. Please open a "
                    "PR/issue to update `preprocessor_config.json` to use `AutoImageProcessor` instead of "
                    "`AutoFeatureExtractor`. This warning will be removed in v4.40."
                )

        # If we don't find the image processor class in the image processor config, let's try the model config.
        if image_processor_class is None and image_processor_auto_map is None:
            if not isinstance(config, PretrainedConfig):
                config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
            # It could be in `config.image_processor_type`
            image_processor_class = getattr(config, "image_processor_type", None)
            if hasattr(config, "auto_map") and "AutoImageProcessor" in config.auto_map:
                image_processor_auto_map = config.auto_map["AutoImageProcessor"]

        if image_processor_class is not None:
            image_processor_class = image_processor_class_from_name(image_processor_class)

        if image_processor_class is not None:
            return image_processor_class.from_dict(config_dict, **kwargs)
        # Last try: we use the IMAGE_PROCESSOR_MAPPING.
        if type(config) in IMAGE_PROCESSOR_MAPPING:
            image_processor_class = IMAGE_PROCESSOR_MAPPING[type(config)]
            return image_processor_class.from_dict(config_dict, **kwargs)

        raise ValueError(
            f"Unrecognized image processor in {pretrained_model_name_or_path}. Should have a "
            f"`image_processor_type` key in its {IMAGE_PROCESSOR_NAME} of {CONFIG_NAME}, or one of the following "
            f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in IMAGE_PROCESSOR_MAPPING_NAMES.keys())}"
        )

    @staticmethod
    def register(config_class, image_processor_class, exist_ok=False):
        """
        Register a new image processor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            image_processor_class ([`ImageProcessingMixin`]): The image processor to register.
        """
        IMAGE_PROCESSOR_MAPPING.register(config_class, image_processor_class, exist_ok=exist_ok)

mindnlp.transformers.models.auto.image_processing_auto.AutoImageProcessor.__init__()

Initializes an instance of AutoImageProcessor.

PARAMETER DESCRIPTION
self

The object itself.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

Raised when attempting to directly instantiate an AutoImageProcessor object. AutoImageProcessor is designed to be instantiated using the `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method.

Source code in mindnlp/transformers/models/auto/image_processing_auto.py
def __init__(self):
    """
    Initializes an instance of AutoImageProcessor.

    Args:
        self: The object itself.

    Returns:
        None.

    Raises:
        EnvironmentError:
            Raised when attempting to directly instantiate an AutoImageProcessor object.
            AutoImageProcessor is designed to be instantiated using the
            `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method.
    """
    raise EnvironmentError(
        "AutoImageProcessor is designed to be instantiated "
        "using the `AutoImageProcessor.from_pretrained(pretrained_model_name_or_path)` method."
    )

mindnlp.transformers.models.auto.image_processing_auto.AutoImageProcessor.from_pretrained(pretrained_model_name_or_path, **kwargs) classmethod

Instantiate one of the image processor classes of the library from a pretrained model vocabulary.

The image processor class to instantiate is selected based on the model_type property of the config object (either passed as an argument or loaded from pretrained_model_name_or_path if possible), or when it's missing, by falling back to using pattern matching on pretrained_model_name_or_path:

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

This can be either:

  • a string, the model id of a pretrained image_processor hosted inside a model repo on hf-mirror.com.
  • a path to a directory containing an image processor file saved using the [~image_processing_utils.ImageProcessingMixin.save_pretrained] method, e.g., ./my_model_directory/.
  • a path or url to a saved image processor JSON file, e.g., ./my_model_directory/preprocessor_config.json.

TYPE: `str` or `os.PathLike`

cache_dir

Path to a directory in which a downloaded pretrained model image processor should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force a (re-)download of the image processor files, overriding the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete incompletely received files. Attempts to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

token

The token to use as HTTP bearer authorization for remote files. If True, will use the token generated when running huggingface-cli login (stored in ~/.huggingface).

TYPE: `str` or *bool*, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a git-based system for storing models and other artifacts on hf-mirror.com, so revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

return_unused_kwargs

If False, then this function returns just the final image processor object. If True, then this function returns a Tuple(image_processor, unused_kwargs) where unused_kwargs is a dictionary consisting of the key/value pairs whose keys are not image processor attributes: i.e., the part of kwargs which has not been used to update image_processor and is otherwise ignored.

TYPE: `bool`, *optional*, defaults to `False`

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

The values in kwargs of any keys which are image processor attributes will be used to override the loaded values. Behavior concerning key/value pairs whose keys are not image processor attributes is controlled by the return_unused_kwargs keyword parameter.

TYPE: `Dict[str, Any]`, *optional* DEFAULT: {}

Passing token=True is required when you want to use a private model.

Example
>>> from transformers import AutoImageProcessor
...
>>> # Download image processor from hf-mirror.com and cache.
>>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
...
>>> # If image processor files are in a directory (e.g. image processor was saved using *save_pretrained('./test/saved_model/')*)
>>> # image_processor = AutoImageProcessor.from_pretrained("./test/saved_model/")
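
A short follow-on sketch using the checkpoint from the example above; the NumPy array is a stand-in for a real image, and `pixel_values` is the output key this checkpoint's processor is expected to produce.

```python
import numpy as np
from mindnlp.transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

# A dummy 224x224 RGB image stands in for real input data.
image = np.zeros((224, 224, 3), dtype=np.uint8)
inputs = image_processor(images=image)
print(inputs["pixel_values"][0].shape)  # typically (3, 224, 224) for this checkpoint
```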
Source code in mindnlp/transformers/models/auto/image_processing_auto.py
@classmethod
@replace_list_option_in_docstrings(IMAGE_PROCESSOR_MAPPING_NAMES)
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    r"""
    Instantiate one of the image processor classes of the library from a pretrained model vocabulary.

    The image processor class to instantiate is selected based on the `model_type` property of the config object
    (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
    missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

    List options

    Params:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained image_processor hosted inside a model repo on
              hf-mirror.com.
            - a path to a *directory* containing an image processor file saved using the
              [`~image_processing_utils.ImageProcessingMixin.save_pretrained`] method, e.g.,
              `./my_model_directory/`.
            - a path or url to a saved image processor JSON *file*, e.g.,
              `./my_model_directory/preprocessor_config.json`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model image processor should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the image processor files and override the cached versions if
            they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete an incompletely received file. Attempts to resume the download if such a file
            exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
            identifier allowed by git.
        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final image processor object. If `True`, then this
            function returns a `Tuple(image_processor, unused_kwargs)` where *unused_kwargs* is a dictionary
            consisting of the key/value pairs whose keys are not image processor attributes: i.e., the part of
            `kwargs` which has not been used to update `image_processor` and is otherwise ignored.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (`Dict[str, Any]`, *optional*):
            The values in kwargs of any keys which are image processor attributes will be used to override the
            loaded values. Behavior concerning key/value pairs whose keys are *not* image processor attributes is
            controlled by the `return_unused_kwargs` keyword parameter.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Example:
        ```python
        >>> from transformers import AutoImageProcessor
        ...
        >>> # Download image processor from hf-mirror.com and cache.
        >>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
        ...
        >>> # If image processor files are in a directory (e.g. image processor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # image_processor = AutoImageProcessor.from_pretrained("./test/saved_model/")
        ```
    """
    use_auth_token = kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
            FutureWarning,
        )
        if kwargs.get("token", None) is not None:
            raise ValueError(
                "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
            )
        kwargs["token"] = use_auth_token

    config = kwargs.pop("config", None)
    kwargs["_from_auto"] = True

    config_dict, _ = ImageProcessingMixin.get_image_processor_dict(pretrained_model_name_or_path, **kwargs)
    image_processor_class = config_dict.get("image_processor_type", None)
    image_processor_auto_map = None
    if "AutoImageProcessor" in config_dict.get("auto_map", {}):
        image_processor_auto_map = config_dict["auto_map"]["AutoImageProcessor"]

    # If we still don't have the image processor class, check if we're loading from a previous feature extractor config
    # and if so, infer the image processor class from there.
    if image_processor_class is None and image_processor_auto_map is None:
        feature_extractor_class = config_dict.pop("feature_extractor_type", None)
        if feature_extractor_class is not None:
            logger.warning(
                "Could not find image processor class in the image processor config or the model config. Loading "
                "based on pattern matching with the model's feature extractor configuration. Please open a "
                "PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of "
                "`feature_extractor_type`. This warning will be removed in v4.40."
            )
            image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
        if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
            feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]
            image_processor_auto_map = feature_extractor_auto_map.replace("FeatureExtractor", "ImageProcessor")
            logger.warning(
                "Could not find image processor auto map in the image processor config or the model config. "
                "Loading based on pattern matching with the model's feature extractor configuration. Please open a "
                "PR/issue to update `preprocessor_config.json` to use `AutoImageProcessor` instead of "
                "`AutoFeatureExtractor`. This warning will be removed in v4.40."
            )

    # If we don't find the image processor class in the image processor config, let's try the model config.
    if image_processor_class is None and image_processor_auto_map is None:
        if not isinstance(config, PretrainedConfig):
            config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        # It could be in `config.image_processor_type``
        image_processor_class = getattr(config, "image_processor_type", None)
        if hasattr(config, "auto_map") and "AutoImageProcessor" in config.auto_map:
            image_processor_auto_map = config.auto_map["AutoImageProcessor"]

    if image_processor_class is not None:
        image_processor_class = image_processor_class_from_name(image_processor_class)

    if image_processor_class is not None:
        return image_processor_class.from_dict(config_dict, **kwargs)
    # Last try: we use the IMAGE_PROCESSOR_MAPPING.
    if type(config) in IMAGE_PROCESSOR_MAPPING:
        image_processor_class = IMAGE_PROCESSOR_MAPPING[type(config)]
        return image_processor_class.from_dict(config_dict, **kwargs)

    raise ValueError(
        f"Unrecognized image processor in {pretrained_model_name_or_path}. Should have a "
        f"`image_processor_type` key in its {IMAGE_PROCESSOR_NAME} of {CONFIG_NAME}, or one of the following "
        f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in IMAGE_PROCESSOR_MAPPING_NAMES.keys())}"
    )

mindnlp.transformers.models.auto.image_processing_auto.AutoImageProcessor.register(config_class, image_processor_class, exist_ok=False) staticmethod

Register a new image processor for this class.

PARAMETER DESCRIPTION
config_class

The configuration corresponding to the model to register.

TYPE: [`PretrainedConfig`]

image_processor_class

The image processor to register.

TYPE: [`ImageProcessingMixin`]

Source code in mindnlp/transformers/models/auto/image_processing_auto.py
@staticmethod
def register(config_class, image_processor_class, exist_ok=False):
    """
    Register a new image processor for this class.

    Args:
        config_class ([`PretrainedConfig`]):
            The configuration corresponding to the model to register.
        image_processor_class ([`ImageProcessingMixin`]): The image processor to register.
    """
    IMAGE_PROCESSOR_MAPPING.register(config_class, image_processor_class, exist_ok=exist_ok)
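
A typical use of register is wiring a custom configuration/image-processor pair into the auto machinery. A minimal sketch, assuming CustomConfig (a PretrainedConfig subclass with model_type = "custom-model") and CustomImageProcessor (an ImageProcessingMixin subclass) are user-defined classes, and that AutoConfig.register mirrors the upstream transformers API:

>>> from mindnlp.transformers import AutoConfig, AutoImageProcessor
>>> # CustomConfig and CustomImageProcessor are hypothetical user-defined classes.
>>> AutoConfig.register("custom-model", CustomConfig)
>>> AutoImageProcessor.register(CustomConfig, CustomImageProcessor)
>>> # AutoImageProcessor.from_pretrained can now resolve "custom-model" checkpoints.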

mindnlp.transformers.models.auto.processing_auto.PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, PROCESSOR_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.processing_auto.AutoProcessor

This is a generic processor class that will be instantiated as one of the processor classes of the library when created with the [AutoProcessor.from_pretrained] class method.

This class cannot be instantiated directly using __init__() (throws an error).
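
As the source below shows, calling the constructor raises an EnvironmentError; the from_pretrained class method is the only supported entry point:

>>> from mindnlp.transformers import AutoProcessor
>>> # AutoProcessor()  # raises EnvironmentError
>>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")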

Source code in mindnlp/transformers/models/auto/processing_auto.py
class AutoProcessor:
    r"""
    This is a generic processor class that will be instantiated as one of the processor classes of the library when
    created with the [`AutoProcessor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    """

    def __init__(self):
        """
        Class: AutoProcessor

        __init__(self)
            Initializes a new instance of the AutoProcessor class.

        Args:
            self (object): The instance of the AutoProcessor class.

        Returns:
            None.

        Raises:
            EnvironmentError: This method raises an EnvironmentError with the message 'AutoProcessor is designed to be
            instantiated using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method.'

        """
        raise EnvironmentError(
            "AutoProcessor is designed to be instantiated "
            "using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method."
        )

    @classmethod
    @replace_list_option_in_docstrings(PROCESSOR_MAPPING_NAMES)
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate one of the processor classes of the library from a pretrained model vocabulary.

        The processor class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible):

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

               - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  hf-mirror.com.
           - a path to a *directory* containing processor files saved using the `save_pretrained()` method,
                  e.g., `./my_model_directory/`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the feature extractor files and override the cached versions
                if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete an incompletely received file. Attempts to resume the download if such a file
                exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final feature extractor object. If `True`, then this
                function returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
                `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are feature extractor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Example:
            ```python
            >>> from transformers import AutoProcessor
            ...
            >>> # Download processor from hf-mirror.com and cache.
            >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
            ...
            >>> # If processor files are in a directory (e.g. processor was saved using *save_pretrained('./test/saved_model/')*)
            >>> # processor = AutoProcessor.from_pretrained("./test/saved_model/")
            ```
        """
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token

        config = kwargs.pop("config", None)
        trust_remote_code = kwargs.pop("trust_remote_code", None)
        kwargs["_from_auto"] = True

        processor_class = None

        # First, let's see if we have a processor or preprocessor config.
        # Filter the kwargs for `get_file_from_repo`.
        get_file_from_repo_kwargs = {
            key: kwargs[key]
            for key in inspect.signature(get_file_from_repo).parameters.keys()
            if key in kwargs
        }

        # Let's start by checking whether the processor class is saved in a processor config
        processor_config_file = get_file_from_repo(
            pretrained_model_name_or_path, PROCESSOR_NAME, **get_file_from_repo_kwargs
        )
        if processor_config_file is not None:
            config_dict, _ = ProcessorMixin.get_processor_dict(
                pretrained_model_name_or_path, **kwargs
            )
            processor_class = config_dict.get("processor_class", None)

        if processor_class is None:
            # If not found, let's check whether the processor class is saved in an image processor config
            preprocessor_config_file = get_file_from_repo(
                pretrained_model_name_or_path,
                FEATURE_EXTRACTOR_NAME,
                **get_file_from_repo_kwargs,
            )
            if preprocessor_config_file is not None:
                config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
                    pretrained_model_name_or_path, **kwargs
                )
                processor_class = config_dict.get("processor_class", None)

            # If not found, let's check whether the processor class is saved in a feature extractor config
            if preprocessor_config_file is not None and processor_class is None:
                config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(
                    pretrained_model_name_or_path, **kwargs
                )
                processor_class = config_dict.get("processor_class", None)

        if processor_class is None:
            # Next, let's check whether the processor class is saved in a tokenizer
            tokenizer_config_file = get_file_from_repo(
                pretrained_model_name_or_path,
                TOKENIZER_CONFIG_FILE,
                **get_file_from_repo_kwargs,
            )
            if tokenizer_config_file is not None:
                with open(tokenizer_config_file, encoding="utf-8") as reader:
                    config_dict = json.load(reader)

                processor_class = config_dict.get("processor_class", None)

        if processor_class is None:
            # Otherwise, load config, if it can be loaded.
            if not isinstance(config, PretrainedConfig):
                config = AutoConfig.from_pretrained(
                    pretrained_model_name_or_path,
                    trust_remote_code=trust_remote_code,
                    **kwargs,
                )

            # And check if the config contains the processor class.
            processor_class = getattr(config, "processor_class", None)

        if processor_class is not None:
            processor_class = processor_class_from_name(processor_class)

        if processor_class is not None:
            return processor_class.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        # Last try: we use the PROCESSOR_MAPPING.
        if type(config) in PROCESSOR_MAPPING:
            return PROCESSOR_MAPPING[type(config)].from_pretrained(
                pretrained_model_name_or_path, **kwargs
            )

        # At this stage, there doesn't seem to be a `Processor` class available for this model, so let's try a
        # tokenizer.
        try:
            return AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        except Exception:
            try:
                return AutoImageProcessor.from_pretrained(
                    pretrained_model_name_or_path,
                    trust_remote_code=trust_remote_code,
                    **kwargs,
                )
            except Exception:
                pass

            try:
                return AutoFeatureExtractor.from_pretrained(
                    pretrained_model_name_or_path,
                    trust_remote_code=trust_remote_code,
                    **kwargs,
                )
            except Exception:
                pass

        raise ValueError(
            f"Unrecognized processing class in {pretrained_model_name_or_path}. Can't instantiate a processor, a "
            "tokenizer, an image processor or a feature extractor for this model. Make sure the repository contains "
            "the files of at least one of those processing classes."
        )

    @staticmethod
    def register(config_class, processor_class, exist_ok=False):
        """
        Register a new processor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            processor_class ([`FeatureExtractorMixin`]): The processor to register.
        """
        PROCESSOR_MAPPING.register(config_class, processor_class, exist_ok=exist_ok)

mindnlp.transformers.models.auto.processing_auto.AutoProcessor.__init__()

__init__(self): Initializes a new instance of the AutoProcessor class.

PARAMETER DESCRIPTION
self

The instance of the AutoProcessor class.

TYPE: object

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
EnvironmentError

This method raises an EnvironmentError with the message 'AutoProcessor is designed to be instantiated using the AutoProcessor.from_pretrained(pretrained_model_name_or_path) method.'

Source code in mindnlp/transformers/models/auto/processing_auto.py
def __init__(self):
    """
    Class: AutoProcessor

    __init__(self)
        Initializes a new instance of the AutoProcessor class.

    Args:
        self (object): The instance of the AutoProcessor class.

    Returns:
        None.

    Raises:
        EnvironmentError: This method raises an EnvironmentError with the message 'AutoProcessor is designed to be
        instantiated using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method.'

    """
    raise EnvironmentError(
        "AutoProcessor is designed to be instantiated "
        "using the `AutoProcessor.from_pretrained(pretrained_model_name_or_path)` method."
    )

mindnlp.transformers.models.auto.processing_auto.AutoProcessor.from_pretrained(pretrained_model_name_or_path, **kwargs) classmethod

Instantiate one of the processor classes of the library from a pretrained model vocabulary.

The processor class to instantiate is selected based on the model_type property of the config object (either passed as an argument or loaded from pretrained_model_name_or_path if possible):

List options

PARAMETER DESCRIPTION
pretrained_model_name_or_path

This can be either:

  • a string, the model id of a pretrained feature_extractor hosted inside a model repo on hf-mirror.com.
  • a path to a directory containing processor files saved using the save_pretrained() method, e.g., ./my_model_directory/.

TYPE: `str` or `os.PathLike`

cache_dir

Path to a directory in which a downloaded pretrained model feature extractor should be cached if the standard cache should not be used.

TYPE: `str` or `os.PathLike`, *optional*

force_download

Whether or not to force to (re-)download the feature extractor files and override the cached versions if they exist.

TYPE: `bool`, *optional*, defaults to `False`

resume_download

Whether or not to delete an incompletely received file. Attempts to resume the download if such a file exists.

TYPE: `bool`, *optional*, defaults to `False`

proxies

A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request.

TYPE: `Dict[str, str]`, *optional*

token

The token to use as HTTP bearer authorization for remote files. If True, will use the token generated when running huggingface-cli login (stored in ~/.huggingface).

TYPE: `str` or *bool*, *optional*

revision

The specific model version to use. It can be a branch name, a tag name, or a commit id. Since we use a git-based system for storing models and other artifacts on hf-mirror.com, revision can be any identifier allowed by git.

TYPE: `str`, *optional*, defaults to `"main"`

return_unused_kwargs

If False, this function returns just the final feature extractor object. If True, it returns a Tuple(feature_extractor, unused_kwargs), where unused_kwargs is a dictionary consisting of the key/value pairs whose keys are not feature extractor attributes, i.e. the part of kwargs which has not been used to update feature_extractor and is otherwise ignored.

TYPE: `bool`, *optional*, defaults to `False`

trust_remote_code

Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

TYPE: `bool`, *optional*, defaults to `False`

kwargs

The values in kwargs of any keys which are feature extractor attributes will be used to override the loaded values. Behavior concerning key/value pairs whose keys are not feature extractor attributes is controlled by the return_unused_kwargs keyword parameter.

TYPE: `Dict[str, Any]`, *optional* DEFAULT: {}

Passing token=True is required when you want to use a private model.

Example
>>> from transformers import AutoProcessor
...
>>> # Download processor from hf-mirror.com and cache.
>>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
...
>>> # If processor files are in a directory (e.g. processor was saved using *save_pretrained('./test/saved_model/')*)
>>> # processor = AutoProcessor.from_pretrained("./test/saved_model/")
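
For private repositories, pass the token explicitly or let token=True pick up the cached login. A hedged sketch ("your-namespace/private-model" is a placeholder, not a real repository):

>>> # Uses the token stored by `huggingface-cli login`:
>>> # processor = AutoProcessor.from_pretrained("your-namespace/private-model", token=True)
>>> # Or pass a token string directly:
>>> # processor = AutoProcessor.from_pretrained("your-namespace/private-model", token="<your-token>")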
Source code in mindnlp/transformers/models/auto/processing_auto.py
@classmethod
@replace_list_option_in_docstrings(PROCESSOR_MAPPING_NAMES)
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    r"""
    Instantiate one of the processor classes of the library from a pretrained model vocabulary.

    The processor class to instantiate is selected based on the `model_type` property of the config object (either
    passed as an argument or loaded from `pretrained_model_name_or_path` if possible):

    List options

    Params:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

           - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
              hf-mirror.com.
           - a path to a *directory* containing processor files saved using the `save_pretrained()` method,
              e.g., `./my_model_directory/`.
        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
            standard cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the feature extractor files and override the cached versions
            if they exist.
        resume_download (`bool`, *optional*, defaults to `False`):
            Whether or not to delete an incompletely received file. Attempts to resume the download if such a file
            exists.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on hf-mirror.com, so `revision` can be any
            identifier allowed by git.
        return_unused_kwargs (`bool`, *optional*, defaults to `False`):
            If `False`, then this function returns just the final feature extractor object. If `True`, then this
            function returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
            consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
            `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
            should only be set to `True` for repositories you trust and in which you have read the code, as it will
            execute code present on the Hub on your local machine.
        kwargs (`Dict[str, Any]`, *optional*):
            The values in kwargs of any keys which are feature extractor attributes will be used to override the
            loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
            controlled by the `return_unused_kwargs` keyword parameter.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Example:
        ```python
        >>> from transformers import AutoProcessor
        ...
        >>> # Download processor from hf-mirror.com and cache.
        >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
        ...
        >>> # If processor files are in a directory (e.g. processor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # processor = AutoProcessor.from_pretrained("./test/saved_model/")
        ```
    """
    use_auth_token = kwargs.pop("use_auth_token", None)
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
            FutureWarning,
        )
        if kwargs.get("token", None) is not None:
            raise ValueError(
                "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
            )
        kwargs["token"] = use_auth_token

    config = kwargs.pop("config", None)
    trust_remote_code = kwargs.pop("trust_remote_code", None)
    kwargs["_from_auto"] = True

    processor_class = None

    # First, let's see if we have a processor or preprocessor config.
    # Filter the kwargs for `get_file_from_repo`.
    get_file_from_repo_kwargs = {
        key: kwargs[key]
        for key in inspect.signature(get_file_from_repo).parameters.keys()
        if key in kwargs
    }

    # Let's start by checking whether the processor class is saved in a processor config
    processor_config_file = get_file_from_repo(
        pretrained_model_name_or_path, PROCESSOR_NAME, **get_file_from_repo_kwargs
    )
    if processor_config_file is not None:
        config_dict, _ = ProcessorMixin.get_processor_dict(
            pretrained_model_name_or_path, **kwargs
        )
        processor_class = config_dict.get("processor_class", None)

    if processor_class is None:
        # If not found, let's check whether the processor class is saved in an image processor config
        preprocessor_config_file = get_file_from_repo(
            pretrained_model_name_or_path,
            FEATURE_EXTRACTOR_NAME,
            **get_file_from_repo_kwargs,
        )
        if preprocessor_config_file is not None:
            config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
                pretrained_model_name_or_path, **kwargs
            )
            processor_class = config_dict.get("processor_class", None)

        # If not found, let's check whether the processor class is saved in a feature extractor config
        if preprocessor_config_file is not None and processor_class is None:
            config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(
                pretrained_model_name_or_path, **kwargs
            )
            processor_class = config_dict.get("processor_class", None)

    if processor_class is None:
        # Next, let's check whether the processor class is saved in a tokenizer
        tokenizer_config_file = get_file_from_repo(
            pretrained_model_name_or_path,
            TOKENIZER_CONFIG_FILE,
            **get_file_from_repo_kwargs,
        )
        if tokenizer_config_file is not None:
            with open(tokenizer_config_file, encoding="utf-8") as reader:
                config_dict = json.load(reader)

            processor_class = config_dict.get("processor_class", None)

    if processor_class is None:
        # Otherwise, load config, if it can be loaded.
        if not isinstance(config, PretrainedConfig):
            config = AutoConfig.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )

        # And check if the config contains the processor class.
        processor_class = getattr(config, "processor_class", None)

    if processor_class is not None:
        processor_class = processor_class_from_name(processor_class)

    if processor_class is not None:
        return processor_class.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    # Last try: we use the PROCESSOR_MAPPING.
    if type(config) in PROCESSOR_MAPPING:
        return PROCESSOR_MAPPING[type(config)].from_pretrained(
            pretrained_model_name_or_path, **kwargs
        )

    # At this stage, there doesn't seem to be a `Processor` class available for this model, so let's try a
    # tokenizer.
    try:
        return AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except Exception:
        try:
            return AutoImageProcessor.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        except Exception:
            pass

        try:
            return AutoFeatureExtractor.from_pretrained(
                pretrained_model_name_or_path,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        except Exception:
            pass

    raise ValueError(
        f"Unrecognized processing class in {pretrained_model_name_or_path}. Can't instantiate a processor, a "
        "tokenizer, an image processor or a feature extractor for this model. Make sure the repository contains "
        "the files of at least one of those processing classes."
    )

mindnlp.transformers.models.auto.processing_auto.AutoProcessor.register(config_class, processor_class, exist_ok=False) staticmethod

Register a new processor for this class.

PARAMETER DESCRIPTION
config_class

The configuration corresponding to the model to register.

TYPE: [`PretrainedConfig`]

processor_class

The processor to register.

TYPE: [`FeatureExtractorMixin`]

Source code in mindnlp/transformers/models/auto/processing_auto.py
@staticmethod
def register(config_class, processor_class, exist_ok=False):
    """
    Register a new processor for this class.

    Args:
        config_class ([`PretrainedConfig`]):
            The configuration corresponding to the model to register.
        processor_class ([`FeatureExtractorMixin`]): The processor to register.
    """
    PROCESSOR_MAPPING.register(config_class, processor_class, exist_ok=exist_ok)
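
Registration follows the same pattern as the other auto classes. A minimal sketch, assuming CustomConfig (a PretrainedConfig subclass with model_type = "custom-model") and CustomProcessor (a ProcessorMixin subclass) are hypothetical user-defined classes, and that AutoConfig.register mirrors the upstream transformers API:

>>> from mindnlp.transformers import AutoConfig, AutoProcessor
>>> # CustomConfig and CustomProcessor are hypothetical user-defined classes.
>>> AutoConfig.register("custom-model", CustomConfig)
>>> AutoProcessor.register(CustomConfig, CustomProcessor)
>>> # AutoProcessor.from_pretrained can now resolve processors for "custom-model" checkpoints.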

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_AUDIO_XVECTOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_BACKBONE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_BACKBONE_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_CAUSAL_LM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_CTC_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_DEPTH_ESTIMATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_IMAGE_TO_IMAGE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_MASK_GENERATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASK_GENERATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_MULTIPLE_CHOICE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_OBJECT_DETECTION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_PRETRAINING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_PRETRAINING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TEXT_ENCODING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_VISION_2_SEQ_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.MODEL_WITH_LM_HEAD_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_WITH_LM_HEAD_MAPPING_NAMES) module-attribute

mindnlp.transformers.models.auto.modeling_auto.AutoBackbone

Bases: _BaseAutoModelClass

Represents an AutoBackbone Python class that inherits from _BaseAutoModelClass.

The AutoBackbone class loads pretrained backbone models for downstream vision tasks such as detection and segmentation. It inherits from the _BaseAutoModelClass, which provides common functionality for all auto models.

Usage

Instantiate AutoBackbone through its from_pretrained or from_config class methods rather than by calling the constructor; like the other auto classes, it resolves and returns the concrete backbone class that matches the given configuration.

Inheritance

The AutoBackbone class inherits from the _BaseAutoModelClass, which is a base class for all auto models. This allows the AutoBackbone class to leverage common functionality and adhere to a consistent interface across different auto models.

Note

It is recommended to review the documentation of the _BaseAutoModelClass for a better understanding of the common functionality and attributes available in the AutoBackbone class.
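
In practice the class is driven entirely by its class methods. A hedged sketch (the checkpoint and the out_indices argument follow the upstream transformers backbone API, which this port is assumed to mirror):

>>> from mindnlp.transformers import AutoBackbone
>>> backbone = AutoBackbone.from_pretrained("microsoft/resnet-50", out_indices=[4])
>>> # Given a pixel_values tensor, the backbone returns intermediate feature maps:
>>> # feature_maps = backbone(pixel_values).feature_maps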

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoBackbone(_BaseAutoModelClass):
    """
    Represents an AutoBackbone Python class that inherits from _BaseAutoModelClass.

    The AutoBackbone class loads pretrained backbone models for downstream vision tasks such as
    detection and segmentation.
    The class inherits from the _BaseAutoModelClass, which provides common functionality for all auto models.

    Attributes:
        None

    Methods:
        None

    Usage:
        Instantiate AutoBackbone through its `from_pretrained` or `from_config` class methods rather
        than by calling the constructor; like the other auto classes, it resolves and returns the
        concrete backbone class that matches the given configuration.

    Inheritance:
        The AutoBackbone class inherits from the _BaseAutoModelClass, which is a base class for all auto models.
        This allows the AutoBackbone class to leverage common functionality and adhere to a consistent interface
        across different auto models.

    Note:
        It is recommended to review the documentation of the _BaseAutoModelClass for a better understanding of
        the common functionality and attributes available in the AutoBackbone class.

    """

    _model_mapping = MODEL_FOR_BACKBONE_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModel

Bases: _BaseAutoModelClass

Represents the generic auto model class for loading pretrained base models.

This class inherits from _BaseAutoModelClass and is instantiated as one of the base model classes of the library when created with the AutoModel.from_pretrained class method: the concrete architecture is resolved from the configuration's model_type and returned with its pretrained weights loaded.
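
A minimal sketch of the intended usage (assuming MindSpore tensors are requested with return_tensors="ms", as is usual in mindnlp):

>>> from mindnlp.transformers import AutoModel, AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
>>> model = AutoModel.from_pretrained("bert-base-uncased")  # resolves to the BERT base model
>>> inputs = tokenizer("Hello world", return_tensors="ms")
>>> outputs = model(**inputs)  # outputs.last_hidden_state holds the encoder states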

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModel(_BaseAutoModelClass):
    """
    Represents the generic auto model class for loading pretrained base models.

    This class inherits from _BaseAutoModelClass and is instantiated as one of the base model classes
    of the library when created with the `AutoModel.from_pretrained` class method: the concrete
    architecture is resolved from the configuration's `model_type` and returned with its pretrained
    weights loaded.
    """

    _model_mapping = MODEL_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForAudioClassification

Bases: _BaseAutoModelClass

This class represents an automatic model for audio classification tasks. It inherits from the _BaseAutoModelClass, providing functionalities for processing audio data and making predictions for classification. The class provides methods and attributes for training, evaluating, and using the model for audio classification tasks.
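
A hedged sketch of typical usage (the keyword-spotting checkpoint is illustrative, and audio_array stands in for a raw 16 kHz waveform):

>>> from mindnlp.transformers import AutoFeatureExtractor, AutoModelForAudioClassification
>>> extractor = AutoFeatureExtractor.from_pretrained("superb/wav2vec2-base-superb-ks")
>>> model = AutoModelForAudioClassification.from_pretrained("superb/wav2vec2-base-superb-ks")
>>> # inputs = extractor(audio_array, sampling_rate=16000, return_tensors="ms")
>>> # logits = model(**inputs).logits
>>> # label = model.config.id2label[int(logits.argmax(-1)[0])]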

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForAudioClassification(_BaseAutoModelClass):
    """
    This class represents an automatic model for audio classification tasks. It inherits from the _BaseAutoModelClass,
    providing functionalities for processing audio data and making predictions for classification.
    The class provides methods and attributes for training, evaluating, and using the model for audio classification tasks.
    """

    _model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForAudioFrameClassification

Bases: _BaseAutoModelClass

Represents an auto model for audio frame classification tasks.

This class loads neural network models designed for frame-level audio classification tasks such as speaker diarization. It inherits functionality from the _BaseAutoModelClass, which provides automatic model selection and configuration: when created with the from_pretrained class method, it resolves the concrete model class from the checkpoint's configuration.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForAudioFrameClassification(_BaseAutoModelClass):
    """
    Represents an auto model for audio frame classification tasks.

    This class loads neural network models designed for frame-level audio classification tasks such as
    speaker diarization. It inherits functionality from the _BaseAutoModelClass, which provides automatic
    model selection and configuration: when created with the `from_pretrained` class method, it resolves
    the concrete model class from the checkpoint's configuration.

    Attributes:
        Inherited attributes from _BaseAutoModelClass

    Methods:
        Inherited methods from _BaseAutoModelClass
    """

    _model_mapping = MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForAudioXVector

Bases: _BaseAutoModelClass

The 'AutoModelForAudioXVector' class loads models that extract x-vector embeddings from audio, a representation commonly used for tasks such as speaker identification and speaker verification.

This class inherits from the '_BaseAutoModelClass', which provides the shared auto-model machinery: when created with the from_pretrained class method, it resolves the concrete x-vector model class from the checkpoint's configuration, so users can work with x-vector features without dealing with the underlying implementation details.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForAudioXVector(_BaseAutoModelClass):
    """
    The 'AutoModelForAudioXVector' class loads models that extract x-vector embeddings from audio,
    a representation commonly used for tasks such as speaker identification and speaker verification.

    This class inherits from the '_BaseAutoModelClass', which provides the shared auto-model machinery:
    when created with the `from_pretrained` class method, it resolves the concrete x-vector model class
    from the checkpoint's configuration, so users can work with x-vector features without dealing with
    the underlying implementation details.
    """

    _model_mapping = MODEL_FOR_AUDIO_XVECTOR_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForCausalLM

Bases: _BaseAutoModelClass

Represents a Python class for an automatic model tailored for Causal Language Modeling tasks. This class inherits from the _BaseAutoModelClass and provides functionality for training, fine-tuning, and utilizing models for causal language modeling tasks. It includes methods for loading pre-trained models, generating text sequences, and evaluating model performance.
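
A minimal sketch of text generation (assuming the generate API mirrors upstream transformers and MindSpore tensors are requested with return_tensors="ms"):

>>> from mindnlp.transformers import AutoModelForCausalLM, AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
>>> model = AutoModelForCausalLM.from_pretrained("gpt2")  # resolves to the GPT-2 LM-head model
>>> inputs = tokenizer("Hello, I am", return_tensors="ms")
>>> output_ids = model.generate(**inputs, max_new_tokens=20)
>>> print(tokenizer.decode(output_ids[0], skip_special_tokens=True))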

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForCausalLM(_BaseAutoModelClass):
    """
    Represents a Python class for an automatic model tailored for Causal Language Modeling tasks.
    This class inherits from the _BaseAutoModelClass and provides functionality for training, fine-tuning, and utilizing models for causal language modeling tasks.
    It includes methods for loading pre-trained models, generating text sequences, and evaluating model performance.
    """

    _model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForCTC

Bases: _BaseAutoModelClass

This class represents an automatic model for Connectionist Temporal Classification (CTC) tasks in Python.

The 'AutoModelForCTC' class inherits from the '_BaseAutoModelClass' class and provides a high-level interface for training, evaluating, and using CTC models. CTC is a type of sequence transduction problem where the input and output sequences have different lengths. It is commonly used in speech recognition and handwriting recognition tasks.

The 'AutoModelForCTC' class resolves and loads the CTC model architecture that matches a given checkpoint or configuration; the returned model can then be fine-tuned, evaluated, or used for inference like any other pretrained model.

To use this class, create a model through the AutoModelForCTC.from_pretrained class method rather than by calling the constructor directly. The class takes care of selecting the right architecture, allowing users to focus on their specific tasks; a short example follows this description.

Note that this class assumes a basic understanding of CTC and neural networks. It is recommended to have prior knowledge of deep learning concepts before using this class. Detailed information about CTC and neural networks can be found in relevant literature and online resources.

For more details on the available methods and functionalities of the 'AutoModelForCTC' class, refer to the documentation and code comments.
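
A hedged sketch of greedy CTC decoding (the checkpoint is a standard wav2vec2 ASR model, and audio_array stands in for a raw 16 kHz waveform):

>>> from mindnlp.transformers import AutoModelForCTC, AutoProcessor
>>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
>>> model = AutoModelForCTC.from_pretrained("facebook/wav2vec2-base-960h")
>>> # inputs = processor(audio_array, sampling_rate=16000, return_tensors="ms")
>>> # logits = model(**inputs).logits
>>> # transcription = processor.batch_decode(logits.argmax(-1))[0]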

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForCTC(_BaseAutoModelClass):
    """
    This class represents an automatic model for Connectionist Temporal Classification (CTC) tasks in Python.

    The 'AutoModelForCTC' class inherits from the '_BaseAutoModelClass' class and provides a high-level interface for
    training, evaluating, and using CTC models.
    CTC is a type of sequence transduction problem where the input and output sequences have different lengths.
    It is commonly used in speech recognition and handwriting recognition tasks.

    The 'AutoModelForCTC' class resolves and loads the CTC model architecture that matches a given
    checkpoint or configuration; the returned model can then be fine-tuned, evaluated, or used for
    inference like any other pretrained model.

    To use this class, create a model through the `AutoModelForCTC.from_pretrained` class method
    rather than by calling the constructor directly. The class takes care of selecting the right
    architecture, allowing users to focus on their specific tasks.

    Note that this class assumes a basic understanding of CTC and neural networks.
    It is recommended to have prior knowledge of deep learning concepts before using this class.
    Detailed information about CTC and neural networks can be found in relevant literature and online resources.

    For more details on the available methods and functionalities of the 'AutoModelForCTC' class, refer to the documentation and code comments.

    """

    _model_mapping = MODEL_FOR_CTC_MAPPING
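
A minimal sketch of CTC-based speech recognition, assuming a wav2vec2-style checkpoint and a hypothetical 1-D `speech_array` of 16 kHz audio samples (both illustrative):

```python
from mindnlp.transformers import AutoProcessor, AutoModelForCTC

processor = AutoProcessor.from_pretrained('facebook/wav2vec2-base-960h')
model = AutoModelForCTC.from_pretrained('facebook/wav2vec2-base-960h')

# speech_array: a 1-D float array of raw audio samples (hypothetical input)
inputs = processor(speech_array, sampling_rate=16000, return_tensors='ms')
logits = model(**inputs).logits

# Greedy CTC decoding: best token per frame; the decoder collapses repeats and blanks
predicted_ids = logits.argmax(-1)
print(processor.batch_decode(predicted_ids))
```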

mindnlp.transformers.models.auto.modeling_auto.AutoModelForDepthEstimation

Bases: _BaseAutoModelClass

Represents a specialized class for automatically generating models for depth estimation tasks. This class inherits functionality from the _BaseAutoModelClass to provide a base structure for creating depth estimation models.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForDepthEstimation(_BaseAutoModelClass):
    """
    Represents a specialized class for automatically generating models for depth estimation tasks.
    This class inherits functionality from the _BaseAutoModelClass to provide a base structure for creating depth estimation models.
    """

    _model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
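
A minimal sketch, assuming a DPT-style checkpoint and a hypothetical PIL `image` (both illustrative):

```python
from mindnlp.transformers import AutoImageProcessor, AutoModelForDepthEstimation

processor = AutoImageProcessor.from_pretrained('Intel/dpt-large')
model = AutoModelForDepthEstimation.from_pretrained('Intel/dpt-large')

# image: a PIL.Image or array-like picture (hypothetical input)
inputs = processor(images=image, return_tensors='ms')
depth = model(**inputs).predicted_depth  # one relative-depth value per pixel
```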

mindnlp.transformers.models.auto.modeling_auto.AutoModelForDocumentQuestionAnswering

Bases: _BaseAutoModelClass

This class represents an auto model for document question answering tasks. It inherits from the _BaseAutoModelClass, providing functionalities for processing text input and generating answers to questions based on the provided document context.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
    """
    This class represents an auto model for document question answering tasks.
    It inherits from the _BaseAutoModelClass, providing functionalities for processing text input
    and generating answers to questions based on the provided document context.
    """

    _model_mapping = MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForImageToImage

Bases: _BaseAutoModelClass

Represents an automatic model for image-to-image tasks.

This class inherits from the _BaseAutoModelClass and provides functionality for automatically selecting and using models for image-to-image tasks. It encapsulates the logic for model selection, configuration, and inference for image-to-image transformation tasks. Users can leverage this class to streamline the process of selecting and using the most suitable model for their specific image-to-image transformation needs.

ATTRIBUTE DESCRIPTION
_BaseAutoModelClass

The base class providing foundational functionality for automatic model selection and usage.

Note

This class is designed to streamline the process of model selection and utilization for image-to-image transformation tasks. It encapsulates the underlying complexities of model selection and configuration, enabling users to focus on the specifics of their image transformation requirements.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForImageToImage(_BaseAutoModelClass):
    """
    Represents an automatic model for image-to-image tasks.

    This class inherits from the _BaseAutoModelClass and provides functionality for automatically selecting and
    using models for image-to-image tasks.
    It encapsulates the logic for model selection, configuration, and inference for image-to-image transformation tasks.
    Users can leverage this class to streamline the process of selecting and using the most suitable model
    for their specific image-to-image transformation needs.

    Attributes:
        _BaseAutoModelClass: The base class providing foundational functionality for automatic model selection and usage.

    Note:
        This class is designed to streamline the process of model selection and utilization for image-to-image transformation tasks.
        It encapsulates the underlying complexities of model selection and configuration, enabling users to focus on
        the specifics of their image transformation requirements.

    """

    _model_mapping = MODEL_FOR_IMAGE_TO_IMAGE_MAPPING
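
A minimal sketch of image super-resolution, assuming a Swin2SR-style checkpoint and a hypothetical PIL `image` (both illustrative):

```python
from mindnlp.transformers import AutoImageProcessor, AutoModelForImageToImage

processor = AutoImageProcessor.from_pretrained('caidas/swin2SR-classical-sr-x2-64')
model = AutoModelForImageToImage.from_pretrained('caidas/swin2SR-classical-sr-x2-64')

inputs = processor(images=image, return_tensors='ms')
outputs = model(**inputs)  # outputs.reconstruction holds the upscaled image tensor
```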

mindnlp.transformers.models.auto.modeling_auto.AutoModelForInstanceSegmentation

Bases: _BaseAutoModelClass

" Represents a class for automatic model generation for instance segmentation tasks.

This class provides functionality for automatically generating models tailored for instance segmentation, which is the task of identifying and delineating individual objects within an image. The class inherits from _BaseAutoModelClass, providing a base for creating specialized instance segmentation models.

ATTRIBUTE DESCRIPTION
_BaseAutoModelClass

The base class for automatic model generation, providing foundational functionality for creating custom models.


Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForInstanceSegmentation(_BaseAutoModelClass):
    """ "
    Represents a class for automatic model generation for instance segmentation tasks.

    This class provides functionality for automatically generating models tailored for instance segmentation,
    which is the task of identifying and delineating individual objects within an image. The class
    inherits from _BaseAutoModelClass, providing a base for creating specialized instance segmentation models.

    Attributes:
        _BaseAutoModelClass:
            The base class for automatic model generation, providing foundational functionality for creating custom models.

    """

    _model_mapping = MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForMaskedLM

Bases: _BaseAutoModelClass

Represents a class for automatically generating masked language model outputs based on a pre-trained model.

This class serves as a specialized extension of the _BaseAutoModelClass, inheriting its core functionality and adding specific methods and attributes tailored for masked language model tasks. It provides a convenient interface for utilizing pre-trained language models to predict masked tokens within a given input sequence.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForMaskedLM(_BaseAutoModelClass):
    """
    Represents a class for automatically generating masked language model outputs based on a pre-trained model.

    This class serves as a specialized extension of the _BaseAutoModelClass,
    inheriting its core functionality and adding specific methods and attributes tailored for masked language model tasks.
    It provides a convenient interface for utilizing pre-trained language models to predict masked tokens
    within a given input sequence.
    """

    _model_mapping = MODEL_FOR_MASKED_LM_MAPPING
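
A minimal sketch of masked-token prediction, assuming the 'bert-base-uncased' checkpoint (illustrative):

```python
from mindnlp.transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModelForMaskedLM.from_pretrained('bert-base-uncased')

inputs = tokenizer('Paris is the [MASK] of France.', return_tensors='ms')
logits = model(**inputs).logits

# Take the highest-scoring vocabulary entry at every position and decode
predicted_ids = logits.argmax(-1)
print(tokenizer.decode(predicted_ids[0]))
```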

mindnlp.transformers.models.auto.modeling_auto.AutoModelForMaskGeneration

Bases: _BaseAutoModelClass

Represents a class for generating masks automatically based on a given model. This class inherits functionality from the _BaseAutoModelClass, providing methods and attributes for mask generation.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForMaskGeneration(_BaseAutoModelClass):
    """
    Represents a class for generating masks automatically based on a given model.
    This class inherits functionality from the _BaseAutoModelClass, providing methods and attributes for mask generation.
    """

    _model_mapping = MODEL_FOR_MASK_GENERATION_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForMultipleChoice

Bases: _BaseAutoModelClass

Represents a class for automatically generating a model for multiple choice tasks.

This class inherits from the _BaseAutoModelClass and provides functionality for creating a model specifically designed for handling multiple choice questions. It encapsulates the logic and operations required for training and inference on multiple choice datasets.

The AutoModelForMultipleChoice class offers a set of methods and attributes for fine-tuning, evaluating, and utilizing the model for multiple choice tasks. It leverages the underlying architecture and components inherited from the _BaseAutoModelClass while adding specific functionality tailored to the requirements of multiple choice scenarios.

Users can instantiate objects of this class to create, customize, and deploy models for multiple choice tasks, enabling seamless integration of machine learning capabilities into applications and workflows dealing with multiple choice question answering.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForMultipleChoice(_BaseAutoModelClass):
    """
    Represents a class for automatically generating a model for multiple choice tasks.

    This class inherits from the _BaseAutoModelClass and provides functionality for creating a model specifically
    designed for handling multiple choice questions.
    It encapsulates the logic and operations required for training and inference on multiple choice datasets.

    The AutoModelForMultipleChoice class offers a set of methods and attributes for
    fine-tuning, evaluating, and utilizing the model for multiple choice tasks.
    It leverages the underlying architecture and components inherited from the _BaseAutoModelClass
    while adding specific functionality tailored to the requirements of multiple choice scenarios.

    Users can instantiate objects of this class to create, customize, and deploy models for multiple choice tasks,
    enabling seamless integration of machine learning capabilities into applications and workflows
    dealing with multiple choice question answering.
    """

    _model_mapping = MODEL_FOR_MULTIPLE_CHOICE_MAPPING
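
A minimal sketch, assuming a BERT-style checkpoint (illustrative). Multiple-choice models expect inputs of shape (batch, num_choices, seq_len), so the prompt is paired with each candidate and the encodings are stacked:

```python
from mindnlp.transformers import AutoTokenizer, AutoModelForMultipleChoice

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModelForMultipleChoice.from_pretrained('bert-base-uncased')

prompt = 'The glass fell off the table,'
choices = ['so it shattered on the floor.', 'so it floated up to the ceiling.']

# Encode (prompt, choice) pairs together, padded to a common length
inputs = tokenizer([prompt, prompt], choices, return_tensors='ms', padding=True)

# Add a batch dimension: (num_choices, seq_len) -> (1, num_choices, seq_len)
outputs = model(
    input_ids=inputs['input_ids'].unsqueeze(0),
    attention_mask=inputs['attention_mask'].unsqueeze(0),
)
best_choice = int(outputs.logits.argmax(-1)[0])  # one logit per choice
```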

mindnlp.transformers.models.auto.modeling_auto.AutoModelForNextSentencePrediction

Bases: _BaseAutoModelClass

A class representing an auto model for next sentence prediction.

This class inherits from _BaseAutoModelClass and provides a pre-trained model for next sentence prediction tasks. It can be used to generate predictions for whether a given pair of sentences are likely to be consecutive in a text sequence.

ATTRIBUTE DESCRIPTION
config

The configuration class used to instantiate the model.

TYPE: AutoConfig

base_model_prefix

The prefix for the base model.

TYPE: str

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForNextSentencePrediction(_BaseAutoModelClass):
    """
    A class representing an auto model for next sentence prediction.

    This class inherits from _BaseAutoModelClass and provides a pre-trained model for next sentence prediction tasks.
    It can be used to generate predictions for whether a given pair of sentences are likely to be consecutive in a text sequence.

    Attributes:
        config (AutoConfig): The configuration class used to instantiate the model.
        base_model_prefix (str): The prefix for the base model.

    """

    _model_mapping = MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING
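
A minimal sketch, assuming the 'bert-base-uncased' checkpoint (illustrative):

```python
from mindnlp.transformers import AutoTokenizer, AutoModelForNextSentencePrediction

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModelForNextSentencePrediction.from_pretrained('bert-base-uncased')

# Encode the sentence pair in one pass
inputs = tokenizer('The sky is clear today.', 'It is a good day for a walk.', return_tensors='ms')
logits = model(**inputs).logits  # index 0: "is the next sentence", index 1: "is random"
```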

mindnlp.transformers.models.auto.modeling_auto.AutoModelForObjectDetection

Bases: _BaseAutoModelClass

Represents a class for automatic model selection and configuration for object detection tasks.

This class inherits from _BaseAutoModelClass and selects the appropriate object detection architecture based on the configuration of the requested checkpoint.

The AutoModelForObjectDetection class encapsulates the model selection and loading logic, making it a convenient and efficient tool for instantiating object detection models without naming a concrete architecture.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForObjectDetection(_BaseAutoModelClass):
    """
    Represents a class for automatic model selection and configuration for object detection tasks.

    This class inherits from _BaseAutoModelClass and selects the appropriate object detection architecture
    based on the configuration of the requested checkpoint.

    The AutoModelForObjectDetection class encapsulates the model selection and loading logic,
    making it a convenient and efficient tool for instantiating object detection models
    without naming a concrete architecture.
    """

    _model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
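
A minimal sketch, assuming a DETR-style checkpoint and a hypothetical PIL `image` (both illustrative):

```python
from mindnlp.transformers import AutoImageProcessor, AutoModelForObjectDetection

processor = AutoImageProcessor.from_pretrained('facebook/detr-resnet-50')
model = AutoModelForObjectDetection.from_pretrained('facebook/detr-resnet-50')

inputs = processor(images=image, return_tensors='ms')
outputs = model(**inputs)  # per-query class logits and bounding boxes
```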

mindnlp.transformers.models.auto.modeling_auto.AutoModelForPreTraining

Bases: _BaseAutoModelClass

Represents a Python class for an auto model used for pre-training natural language processing (NLP) tasks. This class inherits functionality from the _BaseAutoModelClass, providing a foundation for pre-training NLP models. It encapsulates methods and attributes specific to pre-training tasks, allowing for efficient development and training of NLP models.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForPreTraining(_BaseAutoModelClass):
    """
    Represents a Python class for an auto model used for pre-training natural language processing (NLP) tasks.
    This class inherits functionality from the _BaseAutoModelClass, providing a foundation for pre-training NLP models.
    It encapsulates methods and attributes specific to pre-training tasks, allowing for efficient development and training of NLP models.
    """

    _model_mapping = MODEL_FOR_PRETRAINING_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForQuestionAnswering

Bases: _BaseAutoModelClass

This class represents an automatic model for question answering in Python. It is a subclass of the _BaseAutoModelClass, which provides a base implementation for automatic models.

The AutoModelForQuestionAnswering class is designed to handle the task of question answering, where given a question and a context, it predicts the answer within the given context. It leverages pre-trained models and fine-tuning techniques to achieve high accuracy and performance.

ATTRIBUTE DESCRIPTION
model_name_or_path

The name or path of the pre-trained model to be used for question answering.

TYPE: str

config

The configuration object that holds the model's configuration settings.

TYPE: AutoConfig

tokenizer

The tokenizer used to preprocess input data for the model.

TYPE: PreTrainedTokenizer

model

The pre-trained model for question answering.

TYPE: PreTrainedModel

METHOD DESCRIPTION
from_pretrained

Class method that loads a pre-trained model and returns an instance of the AutoModelForQuestionAnswering class.

forward

Performs a forward pass through the model given input IDs and other optional arguments, and returns start and end logits that locate the answer span.

save_pretrained

Saves the model and its configuration to the specified directory for future use.

from_config

Class method that creates an instance of the AutoModelForQuestionAnswering class from a provided configuration object.

resize_token_embeddings

Resizes the token embeddings of the model to match the new number of tokens.

Example
>>> # Load a tokenizer and a question answering model
>>> tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
>>> model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')
...
>>> # Perform question answering
>>> question = "What is the capital of France?"
>>> context = "Paris is the capital of France."
>>> inputs = tokenizer(question, context, return_tensors='ms')
>>> outputs = model(**inputs)  # start_logits and end_logits locate the answer span
...
>>> # Save the model
>>> model.save_pretrained('models/qa_model')
...
>>> # Load the saved model
>>> loaded_model = AutoModelForQuestionAnswering.from_pretrained('models/qa_model')
Note

The AutoModelForQuestionAnswering class is built on top of the transformers library, which provides a wide range of pre-trained models for various NLP tasks. It is recommended to refer to the transformers documentation for more details on using this class and customizing its behavior.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForQuestionAnswering(_BaseAutoModelClass):
    """
    This class represents an automatic model for question answering in Python. It is a subclass of the _BaseAutoModelClass,
    which provides a base implementation for automatic models.

    The AutoModelForQuestionAnswering class is designed to handle the task of question answering,
    where given a question and a context, it predicts the answer within the given context.
    It leverages pre-trained models and fine-tuning techniques to achieve high accuracy and performance.

    Attributes:
        model_name_or_path (str): The name or path of the pre-trained model to be used for question answering.
        config (AutoConfig): The configuration object that holds the model's configuration settings.
        tokenizer (PreTrainedTokenizer): The tokenizer used to preprocess input data for the model.
        model (PreTrainedModel): The pre-trained model for question answering.

    Methods:
        from_pretrained: Class method that loads a pre-trained model and returns an instance of the AutoModelForQuestionAnswering class.
        forward: Performs a forward pass through the model given input IDs and other optional arguments, and returns start and end logits that locate the answer span.
        save_pretrained: Saves the model and its configuration to the specified directory for future use.
        from_config: Class method that creates an instance of the AutoModelForQuestionAnswering class from a provided configuration object.
        resize_token_embeddings: Resizes the token embeddings of the model to match the new number of tokens.

    Example:
        ```python
        >>> # Load a tokenizer and a question answering model
        >>> tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        >>> model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')
        ...
        >>> # Perform question answering
        >>> question = "What is the capital of France?"
        >>> context = "Paris is the capital of France."
        >>> inputs = tokenizer(question, context, return_tensors='ms')
        >>> outputs = model(**inputs)  # start_logits and end_logits locate the answer span
        ...
        >>> # Save the model
        >>> model.save_pretrained('models/qa_model')
        ...
        >>> # Load the saved model
        >>> loaded_model = AutoModelForQuestionAnswering.from_pretrained('models/qa_model')
        ```

    Note:
        The AutoModelForQuestionAnswering class is built on top of the transformers library,
        which provides a wide range of pre-trained models for various NLP tasks.
        It is recommended to refer to the transformers documentation for more details on using this class and customizing its behavior.
    """

    _model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM

Bases: _BaseAutoModelClass

Represents a class for automatic generation of models for sequence-to-sequence language modeling tasks. This class inherits functionality from the _BaseAutoModelClass, providing a base for creating and customizing sequence-to-sequence language models for various natural language processing applications.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForSeq2SeqLM(_BaseAutoModelClass):
    """
    Represents a class for automatic generation of models for sequence-to-sequence language modeling tasks.
    This class inherits functionality from the _BaseAutoModelClass, providing a base for creating and customizing
    sequence-to-sequence language models for various natural language processing applications.
    """

    _model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
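
A minimal sketch of encoder-decoder generation, assuming the 't5-small' checkpoint (illustrative):

```python
from mindnlp.transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained('t5-small')
model = AutoModelForSeq2SeqLM.from_pretrained('t5-small')

inputs = tokenizer('translate English to German: Hello, world!', return_tensors='ms')
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```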

mindnlp.transformers.models.auto.modeling_auto.AutoModelForSequenceClassification

Bases: _BaseAutoModelClass

The 'AutoModelForSequenceClassification' class represents an automatic model for sequence classification tasks in Python. This class inherits functionality from the '_BaseAutoModelClass' class and provides a high-level interface for creating and utilizing pre-trained models for sequence classification tasks.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForSequenceClassification(_BaseAutoModelClass):
    """
    The 'AutoModelForSequenceClassification' class represents an automatic model for sequence classification tasks in Python.
    This class inherits functionality from the '_BaseAutoModelClass' class and provides a high-level interface for
    creating and utilizing pre-trained models for sequence classification tasks.
    """

    _model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
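
A minimal sketch of sentiment classification, assuming a sentiment-fine-tuned checkpoint (illustrative):

```python
from mindnlp.transformers import AutoTokenizer, AutoModelForSequenceClassification

name = 'distilbert-base-uncased-finetuned-sst-2-english'
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name)

inputs = tokenizer('A thoroughly enjoyable read.', return_tensors='ms')
logits = model(**inputs).logits
label = model.config.id2label[int(logits.argmax(-1)[0])]  # e.g. 'POSITIVE'
```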

mindnlp.transformers.models.auto.modeling_auto.AutoModelForSpeechSeq2Seq

Bases: _BaseAutoModelClass

This class represents an automatic model for speech sequence-to-sequence (Seq2Seq) tasks in Python.

The 'AutoModelForSpeechSeq2Seq' class is a subclass of the '_BaseAutoModelClass' and provides a pre-trained model for speech-to-text conversion tasks. It is designed to simplify the process of building and training speech Seq2Seq models by providing a high-level interface for developers.

The class inherits the properties and methods of the '_BaseAutoModelClass', which cover model configuration, loading, and inference. Audio preprocessing and text tokenization are handled by the companion processor and tokenizer classes rather than by the model itself.

To use this class, create a model with the 'from_pretrained' or 'from_config' class methods. Once the model is loaded, you can fine-tune it on your speech dataset or run inference on new speech inputs.

Note that this class assumes the availability of a pre-trained model for speech Seq2Seq tasks. If you don't have a pre-trained model, you can refer to the documentation for the '_BaseAutoModelClass' on how to train a model from scratch.

Example
>>> # Checkpoint name and processor usage are illustrative
>>> model = AutoModelForSpeechSeq2Seq.from_pretrained('openai/whisper-tiny')
>>> generated_ids = model.generate(inputs['input_features'])
>>> transcriptions = processor.batch_decode(generated_ids, skip_special_tokens=True)

Please refer to the documentation of the '_BaseAutoModelClass' for more details on general model functionalities and best practices for training and fine-tuning models.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForSpeechSeq2Seq(_BaseAutoModelClass):
    """
    This class represents an automatic model for speech sequence-to-sequence (Seq2Seq) tasks in Python.

    The 'AutoModelForSpeechSeq2Seq' class is a subclass of the '_BaseAutoModelClass' and provides a pre-trained model
    for speech-to-text conversion tasks. It is designed to simplify the process of building and training
    speech Seq2Seq models by providing a high-level interface for developers.

    The class inherits the properties and methods of the '_BaseAutoModelClass', which cover model
    configuration, loading, and inference.
    Audio preprocessing and text tokenization are handled by the companion processor and tokenizer
    classes rather than by the model itself.

    To use this class, create a model with the 'from_pretrained' or 'from_config' class methods.
    Once the model is loaded, you can fine-tune it on your speech dataset or run inference on new speech inputs.

    Note that this class assumes the availability of a pre-trained model for speech Seq2Seq tasks.
    If you don't have a pre-trained model, you can refer to the documentation for the '_BaseAutoModelClass' on how
    to train a model from scratch.

    Example:
        ```python
        >>> # Checkpoint name and processor usage are illustrative
        >>> model = AutoModelForSpeechSeq2Seq.from_pretrained('openai/whisper-tiny')
        >>> generated_ids = model.generate(inputs['input_features'])
        >>> transcriptions = processor.batch_decode(generated_ids, skip_special_tokens=True)
        ```

    Please refer to the documentation of the '_BaseAutoModelClass' for more details on general model functionalities and best practices for training and fine-tuning models.
    """

    _model_mapping = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTableQuestionAnswering

Bases: _BaseAutoModelClass

AutoModelForTableQuestionAnswering is a Python class that represents a model for table-based question answering tasks. This class inherits from the _BaseAutoModelClass, providing functionality for processing and generating answers for questions related to tables.

This class encapsulates the necessary methods and attributes for initializing, loading, and utilizing a pre-trained model for table question answering. It provides an interface for encoding table data and questions, and generating answers based on the learned patterns and representations.

The AutoModelForTableQuestionAnswering class is designed to be flexible and customizable, allowing users to fine-tune and adapt the model to specific table question answering tasks. It serves as a high-level abstraction for working with table-based question answering models, enabling seamless integration into various applications and workflows.

Users can leverage the capabilities of this class to efficiently handle table question answering tasks, benefiting from the underlying mechanisms for processing and interpreting tabular data in the context of natural language questions. The class facilitates the integration of table question answering functionality into larger projects, providing a powerful and efficient solution for handling such tasks within a Python environment.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForTableQuestionAnswering(_BaseAutoModelClass):
    """
    AutoModelForTableQuestionAnswering is a Python class that represents a model for table-based question answering tasks.
    This class inherits from the _BaseAutoModelClass, providing functionality for processing and generating answers for questions related to tables.

    This class encapsulates the necessary methods and attributes for initializing, loading,
    and utilizing a pre-trained model for table question answering.
    It provides an interface for encoding table data and questions, and generating answers
    based on the learned patterns and representations.

    The AutoModelForTableQuestionAnswering class is designed to be flexible and customizable,
    allowing users to fine-tune and adapt the model to specific table question answering tasks.
    It serves as a high-level abstraction for working with table-based question answering models,
    enabling seamless integration into various applications and workflows.

    Users can leverage the capabilities of this class to efficiently handle table question answering tasks,
    benefiting from the underlying mechanisms for processing and interpreting tabular data in the context
    of natural language questions.
    The class facilitates the integration of table question answering functionality into larger projects,
    providing a powerful and efficient solution for handling such tasks within a Python environment.
    """

    _model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTextEncoding

Bases: _BaseAutoModelClass

The AutoModelForTextEncoding class represents a model for encoding text data. It is a subclass of the _BaseAutoModelClass and inherits its behavior and attributes. This class provides functionality for automatically encoding text data and can be used for various natural language processing tasks.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForTextEncoding(_BaseAutoModelClass):
    """
    The AutoModelForTextEncoding class represents a model for encoding text data.
    It is a subclass of the _BaseAutoModelClass and inherits its behavior and attributes.
    This class provides functionality for automatically encoding text data and can be used for various natural language processing tasks.
    """

    _model_mapping = MODEL_FOR_TEXT_ENCODING_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTextToSpectrogram

Bases: _BaseAutoModelClass

Represents a Python class for generating spectrograms from text using an auto model for text-to-spectrogram conversion. This class inherits from the _BaseAutoModelClass, providing additional functionality and customization options for text-to-spectrogram processing.

The AutoModelForTextToSpectrogram class encapsulates the necessary methods and attributes for processing text inputs and generating corresponding spectrograms. It leverages the functionalities inherited from the _BaseAutoModelClass and extends them with specific capabilities tailored for the text-to-spectrogram transformation.

This class converts text inputs into spectrograms, the intermediate acoustic representation used in text-to-speech pipelines; a vocoder can then turn a spectrogram into an audio waveform. By utilizing the AutoModelForTextToSpectrogram, users can efficiently process text inputs and obtain corresponding spectrogram outputs for applications such as speech synthesis and audio processing.

Note

Please refer to the _BaseAutoModelClass documentation for inherited methods and attributes.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForTextToSpectrogram(_BaseAutoModelClass):
    """
    Represents a Python class for generating spectrograms from text using an auto model for text-to-spectrogram conversion.
    This class inherits from the _BaseAutoModelClass, providing additional functionality and customization options
    for text-to-spectrogram processing.

    The AutoModelForTextToSpectrogram class encapsulates the necessary methods and attributes for processing text inputs
    and generating corresponding spectrograms.
    It leverages the functionalities inherited from the _BaseAutoModelClass and extends them with specific capabilities
    tailored for the text-to-spectrogram transformation.

    This class converts text inputs into spectrograms, the intermediate acoustic representation used in
    text-to-speech pipelines; a vocoder can then turn a spectrogram into an audio waveform.
    By utilizing the AutoModelForTextToSpectrogram, users can efficiently process text inputs and obtain
    corresponding spectrogram outputs for applications such as speech synthesis and audio processing.

    Note:
        Please refer to the _BaseAutoModelClass documentation for inherited methods and attributes.
    """

    _model_mapping = MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTextToWaveform

Bases: _BaseAutoModelClass

AutoModelForTextToWaveform is a Python class that represents an automatic model for converting text to waveform data. This class inherits from the _BaseAutoModelClass, which provides a base implementation for automatic models.

The AutoModelForTextToWaveform class is specifically designed for processing text and generating corresponding waveform data. It leverages various natural language processing techniques and audio generation algorithms to achieve this functionality.

METHOD DESCRIPTION
from_pretrained

Class method that loads a pre-trained text-to-waveform model and returns a model instance.

from_config

Class method that instantiates a model from a configuration object.

Example
>>> # Checkpoint name is illustrative; any text-to-waveform model supported by mindnlp works
>>> model = AutoModelForTextToWaveform.from_pretrained('suno/bark-small')
...
>>> # Tokenize text and generate a waveform (tokenizer loaded separately via AutoTokenizer)
>>> inputs = tokenizer('Hello, how are you?', return_tensors='ms')
>>> waveform = model.generate(**inputs)
...
>>> # Save and reload the model
>>> model.save_pretrained('saved_model')
>>> model = AutoModelForTextToWaveform.from_pretrained('saved_model')
Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForTextToWaveform(_BaseAutoModelClass):
    """
    AutoModelForTextToWaveform is a Python class that represents an automatic model for converting text to waveform data.
    This class inherits from the _BaseAutoModelClass, which provides a base implementation for automatic models.

    The AutoModelForTextToWaveform class is specifically designed for processing text and generating corresponding waveform data.
    It leverages various natural language processing techniques and audio generation algorithms to achieve this functionality.

    Methods:
        from_pretrained: Class method that loads a pre-trained text-to-waveform model and returns a model instance.
        from_config: Class method that instantiates a model from a configuration object.

    Example:
        ```python
        >>> # Checkpoint name is illustrative; any text-to-waveform model supported by mindnlp works
        >>> model = AutoModelForTextToWaveform.from_pretrained('suno/bark-small')
        ...
        >>> # Tokenize text and generate a waveform (tokenizer loaded separately via AutoTokenizer)
        >>> inputs = tokenizer('Hello, how are you?', return_tensors='ms')
        >>> waveform = model.generate(**inputs)
        ...
        >>> # Save and reload the model
        >>> model.save_pretrained('saved_model')
        >>> model = AutoModelForTextToWaveform.from_pretrained('saved_model')
        ```
    """

    _model_mapping = MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForTokenClassification

Bases: _BaseAutoModelClass

AutoModelForTokenClassification is a class that represents an automatic model for token classification in Python. It inherits from _BaseAutoModelClass and provides functionality for token classification tasks. This class is designed to be used with pre-trained models and offers methods for token classification tasks, such as named entity recognition and part-of-speech tagging.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForTokenClassification(_BaseAutoModelClass):
    """
    AutoModelForTokenClassification is a class that represents an automatic model for token classification in Python.
    It inherits from _BaseAutoModelClass and provides functionality for token classification tasks.
    This class is designed to be used with pre-trained models and offers methods for token classification tasks,
    such as named entity recognition and part-of-speech tagging.
    """

    _model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
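
A minimal sketch of named entity recognition, assuming an NER-fine-tuned checkpoint (illustrative):

```python
from mindnlp.transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained('dslim/bert-base-NER')
model = AutoModelForTokenClassification.from_pretrained('dslim/bert-base-NER')

inputs = tokenizer('Ada Lovelace lived in London.', return_tensors='ms')
logits = model(**inputs).logits  # one score per token per entity label

# Map the best label id of each token back to its tag name
predictions = logits.argmax(-1)[0]
tags = [model.config.id2label[int(i)] for i in predictions]
```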

mindnlp.transformers.models.auto.modeling_auto.AutoModelForUniversalSegmentation

Bases: _BaseAutoModelClass

This class represents an automatic model for universal segmentation in Python. It is a subclass of the _BaseAutoModelClass, which provides a base implementation for automatic models.

Universal segmentation is the task of partitioning an image into meaningful regions, unifying semantic, instance, and panoptic segmentation. The AutoModelForUniversalSegmentation class encapsulates the functionality required to load and run models for this task.

METHOD DESCRIPTION
from_pretrained

Class method that loads a pre-trained universal segmentation model and returns a model instance.

from_config

Class method that instantiates a model from a configuration object.

Note

Inputs are expected to be preprocessed by the image processor that matches the chosen checkpoint. The model returns class and mask logits that the processor can post-process into semantic, instance, or panoptic segmentation maps.

Example
>>> # Checkpoint name is illustrative; any universal segmentation model supported by mindnlp works
>>> model = AutoModelForUniversalSegmentation.from_pretrained('shi-labs/oneformer_ade20k_swin_tiny')
>>> outputs = model(**inputs)  # inputs prepared by the matching image processor

For more details on the usage and available models, refer to the documentation and examples provided with this class.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForUniversalSegmentation(_BaseAutoModelClass):
    """
    This class represents an automatic model for universal segmentation in Python.
    It is a subclass of the _BaseAutoModelClass, which provides a base implementation for automatic models.

    Universal segmentation is the task of partitioning an image into meaningful regions, unifying semantic,
    instance, and panoptic segmentation. The AutoModelForUniversalSegmentation class encapsulates the
    functionality required to load and run models for this task.

    Methods:
        from_pretrained: Class method that loads a pre-trained universal segmentation model and returns a model instance.
        from_config: Class method that instantiates a model from a configuration object.

    Note:
        Inputs are expected to be preprocessed by the image processor that matches the chosen checkpoint.
        The model returns class and mask logits that the processor can post-process into semantic, instance,
        or panoptic segmentation maps.

    Example:
        ```python
        >>> # Checkpoint name is illustrative; any universal segmentation model supported by mindnlp works
        >>> model = AutoModelForUniversalSegmentation.from_pretrained('shi-labs/oneformer_ade20k_swin_tiny')
        >>> outputs = model(**inputs)  # inputs prepared by the matching image processor
        ```

    For more details on the usage and available models, refer to the documentation and examples provided with this class.
    """

    _model_mapping = MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForVideoClassification

Bases: _BaseAutoModelClass

Represents a class for automatic model selection for video classification tasks.

This class serves as a specialized implementation for selecting the optimal model for video classification based on specified criteria. It inherits functionality from the _BaseAutoModelClass, providing a foundation for automatic model selection with a focus on video classification tasks.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForVideoClassification(_BaseAutoModelClass):
    """
    Represents a class for automatic model selection for video classification tasks.

    This class serves as a specialized implementation for selecting the optimal model for video classification
    based on specified criteria.
    It inherits functionality from the _BaseAutoModelClass, providing a foundation for automatic model selection
    with a focus on video classification tasks.
    """

    _model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForVision2Seq

Bases: _BaseAutoModelClass

AutoModelForVision2Seq is a Python class that represents an automatic model for vision-to-sequence tasks. This class inherits from the _BaseAutoModelClass, providing additional functionalities specific to vision-to-sequence tasks.

METHOD DESCRIPTION
from_pretrained

Class method that loads a pre-trained vision-to-sequence model and returns a model instance.

from_config

Class method that instantiates a model from a configuration object.

Note

AutoModelForVision2Seq is designed for vision-to-sequence tasks, where the model takes preprocessed image inputs and generates a sequence output such as a caption. from_pretrained returns an instance of the concrete model class, which provides generation and saving utilities such as generate and save_pretrained. The class inherits from _BaseAutoModelClass to leverage the shared functionalities across different automatic models.

Example
>>> # Checkpoint name is illustrative; any vision-to-sequence model supported by mindnlp works
>>> model = AutoModelForVision2Seq.from_pretrained('Salesforce/blip-image-captioning-base')
>>> generated_ids = model.generate(pixel_values)  # pixel_values prepared by the matching image processor
Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForVision2Seq(_BaseAutoModelClass):
    """
    AutoModelForVision2Seq is a Python class that represents an automatic model for vision-to-sequence tasks.
    This class inherits from the _BaseAutoModelClass, providing additional functionalities specific to vision-to-sequence tasks.

    Methods:
        from_pretrained: Class method that loads a pre-trained vision-to-sequence model and returns a model instance.
        from_config: Class method that instantiates a model from a configuration object.

    Note:
        AutoModelForVision2Seq is designed for vision-to-sequence tasks, where the model takes preprocessed
        image inputs and generates a sequence output such as a caption. from_pretrained returns an instance of
        the concrete model class, which provides generation and saving utilities such as generate and
        save_pretrained. The class inherits from _BaseAutoModelClass to leverage the shared functionalities
        across different automatic models.

    Example:
        ```python
        >>> # Checkpoint name is illustrative; any vision-to-sequence model supported by mindnlp works
        >>> model = AutoModelForVision2Seq.from_pretrained('Salesforce/blip-image-captioning-base')
        >>> generated_ids = model.generate(pixel_values)  # pixel_values prepared by the matching image processor
        ```
    """

    _model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING

mindnlp.transformers.models.auto.modeling_auto.AutoModelForVisualQuestionAnswering

Bases: _BaseAutoModelClass

Represents a specialized model class for visual question answering (VQA) tasks.

This class serves as an extension of the _BaseAutoModelClass and provides functionality tailored specifically for visual question answering applications. It encapsulates the necessary components and methods for processing both visual and textual inputs to generate accurate answers to questions related to images. Users can leverage the capabilities of this class to build, train, and deploy VQA models with ease.

Inherits from _BaseAutoModelClass, a base class that defines essential attributes and methods for auto-generated model classes.

Usage

Create a model with AutoModelForVisualQuestionAnswering.from_pretrained to access its VQA-specific functionalities and utilize them in developing VQA solutions. The returned model can be fine-tuned to adapt to different datasets and requirements, enhancing its performance on varying VQA tasks.

Note

It is recommended to refer to the documentation of _BaseAutoModelClass for general information on inherited attributes and methods.

For detailed information on the implementation and usage of AutoModelForVisualQuestionAnswering, please refer to the official documentation or codebase.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForVisualQuestionAnswering(_BaseAutoModelClass):
    """
    Represents a specialized model class for visual question answering (VQA) tasks.

    This class serves as an extension of the _BaseAutoModelClass and provides functionality tailored specifically
    for visual question answering applications.
    It encapsulates the necessary components and methods for processing both visual and textual inputs to
    generate accurate answers to questions related to images. Users can leverage the capabilities of this class to
    build, train, and deploy VQA models with ease.

    Inherits from _BaseAutoModelClass, a base class that defines essential attributes and methods for
    auto-generated model classes.

    Usage:
        Create a model with AutoModelForVisualQuestionAnswering.from_pretrained to access its VQA-specific
        functionalities and utilize them in developing VQA solutions. The returned model can be fine-tuned
        to adapt to different datasets and requirements, enhancing its performance on varying VQA tasks.

    Note:
        It is recommended to refer to the documentation of _BaseAutoModelClass for general information on inherited attributes and methods.

    For detailed information on the implementation and usage of AutoModelForVisualQuestionAnswering, please refer to the official documentation or codebase.
    """

    _model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING
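
A minimal sketch, assuming a BLIP-style VQA checkpoint and a hypothetical PIL `image` (both illustrative):

```python
from mindnlp.transformers import AutoProcessor, AutoModelForVisualQuestionAnswering

processor = AutoProcessor.from_pretrained('Salesforce/blip-vqa-base')
model = AutoModelForVisualQuestionAnswering.from_pretrained('Salesforce/blip-vqa-base')

# Combine the picture and the question into one batch of model inputs
inputs = processor(images=image, text='How many cats are in the picture?', return_tensors='ms')
answer_ids = model.generate(**inputs)
print(processor.batch_decode(answer_ids, skip_special_tokens=True))
```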

mindnlp.transformers.models.auto.modeling_auto.AutoModelForZeroShotImageClassification

Bases: _BaseAutoModelClass

This class represents an automatic model for zero-shot image classification in Python.

The 'AutoModelForZeroShotImageClassification' class is a subclass of the '_BaseAutoModelClass' class, which provides a base implementation for automatic models. It is designed specifically for zero-shot image classification tasks, where images are classified into predefined classes based on their visual content.

The class encapsulates the functionality needed to load and run a model for zero-shot image classification. Image and text preprocessing are handled by the matching processor class, and it provides convenient interfaces to load and save models, as well as to fine-tune pre-trained models for specific tasks.

One of the key features of this class is its ability to handle zero-shot learning, where the model can classify images into classes that were not seen during training. This is achieved through the use of semantic embeddings or textual descriptions associated with each class. By leveraging the semantic information, the model can make predictions for unseen classes based on their similarity to the seen classes.

To use this class, load a model with 'from_pretrained' and provide candidate class labels as text prompts alongside the images. The model scores each image against each prompt, and the highest-scoring prompt gives the predicted class.

Note that this class assumes the input images are in a suitable format and the class labels or semantic embeddings are provided for zero-shot learning. It is recommended to preprocess the data and ensure the proper format before using this class.

For more details on how to use this class, please refer to the documentation and examples provided with the package.

METHOD DESCRIPTION
from_pretrained

Class method that loads a pre-trained zero-shot image classification model and returns a model instance.

from_config

Class method that instantiates a model from a configuration object.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForZeroShotImageClassification(_BaseAutoModelClass):
    """
    This class represents an automatic model for zero-shot image classification in Python.

    The 'AutoModelForZeroShotImageClassification' class is a subclass of the '_BaseAutoModelClass' class,
    which provides a base implementation for automatic models.
    It is designed specifically for zero-shot image classification tasks,
    where images are classified into predefined classes based on their visual content.

    The class encapsulates the functionality needed to load and run a model for zero-shot image classification.
    Image and text preprocessing are handled by the matching processor class, and it provides convenient interfaces
    to load and save models, as well as to fine-tune pre-trained models for specific tasks.

    One of the key features of this class is its ability to handle zero-shot learning,
    where the model can classify images into classes that were not seen during training.
    This is achieved through the use of semantic embeddings or textual descriptions associated with each class.
    By leveraging the semantic information, the model can make predictions for unseen classes based on their similarity to the seen classes.

    To use this class, you can instantiate an object of the 'AutoModelForZeroShotImageClassification' class
    and provide the necessary parameters, such as the training data, class labels, and hyperparameters.
    Once the model is trained, you can use it to classify new images by calling the appropriate methods.

    Note that this class assumes the input images are in a suitable format and the class labels or
    semantic embeddings are provided for zero-shot learning.
    It is recommended to preprocess the data and ensure the proper format before using this class.

    For more details on how to use this class, please refer to the documentation and examples provided with the package.

    Attributes:
        None.

    Methods:
        __init__(self, *args, **kwargs): Initializes the 'AutoModelForZeroShotImageClassification' object with the given parameters.
        preprocess_data(self, data): Preprocesses the input data, such as resizing images, normalizing pixel values, etc.
        train(self, train_data, train_labels, **kwargs): Trains the model using the provided training data and labels.
        tune_hyperparameters(self, train_data, train_labels, **kwargs): Performs hyperparameter tuning to optimize the model's performance.
        evaluate(self, test_data, test_labels): Evaluates the trained model on the provided test data and labels.
        classify(self, images): Classifies the given images into their respective classes.
        save_model(self, filepath): Saves the trained model to the specified filepath.
        load_model(self, filepath): Loads a pre-trained model from the specified filepath.
        fine_tune(self, new_data, new_labels): Fine-tunes the pre-trained model on new data and labels for transfer learning.

    """

    _model_mapping = MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING
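
A minimal usage sketch follows. It assumes a CLIP checkpoint ('openai/clip-vit-base-patch32'), a local image file 'cat.png', and that AutoProcessor is exported from mindnlp.transformers alongside the auto model classes; the return_tensors="ms" convention is likewise an assumption carried over from the transformers-style API, not something this class documents.

from PIL import Image
import mindspore
from mindnlp.transformers import AutoProcessor, AutoModelForZeroShotImageClassification

# The auto classes resolve the checkpoint's config to a concrete model,
# here a CLIP-style image-text model (assumed checkpoint).
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = AutoModelForZeroShotImageClassification.from_pretrained("openai/clip-vit-base-patch32")

image = Image.open("cat.png")  # hypothetical local image
candidate_labels = ["a photo of a cat", "a photo of a dog"]

# The processor tokenizes the candidate labels and preprocesses the image.
inputs = processor(text=candidate_labels, images=image, return_tensors="ms", padding=True)
outputs = model(**inputs)

# logits_per_image holds the image-text similarity scores; softmax turns
# them into a probability distribution over the candidate labels.
probs = mindspore.ops.softmax(outputs.logits_per_image, axis=-1)
print(dict(zip(candidate_labels, probs[0].tolist())))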

mindnlp.transformers.models.auto.modeling_auto.AutoModelForZeroShotObjectDetection

Bases: _BaseAutoModelClass

The AutoModelForZeroShotObjectDetection class represents an automatic model for zero-shot object detection. It inherits from the _BaseAutoModelClass and provides functionality for detecting objects in images without the need for training on specific object classes.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelForZeroShotObjectDetection(_BaseAutoModelClass):
    """
    The AutoModelForZeroShotObjectDetection class represents an automatic model for zero-shot object detection.
    It inherits from the _BaseAutoModelClass and provides functionality for detecting objects in images without
    the need for training on specific object classes.
    """

    _model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING
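
As a quick illustration (not one of the library's documented examples), the sketch below assumes an OWL-ViT checkpoint is available and shows only how the auto class resolves the configuration to the concrete detection model:

from mindnlp.transformers import AutoModelForZeroShotObjectDetection

# from_pretrained reads the checkpoint's configuration and instantiates the
# matching zero-shot detection model registered in _model_mapping.
model = AutoModelForZeroShotObjectDetection.from_pretrained("google/owlvit-base-patch32")
print(type(model).__name__)  # the resolved concrete class, e.g. an OWL-ViT detector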

mindnlp.transformers.models.auto.modeling_auto.AutoModelWithLMHead

Bases: _AutoModelWithLMHead

The AutoModelWithLMHead class is deprecated and will be removed in a future version. Use AutoModelForCausalLM for causal language models, AutoModelForMaskedLM for masked language models, and AutoModelForSeq2SeqLM for encoder-decoder models instead.

Inherits from: _AutoModelWithLMHead

METHOD DESCRIPTION
from_config

Creates an instance of the class from a configuration object.

  • Parameters:
      • config: The configuration object used to initialize the class instance.
  • Returns:
      • An instance of the appropriate model class.

from_pretrained

Creates an instance of the class from a pretrained model.

  • Parameters:
      • pretrained_model_name_or_path: The name or path of the pretrained model.
      • *model_args: Additional positional arguments forwarded to the underlying model.
      • **kwargs: Additional keyword arguments forwarded to the underlying model.
  • Returns:
      • An instance of the appropriate model class.

Note

This class is deprecated and should not be used in new implementations. Please refer to the appropriate classes mentioned above based on your specific use case.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
class AutoModelWithLMHead(_AutoModelWithLMHead):
    """
    The `AutoModelWithLMHead` class is deprecated and will be removed in a future version.
    Use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM`
    for masked language models, and `AutoModelForSeq2SeqLM` for encoder-decoder models instead.

    Inherits from: `_AutoModelWithLMHead`

    Methods:
        from_config(config):
            Creates an instance of the class from a configuration object.

            - Parameters:
                - config: The configuration object used to initialize the class instance.
            - Returns:
                - An instance of the appropriate model class.

        from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs):
            Creates an instance of the class from a pretrained model.

            - Parameters:
                - pretrained_model_name_or_path: The name or path of the pretrained model.
                - *model_args: Additional positional arguments forwarded to the underlying model.
                - **kwargs: Additional keyword arguments forwarded to the underlying model.
            - Returns:
                - An instance of the appropriate model class.

    Note:
        This class is deprecated and should not be used in new implementations.
        Please refer to the appropriate classes mentioned above based on your specific use case.
    """

    @classmethod
    def from_config(cls, config):
        """
        This method creates a model instance from the provided 'config' parameter; the concrete
        model class is selected automatically based on the configuration type.

        Args:
            cls (class): The class the method is called on.
            config (object): The configuration object used to create the instance. It contains the
                information needed to select and initialize the concrete model class.

        Returns:
            A model instance whose concrete class is selected automatically from the configuration type.

        Raises:
            FutureWarning: Always issued to inform the user that 'AutoModelWithLMHead' is deprecated
                and will be removed in a future version. Use 'AutoModelForCausalLM' for causal
                language models, 'AutoModelForMaskedLM' for masked language models, and
                'AutoModelForSeq2SeqLM' for encoder-decoder models instead.
        """
        warnings.warn(
            "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use "
            "`AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and "
            "`AutoModelForSeq2SeqLM` for encoder-decoder models.",
            FutureWarning,
        )
        return super().from_config(config)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        """
        Loads a pretrained model from a given model name or path.

        Args:
            cls (class): The class the method is called on.
            pretrained_model_name_or_path (str): The name or path of the pretrained model.
                This can be a local path or a model identifier from a pretrained model repository.
            *model_args: Additional positional arguments forwarded to the underlying model class.
            **kwargs: Additional keyword arguments forwarded to the underlying model class.

        Returns:
            A pretrained model instance whose concrete class is selected automatically from the
            checkpoint's configuration.

        Raises:
            FutureWarning: Always issued because `AutoModelWithLMHead` is deprecated.
                Use `AutoModelForCausalLM` for causal language models,
                `AutoModelForMaskedLM` for masked language models, and
                `AutoModelForSeq2SeqLM` for encoder-decoder models instead.
        """
        warnings.warn(
            "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use "
            "`AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and "
            "`AutoModelForSeq2SeqLM` for encoder-decoder models.",
            FutureWarning,
        )
        return super().from_pretrained(
            pretrained_model_name_or_path, *model_args, **kwargs
        )
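
Since the class exists only for backward compatibility, a typical migration looks like the sketch below; the 'gpt2' checkpoint is an illustrative assumption, and the warning capture merely demonstrates the FutureWarning emitted above.

import warnings
from mindnlp.transformers import AutoModelWithLMHead, AutoModelForCausalLM

# Deprecated path: still functional, but always emits a FutureWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy = AutoModelWithLMHead.from_pretrained("gpt2")
assert any(issubclass(w.category, FutureWarning) for w in caught)

# Preferred path: pick the task-specific auto class explicitly.
model = AutoModelForCausalLM.from_pretrained("gpt2")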

mindnlp.transformers.models.auto.modeling_auto.AutoModelWithLMHead.from_config(config) classmethod

This method creates a model instance from the provided 'config' parameter; the concrete model class is selected automatically based on the configuration type.

PARAMETER DESCRIPTION
cls

The class the method is called on.

TYPE: class

config

The configuration object used to create the instance. It contains the information needed to select and initialize the concrete model class.

TYPE: object

RETURNS DESCRIPTION

A model instance whose concrete class is selected automatically from the configuration type.

RAISES DESCRIPTION
FutureWarning

Always issued to inform the user that 'AutoModelWithLMHead' is deprecated and will be removed in a future version; use 'AutoModelForCausalLM', 'AutoModelForMaskedLM', or 'AutoModelForSeq2SeqLM' instead.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
@classmethod
def from_config(cls, config):
    """
    This method creates a model instance from the provided 'config' parameter; the concrete
    model class is selected automatically based on the configuration type.

    Args:
        cls (class): The class the method is called on.
        config (object): The configuration object used to create the instance. It contains the
            information needed to select and initialize the concrete model class.

    Returns:
        A model instance whose concrete class is selected automatically from the configuration type.

    Raises:
        FutureWarning: Always issued to inform the user that 'AutoModelWithLMHead' is deprecated
            and will be removed in a future version. Use 'AutoModelForCausalLM' for causal
            language models, 'AutoModelForMaskedLM' for masked language models, and
            'AutoModelForSeq2SeqLM' for encoder-decoder models instead.
    """
    warnings.warn(
        "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use "
        "`AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and "
        "`AutoModelForSeq2SeqLM` for encoder-decoder models.",
        FutureWarning,
    )
    return super().from_config(config)
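
For contrast with from_pretrained, here is a hedged sketch of from_config: it builds a freshly initialized (untrained) model from a configuration object instead of loading weights. AutoConfig and the 'gpt2' identifier are assumptions for illustration.

from mindnlp.transformers import AutoConfig, AutoModelWithLMHead

config = AutoConfig.from_pretrained("gpt2")
# Emits the FutureWarning documented above and returns a randomly
# initialized model whose concrete class matches the config's model type.
model = AutoModelWithLMHead.from_config(config)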

mindnlp.transformers.models.auto.modeling_auto.AutoModelWithLMHead.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs) classmethod

Loads a pretrained model from a given model name or path.

PARAMETER DESCRIPTION
cls

The class the method is called on.

TYPE: class

pretrained_model_name_or_path

The name or path of the pretrained model. This can be a local path or a model identifier from a pretrained model repository.

TYPE: str

*model_args

Additional positional arguments forwarded to the underlying model class.

**kwargs

Additional keyword arguments forwarded to the underlying model class.

RETURNS DESCRIPTION

A pretrained model instance whose concrete class is selected automatically from the checkpoint's configuration.

RAISES DESCRIPTION
FutureWarning

Always issued because AutoModelWithLMHead is deprecated; use AutoModelForCausalLM for causal language models, AutoModelForMaskedLM for masked language models, and AutoModelForSeq2SeqLM for encoder-decoder models instead.

Source code in mindnlp/transformers/models/auto/modeling_auto.py
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
    """
    Loads a pretrained model from a given model name or path.

    Args:
        cls (class): The class the method is called on.
        pretrained_model_name_or_path (str): The name or path of the pretrained model.
            This can be a local path or a model identifier from a pretrained model repository.
        *model_args: Additional positional arguments forwarded to the underlying model class.
        **kwargs: Additional keyword arguments forwarded to the underlying model class.

    Returns:
        A pretrained model instance whose concrete class is selected automatically from the
        checkpoint's configuration.

    Raises:
        FutureWarning: Always issued because `AutoModelWithLMHead` is deprecated.
            Use `AutoModelForCausalLM` for causal language models,
            `AutoModelForMaskedLM` for masked language models, and
            `AutoModelForSeq2SeqLM` for encoder-decoder models instead.
    """
    warnings.warn(
        "The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use "
        "`AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and "
        "`AutoModelForSeq2SeqLM` for encoder-decoder models.",
        FutureWarning,
    )
    return super().from_pretrained(
        pretrained_model_name_or_path, *model_args, **kwargs
    )
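
Finally, a sketch of the *model_args/**kwargs pass-through: extra keyword arguments are forwarded unchanged to the resolved model class's from_pretrained. The output_attentions flag is a standard transformers-style configuration override; treat any other kwargs, and the 'gpt2' checkpoint, as illustrative assumptions.

from mindnlp.transformers import AutoModelWithLMHead

# Configuration overrides travel through **kwargs to the underlying model;
# here we ask the resolved model to return attention weights.
model = AutoModelWithLMHead.from_pretrained("gpt2", output_attentions=True)
print(model.config.output_attentions)  # True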