mpnet

mindnlp.transformers.models.mpnet.configuration_mpnet

MPNet model configuration

mindnlp.transformers.models.mpnet.configuration_mpnet.MPNetConfig

Bases: PretrainedConfig

This is the configuration class to store the configuration of a [MPNetModel] or a [TFMPNetModel]. It is used to instantiate an MPNet model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a configuration similar to that of the MPNet microsoft/mpnet-base architecture.
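
A minimal usage sketch (assuming MPNetConfig and MPNetModel are re-exported from mindnlp.transformers, mirroring the Hugging Face layout; adjust the import path if your mindnlp version only exposes the full module path):

from mindnlp.transformers import MPNetConfig, MPNetModel

# Configuration matching the microsoft/mpnet-base defaults
configuration = MPNetConfig()

# A smaller, custom architecture (hypothetical sizes, for illustration only)
small_configuration = MPNetConfig(num_hidden_layers=6, hidden_size=384, intermediate_size=1536)

# Randomly initialized model built from the configuration
model = MPNetModel(configuration)
print(configuration.vocab_size, configuration.num_hidden_layers)  # 30527 12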

Source code in mindnlp/transformers/models/mpnet/configuration_mpnet.py (lines 29-95)
class MPNetConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`MPNetModel`] or a [`TFMPNetModel`]. It is used to
    instantiate a MPNet model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the MPNet
    [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) architecture.
    ```"""
    model_type = "mpnet"

    def __init__(
        self,
        vocab_size=30527,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        relative_attention_num_buckets=32,
        pad_token_id=1,
        bos_token_id=0,
        eos_token_id=2,
        **kwargs,
    ):
        """Initializes a new instance of the MPNetConfig class.

        Args:
            vocab_size (int, optional): The size of the vocabulary. Defaults to 30527.
            hidden_size (int, optional): The size of the hidden states. Defaults to 768.
            num_hidden_layers (int, optional): The number of hidden layers. Defaults to 12.
            num_attention_heads (int, optional): The number of attention heads. Defaults to 12.
            intermediate_size (int, optional): The size of the intermediate layer in the feedforward network. Defaults to 3072.
            hidden_act (str, optional): The activation function for the hidden layers. Defaults to 'gelu'.
            hidden_dropout_prob (float, optional): The dropout probability for the hidden layers. Defaults to 0.1.
            attention_probs_dropout_prob (float, optional): The dropout probability for the attention probabilities. Defaults to 0.1.
            max_position_embeddings (int, optional): The maximum number of positional embeddings. Defaults to 512.
            initializer_range (float, optional): The range for the random weight initialization. Defaults to 0.02.
            layer_norm_eps (float, optional): The epsilon value for layer normalization. Defaults to 1e-12.
            relative_attention_num_buckets (int, optional): The number of buckets for relative attention. Defaults to 32.
            pad_token_id (int, optional): The token ID for padding. Defaults to 1.
            bos_token_id (int, optional): The token ID for the beginning of sequence. Defaults to 0.
            eos_token_id (int, optional): The token ID for the end of sequence. Defaults to 2.

        Returns:
            None.

        Raises:
            None.
        """
        super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.relative_attention_num_buckets = relative_attention_num_buckets

mindnlp.transformers.models.mpnet.configuration_mpnet.MPNetConfig.__init__(vocab_size=30527, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, hidden_act='gelu', hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, initializer_range=0.02, layer_norm_eps=1e-12, relative_attention_num_buckets=32, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs)

Initializes a new instance of the MPNetConfig class.

PARAMETER DESCRIPTION
vocab_size

The size of the vocabulary. Defaults to 30527.

TYPE: int DEFAULT: 30527

hidden_size

The size of the hidden states. Defaults to 768.

TYPE: int DEFAULT: 768

num_hidden_layers

The number of hidden layers. Defaults to 12.

TYPE: int DEFAULT: 12

num_attention_heads

The number of attention heads. Defaults to 12.

TYPE: int DEFAULT: 12

intermediate_size

The size of the intermediate layer in the feedforward network. Defaults to 3072.

TYPE: int DEFAULT: 3072

hidden_act

The activation function for the hidden layers. Defaults to 'gelu'.

TYPE: str DEFAULT: 'gelu'

hidden_dropout_prob

The dropout probability for the hidden layers. Defaults to 0.1.

TYPE: float DEFAULT: 0.1

attention_probs_dropout_prob

The dropout probability for the attention probabilities. Defaults to 0.1.

TYPE: float DEFAULT: 0.1

max_position_embeddings

The maximum number of positional embeddings. Defaults to 512.

TYPE: int DEFAULT: 512

initializer_range

The range for the random weight initialization. Defaults to 0.02.

TYPE: float DEFAULT: 0.02

layer_norm_eps

The epsilon value for layer normalization. Defaults to 1e-12.

TYPE: float DEFAULT: 1e-12

relative_attention_num_buckets

The number of buckets for relative attention. Defaults to 32.

TYPE: int DEFAULT: 32

pad_token_id

The token ID for padding. Defaults to 1.

TYPE: int DEFAULT: 1

bos_token_id

The token ID for the beginning of sequence. Defaults to 0.

TYPE: int DEFAULT: 0

eos_token_id

The token ID for the end of sequence. Defaults to 2.

TYPE: int DEFAULT: 2

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/configuration_mpnet.py (lines 38-95)
def __init__(
    self,
    vocab_size=30527,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    initializer_range=0.02,
    layer_norm_eps=1e-12,
    relative_attention_num_buckets=32,
    pad_token_id=1,
    bos_token_id=0,
    eos_token_id=2,
    **kwargs,
):
    """Initializes a new instance of the MPNetConfig class.

    Args:
        vocab_size (int, optional): The size of the vocabulary. Defaults to 30527.
        hidden_size (int, optional): The size of the hidden states. Defaults to 768.
        num_hidden_layers (int, optional): The number of hidden layers. Defaults to 12.
        num_attention_heads (int, optional): The number of attention heads. Defaults to 12.
        intermediate_size (int, optional): The size of the intermediate layer in the feedforward network. Defaults to 3072.
        hidden_act (str, optional): The activation function for the hidden layers. Defaults to 'gelu'.
        hidden_dropout_prob (float, optional): The dropout probability for the hidden layers. Defaults to 0.1.
        attention_probs_dropout_prob (float, optional): The dropout probability for the attention probabilities. Defaults to 0.1.
        max_position_embeddings (int, optional): The maximum number of positional embeddings. Defaults to 512.
        initializer_range (float, optional): The range for the random weight initialization. Defaults to 0.02.
        layer_norm_eps (float, optional): The epsilon value for layer normalization. Defaults to 1e-12.
        relative_attention_num_buckets (int, optional): The number of buckets for relative attention. Defaults to 32.
        pad_token_id (int, optional): The token ID for padding. Defaults to 1.
        bos_token_id (int, optional): The token ID for the beginning of sequence. Defaults to 0.
        eos_token_id (int, optional): The token ID for the end of sequence. Defaults to 2.

    Returns:
        None.

    Raises:
        None.
    """
    super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)

    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.num_hidden_layers = num_hidden_layers
    self.num_attention_heads = num_attention_heads
    self.hidden_act = hidden_act
    self.intermediate_size = intermediate_size
    self.hidden_dropout_prob = hidden_dropout_prob
    self.attention_probs_dropout_prob = attention_probs_dropout_prob
    self.max_position_embeddings = max_position_embeddings
    self.initializer_range = initializer_range
    self.layer_norm_eps = layer_norm_eps
    self.relative_attention_num_buckets = relative_attention_num_buckets

mindnlp.transformers.models.mpnet.modeling_mpnet

MindSpore MPNet model.

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetAttention

Bases: Module

Multi-head self-attention mechanism for MPNet.
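
A hedged sketch of driving this block directly (MPNetAttention is normally built inside MPNetLayer; the shapes and dummy inputs below are illustrative assumptions):

import mindspore
from mindspore import ops
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetAttention

config = MPNetConfig()
attention = MPNetAttention(config)

# Dummy hidden states: (batch_size, sequence_length, hidden_size)
hidden_states = ops.ones((2, 8, config.hidden_size), mindspore.float32)

outputs = attention(hidden_states, output_attentions=True)
attention_output = outputs[0]  # (2, 8, 768), after dropout + residual + LayerNorm
attention_probs = outputs[1]   # per-head attention weights, returned because output_attentions=True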

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 297-379)
class MPNetAttention(nn.Module):
    """
    Multi-head self-attention mechanism for MPNet.
    """
    def __init__(self, config):
        """
        Initializes an instance of the MPNetAttention class.

        Args:
            self: The instance of the MPNetAttention class.
            config: A configuration object containing the settings for the MPNetAttention.

        Returns:
            None.

        Raises:
            None.
        """
        super().__init__()
        self.attn = MPNetSelfAttention(config)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)

        self.pruned_heads = set()

    def prune_heads(self, heads):
        """Prunes specified attention heads from the multi-head self-attention layer."""
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.attn.num_attention_heads, self.attn.attention_head_size, self.pruned_heads
        )

        self.attn.q = prune_linear_layer(self.attn.q, index)
        self.attn.k = prune_linear_layer(self.attn.k, index)
        self.attn.v = prune_linear_layer(self.attn.v, index)
        self.attn.o = prune_linear_layer(self.attn.o, index, dim=1)

        self.attn.num_attention_heads = self.attn.num_attention_heads - len(heads)
        self.attn.all_head_size = self.attn.attention_head_size * self.attn.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        position_bias=None,
        output_attentions=False,
        **kwargs,
    ):
        """
        Constructs the attention layer for the MPNetAttention class.

        Args:
            self (MPNetAttention): An instance of the MPNetAttention class.
            hidden_states (Tensor): The input hidden states tensor of shape (batch_size, sequence_length, hidden_size).
            attention_mask (Optional[Tensor]): A tensor of shape (batch_size, sequence_length)
                indicating which tokens should be attended to and which ones should be ignored. Defaults to None.
            head_mask (Optional[Tensor]): A tensor of shape (num_heads,) representing the mask to be applied to the
                attention scores of each head. Defaults to None.
            position_bias (Optional[Tensor]): A tensor of shape (num_heads, sequence_length, sequence_length)
                representing the position bias to be added to the attention scores. Defaults to None.
            output_attentions (bool): Whether to output the attention scores. Defaults to False.
            **kwargs: Additional keyword arguments.

        Returns:
            Tuple[Union[Tensor, Tuple[Tensor]], ...]: A tuple containing the attention output tensor of shape
                (batch_size, sequence_length, hidden_size) and any additional outputs returned by the attention layer.

        Raises:
            None.
        """
        self_outputs = self.attn(
            hidden_states,
            attention_mask,
            head_mask,
            position_bias,
            output_attentions=output_attentions,
        )
        attention_output = self.LayerNorm(self.dropout(self_outputs[0]) + hidden_states)
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetAttention.__init__(config)

Initializes an instance of the MPNetAttention class.

PARAMETER DESCRIPTION
self

The instance of the MPNetAttention class.

config

A configuration object containing the settings for the MPNetAttention.

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 301-320)
def __init__(self, config):
    """
    Initializes an instance of the MPNetAttention class.

    Args:
        self: The instance of the MPNetAttention class.
        config: A configuration object containing the settings for the MPNetAttention.

    Returns:
        None.

    Raises:
        None.
    """
    super().__init__()
    self.attn = MPNetSelfAttention(config)
    self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)

    self.pruned_heads = set()

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetAttention.forward(hidden_states, attention_mask=None, head_mask=None, position_bias=None, output_attentions=False, **kwargs)

Runs the multi-head attention block: self-attention followed by dropout, a residual connection, and LayerNorm.

PARAMETER DESCRIPTION
self

An instance of the MPNetAttention class.

TYPE: MPNetAttention

hidden_states

The input hidden states tensor of shape (batch_size, sequence_length, hidden_size).

TYPE: Tensor

attention_mask

A tensor of shape (batch_size, sequence_length) indicating which tokens should be attended to and which ones should be ignored. Defaults to None.

TYPE: Optional[Tensor] DEFAULT: None

head_mask

A tensor of shape (num_heads,) representing the mask to be applied to the attention scores of each head. Defaults to None.

TYPE: Optional[Tensor] DEFAULT: None

position_bias

A tensor of shape (num_heads, sequence_length, sequence_length) representing the position bias to be added to the attention scores. Defaults to None.

TYPE: Optional[Tensor] DEFAULT: None

output_attentions

Whether to output the attention scores. Defaults to False.

TYPE: bool DEFAULT: False

**kwargs

Additional keyword arguments.

DEFAULT: {}

RETURNS DESCRIPTION

Tuple[Union[Tensor, Tuple[Tensor]], ...]: A tuple containing the attention output tensor of shape (batch_size, sequence_length, hidden_size) and any additional outputs returned by the attention layer.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 339-379)
def forward(
    self,
    hidden_states,
    attention_mask=None,
    head_mask=None,
    position_bias=None,
    output_attentions=False,
    **kwargs,
):
    """
    Constructs the attention layer for the MPNetAttention class.

    Args:
        self (MPNetAttention): An instance of the MPNetAttention class.
        hidden_states (Tensor): The input hidden states tensor of shape (batch_size, sequence_length, hidden_size).
        attention_mask (Optional[Tensor]): A tensor of shape (batch_size, sequence_length)
            indicating which tokens should be attended to and which ones should be ignored. Defaults to None.
        head_mask (Optional[Tensor]): A tensor of shape (num_heads,) representing the mask to be applied to the
            attention scores of each head. Defaults to None.
        position_bias (Optional[Tensor]): A tensor of shape (num_heads, sequence_length, sequence_length)
            representing the position bias to be added to the attention scores. Defaults to None.
        output_attentions (bool): Whether to output the attention scores. Defaults to False.
        **kwargs: Additional keyword arguments.

    Returns:
        Tuple[Union[Tensor, Tuple[Tensor]], ...]: A tuple containing the attention output tensor of shape
            (batch_size, sequence_length, hidden_size) and any additional outputs returned by the attention layer.

    Raises:
        None.
    """
    self_outputs = self.attn(
        hidden_states,
        attention_mask,
        head_mask,
        position_bias,
        output_attentions=output_attentions,
    )
    attention_output = self.LayerNorm(self.dropout(self_outputs[0]) + hidden_states)
    outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
    return outputs

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetAttention.prune_heads(heads)

Prunes specified attention heads from the multi-head self-attention layer.
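
For example (a sketch with arbitrary head indices):

from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetAttention

attention = MPNetAttention(MPNetConfig())
attention.prune_heads([0, 2])               # drop heads 0 and 2; the q/k/v/o projections shrink accordingly
print(attention.attn.num_attention_heads)   # 10 for the default 12-head configuration
print(sorted(attention.pruned_heads))       # [0, 2]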

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 322-337)
def prune_heads(self, heads):
    """Prunes specified attention heads from the multi-head self-attention layer."""
    if len(heads) == 0:
        return
    heads, index = find_pruneable_heads_and_indices(
        heads, self.attn.num_attention_heads, self.attn.attention_head_size, self.pruned_heads
    )

    self.attn.q = prune_linear_layer(self.attn.q, index)
    self.attn.k = prune_linear_layer(self.attn.k, index)
    self.attn.v = prune_linear_layer(self.attn.v, index)
    self.attn.o = prune_linear_layer(self.attn.o, index, dim=1)

    self.attn.num_attention_heads = self.attn.num_attention_heads - len(heads)
    self.attn.all_head_size = self.attn.attention_head_size * self.attn.num_attention_heads
    self.pruned_heads = self.pruned_heads.union(heads)

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetClassificationHead

Bases: Module

Head for sentence-level classification tasks.
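
A usage sketch (num_labels is assumed to be handled by the generic PretrainedConfig kwargs, as in Hugging Face transformers):

import mindspore
from mindspore import ops
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetClassificationHead

config = MPNetConfig(num_labels=3)
head = MPNetClassificationHead(config)

# Encoder output of shape (batch_size, sequence_length, hidden_size); only the first (<s>) token is used
sequence_output = ops.ones((4, 16, config.hidden_size), mindspore.float32)
logits = head(sequence_output)   # (4, 3)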

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 1346-1394)
class MPNetClassificationHead(nn.Module):
    """Head for sentence-level classification tasks."""
    def __init__(self, config):
        """
        Initializes an instance of the MPNetClassificationHead class.

        Args:
            self: The instance of the class itself.
            config:
                An object containing configuration parameters for the head, including:

                - hidden_size (int): The size of the hidden layer.
                - hidden_dropout_prob (float): The dropout probability for the hidden layer.
                - num_labels (int): The number of output labels.

        Returns:
            None.

        Raises:
            TypeError: If the provided config parameter is not of the expected type.
            ValueError: If any of the configuration parameters are invalid or missing.
        """
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
        self.out_proj = nn.Linear(config.hidden_size, config.num_labels)

    def forward(self, features, **kwargs):
        """
        Constructs the MPNetClassificationHead by performing a series of operations on the input features.

        Args:
            self: The instance of the MPNetClassificationHead class.
            features (Tensor): The input features to be processed.
                It should be a tensor of shape (batch_size, sequence_length, num_features).

        Returns:
            None

        Raises:
            None
        """
        x = features[:, 0, :]  # take <s> token (equiv. to BERT's [CLS] token)
        x = self.dropout(x)
        x = self.dense(x)
        x = ops.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetClassificationHead.__init__(config)

Initializes an instance of the MPNetClassificationHead class.

PARAMETER DESCRIPTION
self

The instance of the class itself.

config

An object containing configuration parameters for the head, including:

  • hidden_size (int): The size of the hidden layer.
  • hidden_dropout_prob (float): The dropout probability for the hidden layer.
  • num_labels (int): The number of output labels.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
TypeError

If the provided config parameter is not of the expected type.

ValueError

If any of the configuration parameters are invalid or missing.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 1348-1371)
def __init__(self, config):
    """
    Initializes an instance of the MPNetClassificationHead class.

    Args:
        self: The instance of the class itself.
        config:
            An object containing configuration parameters for the head, including:

            - hidden_size (int): The size of the hidden layer.
            - hidden_dropout_prob (float): The dropout probability for the hidden layer.
            - num_labels (int): The number of output labels.

    Returns:
        None.

    Raises:
        TypeError: If the provided config parameter is not of the expected type.
        ValueError: If any of the configuration parameters are invalid or missing.
    """
    super().__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
    self.out_proj = nn.Linear(config.hidden_size, config.num_labels)

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetClassificationHead.forward(features, **kwargs)

Applies the classification head to the encoder output: takes the first (<s>) token, then dense + tanh + dropout + output projection.

PARAMETER DESCRIPTION
self

The instance of the MPNetClassificationHead class.

features

The input features to be processed. It should be a tensor of shape (batch_size, sequence_length, num_features).

TYPE: Tensor

RETURNS DESCRIPTION

Tensor of shape (batch_size, num_labels) containing the classification logits.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 1373-1394)
def forward(self, features, **kwargs):
    """
    Constructs the MPNetClassificationHead by performing a series of operations on the input features.

    Args:
        self: The instance of the MPNetClassificationHead class.
        features (Tensor): The input features to be processed.
            It should be a tensor of shape (batch_size, sequence_length, num_features).

    Returns:
        None

    Raises:
        None
    """
    x = features[:, 0, :]  # take <s> token (equiv. to BERT's [CLS] token)
    x = self.dropout(x)
    x = self.dense(x)
    x = ops.tanh(x)
    x = self.dropout(x)
    x = self.out_proj(x)
    return x

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEmbeddings

Bases: Module

Builds the input embeddings from word and position embeddings.
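
A usage sketch (token ids are made up; id 1 is the padding index used by this module):

import mindspore
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetEmbeddings

embeddings = MPNetEmbeddings(MPNetConfig())

# (batch_size, sequence_length) token ids; the trailing 1s are padding
input_ids = mindspore.Tensor([[0, 42, 17, 2, 1, 1]], mindspore.int64)
out = embeddings(input_ids)   # (1, 6, 768): word + position embeddings, then LayerNorm and dropout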

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 87-173)
class MPNetEmbeddings(nn.Module):
    """forward the embeddings from word, position and token_type embeddings."""
    def __init__(self, config):
        """
        Initializes an instance of the MPNetEmbeddings class.

        Args:
            self: The object instance.
            config: An object of type 'config' containing the configuration parameters.

        Returns:
            None

        Raises:
            None
        """
        super().__init__()
        self.padding_idx = 1
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=self.padding_idx)
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
        )

        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
        self.position_ids = ops.arange(config.max_position_embeddings).broadcast_to((1, -1))

    def forward(self, input_ids=None, position_ids=None, inputs_embeds=None, **kwargs):
        """
        Method 'forward' in the class 'MPNetEmbeddings'.

        Args:
            self: The instance of the class.
            input_ids (torch.Tensor, optional): The input tensor representing token indices. Defaults to None.
            position_ids (torch.Tensor, optional): The input tensor representing position indices. Defaults to None.
            inputs_embeds (torch.Tensor, optional): The input tensor representing embeddings. Defaults to None.

        Returns:
            embeddings: The method returns the forwarded embeddings of input data.

        Raises:
            ValueError: If both 'input_ids' and 'inputs_embeds' are None.
            ValueError: If shape inconsistency is detected between 'input_ids' and 'inputs_embeds'.
            IndexError: If an index is out of bounds while accessing tensors.
            TypeError: If the input types are not torch tensors.
        """
        if position_ids is None:
            if input_ids is not None:
                position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx)
            else:
                position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)

        if input_ids is not None:
            input_shape = input_ids.shape
        else:
            input_shape = inputs_embeds.shape[:-1]

        seq_length = input_shape[1]

        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)

        embeddings = inputs_embeds + position_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
        """
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: mindspore.Tensor

        Returns: mindspore.Tensor
        """
        input_shape = inputs_embeds.shape[:-1]
        sequence_length = input_shape[1]

        position_ids = ops.arange(
            self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=mindspore.int64
        )
        return position_ids.unsqueeze(0).broadcast_to(input_shape)

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEmbeddings.__init__(config)

Initializes an instance of the MPNetEmbeddings class.

PARAMETER DESCRIPTION
self

The object instance.

config

An object of type 'config' containing the configuration parameters.

RETURNS DESCRIPTION

None

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 89-112)
def __init__(self, config):
    """
    Initializes an instance of the MPNetEmbeddings class.

    Args:
        self: The object instance.
        config: An object of type 'config' containing the configuration parameters.

    Returns:
        None

    Raises:
        None
    """
    super().__init__()
    self.padding_idx = 1
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=self.padding_idx)
    self.position_embeddings = nn.Embedding(
        config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
    )

    self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
    self.position_ids = ops.arange(config.max_position_embeddings).broadcast_to((1, -1))

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEmbeddings.create_position_ids_from_inputs_embeds(inputs_embeds)

We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

PARAMETER DESCRIPTION
inputs_embeds

mindspore.Tensor
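
With padding_idx = 1, the generated ids simply run 2, 3, 4, ... for every sequence, as in this sketch:

import mindspore
from mindspore import ops
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetEmbeddings

embeddings = MPNetEmbeddings(MPNetConfig())
inputs_embeds = ops.ones((2, 5, 768), mindspore.float32)
position_ids = embeddings.create_position_ids_from_inputs_embeds(inputs_embeds)
print(position_ids)   # [[2, 3, 4, 5, 6], [2, 3, 4, 5, 6]]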

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 158-173)
def create_position_ids_from_inputs_embeds(self, inputs_embeds):
    """
    We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

    Args:
        inputs_embeds: mindspore.Tensor

    Returns: mindspore.Tensor
    """
    input_shape = inputs_embeds.shape[:-1]
    sequence_length = input_shape[1]

    position_ids = ops.arange(
        self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=mindspore.int64
    )
    return position_ids.unsqueeze(0).broadcast_to(input_shape)

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEmbeddings.forward(input_ids=None, position_ids=None, inputs_embeds=None, **kwargs)

Forward pass of MPNetEmbeddings: builds embeddings from token ids or precomputed input embeddings.

PARAMETER DESCRIPTION
self

The instance of the class.

input_ids

The input tensor representing token indices. Defaults to None.

TYPE: Tensor DEFAULT: None

position_ids

The input tensor representing position indices. Defaults to None.

TYPE: Tensor DEFAULT: None

inputs_embeds

The input tensor representing embeddings. Defaults to None.

TYPE: Tensor DEFAULT: None

RETURNS DESCRIPTION
embeddings

The combined word + position embeddings after LayerNorm and dropout, of shape (batch_size, sequence_length, hidden_size).

RAISES DESCRIPTION
ValueError

If both 'input_ids' and 'inputs_embeds' are None.

ValueError

If shape inconsistency is detected between 'input_ids' and 'inputs_embeds'.

IndexError

If an index is out of bounds while accessing tensors.

TypeError

If the input types are not mindspore tensors.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 114-156)
def forward(self, input_ids=None, position_ids=None, inputs_embeds=None, **kwargs):
    """
    Method 'forward' in the class 'MPNetEmbeddings'.

    Args:
        self: The instance of the class.
        input_ids (torch.Tensor, optional): The input tensor representing token indices. Defaults to None.
        position_ids (torch.Tensor, optional): The input tensor representing position indices. Defaults to None.
        inputs_embeds (torch.Tensor, optional): The input tensor representing embeddings. Defaults to None.

    Returns:
        embeddings: The method returns the forwarded embeddings of input data.

    Raises:
        ValueError: If both 'input_ids' and 'inputs_embeds' are None.
        ValueError: If shape inconsistency is detected between 'input_ids' and 'inputs_embeds'.
        IndexError: If an index is out of bounds while accessing tensors.
        TypeError: If the input types are not torch tensors.
    """
    if position_ids is None:
        if input_ids is not None:
            position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx)
        else:
            position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)

    if input_ids is not None:
        input_shape = input_ids.shape
    else:
        input_shape = inputs_embeds.shape[:-1]

    seq_length = input_shape[1]

    if position_ids is None:
        position_ids = self.position_ids[:, :seq_length]

    if inputs_embeds is None:
        inputs_embeds = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)

    embeddings = inputs_embeds + position_embeddings
    embeddings = self.LayerNorm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEncoder

Bases: Module

Encoder module for the MPNet model.
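
A standalone sketch. Note that forward indexes head_mask per layer, so when the encoder is driven directly (outside MPNetModel, which normally prepares this mask) an explicit list of None entries has to be passed:

import mindspore
from mindspore import ops
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetEncoder

config = MPNetConfig()
encoder = MPNetEncoder(config)

hidden_states = ops.ones((2, 8, config.hidden_size), mindspore.float32)
head_mask = [None] * config.num_hidden_layers   # one (unused) mask slot per layer

last_hidden_state, = encoder(hidden_states, head_mask=head_mask)
print(last_hidden_state.shape)   # (2, 8, 768)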

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 556-708)
class MPNetEncoder(nn.Module):
    """Encoder module for the MPNet model."""
    def __init__(self, config):
        """
        Initializes an instance of the MPNetEncoder class.

        Args:
            self: The current object instance.
            config (object):
                The configuration object containing the settings for the MPNetEncoder.

                - Type: object
                - Purpose: Specifies the configuration settings for the MPNetEncoder.
                - Restrictions: None

        Returns:
            None

        Raises:
            None
        """
        super().__init__()
        self.config = config
        self.n_heads = config.num_attention_heads
        self.layer = nn.ModuleList([MPNetLayer(config) for _ in range(config.num_hidden_layers)])
        self.relative_attention_bias = nn.Embedding(config.relative_attention_num_buckets, self.n_heads)

    def forward(
        self,
        hidden_states: mindspore.Tensor,
        attention_mask: Optional[mindspore.Tensor] = None,
        head_mask: Optional[mindspore.Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = False,
        **kwargs,
    ):
        """
        Construct method in the MPNetEncoder class.

        Args:
            self: The instance of the MPNetEncoder class.
            hidden_states (mindspore.Tensor): The input hidden states to be processed by the encoder.
            attention_mask (Optional[mindspore.Tensor]): An optional tensor specifying which positions
                should be attended to. Defaults to None.
            head_mask (Optional[mindspore.Tensor]): An optional tensor specifying which heads to mask out.
                Defaults to None.
            output_attentions (bool): A flag indicating whether to output the attention weights. Defaults to False.
            output_hidden_states (bool): A flag indicating whether to output hidden states for all layers.
                Defaults to False.
            return_dict (bool): A flag indicating whether to return the outputs as a dictionary. Defaults to False.

        Returns:
            None

        Raises:
            TypeError: If the input parameters are not of the expected types.
            ValueError: If the input parameters are not within the expected ranges.
        """
        position_bias = self.compute_position_bias(hidden_states)
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None
        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_outputs = layer_module(
                hidden_states,
                attention_mask,
                head_mask[i],
                position_bias,
                output_attentions=output_attentions,
                **kwargs,
            )
            hidden_states = layer_outputs[0]

            if output_attentions:
                all_attentions = all_attentions + (layer_outputs[1],)

        # Add last layer
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_attentions] if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_attentions,
        )

    def compute_position_bias(self, x, position_ids=None, num_buckets=32):
        """
        Computes the position bias for relative attention in the MPNet model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, sequence_length, hidden_size).
            position_ids (torch.Tensor, optional): Tensor containing position indices. If provided,
                the position indices are used to compute relative positions; otherwise, indices
                are generated based on the input tensor's sequence length. Defaults to None.
            num_buckets (int, optional): Number of buckets for relative position encoding.
                Defaults to 32.

        Returns:
            torch.Tensor: Position bias tensor of shape (batch_size, num_heads, sequence_length, sequence_length).
        """
        bsz, qlen, klen = x.shape[0], x.shape[1], x.shape[1]
        if position_ids is not None:
            context_position = position_ids[:, :, None]
            memory_position = position_ids[:, None, :]
        else:
            context_position = ops.arange(qlen, dtype=mindspore.int64)[:, None]
            memory_position = ops.arange(klen, dtype=mindspore.int64)[None, :]

        relative_position = memory_position - context_position

        rp_bucket = self.relative_position_bucket(relative_position, num_buckets=num_buckets)
        values = self.relative_attention_bias(rp_bucket)
        values = values.permute([2, 0, 1]).unsqueeze(0)
        values = values.broadcast_to((bsz, -1, qlen, klen))
        return values

    @staticmethod
    def relative_position_bucket(relative_position, num_buckets=32, max_distance=128):
        """
        Bucketizes relative positions for relative attention in the MPNet model.

        Args:
            relative_position (torch.Tensor): Tensor containing relative positions.
            num_buckets (int, optional): Number of buckets for bucketization. Defaults to 32.
            max_distance (int, optional): Maximum distance for bucketization. Defaults to 128.

        Returns:
            torch.Tensor: Bucketized relative positions.
        """
        ret = 0
        n = -relative_position

        num_buckets //= 2
        ret += (n < 0).to(mindspore.int64) * num_buckets
        n = ops.abs(n)

        max_exact = num_buckets // 2
        is_small = n < max_exact

        val_if_large = max_exact + (
            ops.log(n.float() / max_exact) / math.log(max_distance / max_exact) * (num_buckets - max_exact)
        ).to(mindspore.int64)

        val_if_large_np = np.minimum(val_if_large.asnumpy(), ops.full_like(val_if_large, num_buckets - 1).asnumpy())
        val_if_large = mindspore.Tensor(val_if_large_np)
        ret += ops.where(is_small, n, val_if_large)
        return ret

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEncoder.__init__(config)

Initializes an instance of the MPNetEncoder class.

PARAMETER DESCRIPTION
self

The current object instance.

config

The configuration object containing the settings for the MPNetEncoder.

  • Type: object
  • Purpose: Specifies the configuration settings for the MPNetEncoder.
  • Restrictions: None

TYPE: object

RETURNS DESCRIPTION

None

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 558-581)
def __init__(self, config):
    """
    Initializes an instance of the MPNetEncoder class.

    Args:
        self: The current object instance.
        config (object):
            The configuration object containing the settings for the MPNetEncoder.

            - Type: object
            - Purpose: Specifies the configuration settings for the MPNetEncoder.
            - Restrictions: None

    Returns:
        None

    Raises:
        None
    """
    super().__init__()
    self.config = config
    self.n_heads = config.num_attention_heads
    self.layer = nn.ModuleList([MPNetLayer(config) for _ in range(config.num_hidden_layers)])
    self.relative_attention_bias = nn.Embedding(config.relative_attention_num_buckets, self.n_heads)

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEncoder.compute_position_bias(x, position_ids=None, num_buckets=32)

Computes the position bias for relative attention in the MPNet model.

PARAMETER DESCRIPTION
x

Input tensor of shape (batch_size, sequence_length, hidden_size).

TYPE: Tensor

position_ids

Tensor containing position indices. If provided, the position indices are used to compute relative positions; otherwise, indices are generated based on the input tensor's sequence length. Defaults to None.

TYPE: Tensor DEFAULT: None

num_buckets

Number of buckets for relative position encoding. Defaults to 32.

TYPE: int DEFAULT: 32

RETURNS DESCRIPTION

mindspore.Tensor: Position bias tensor of shape (batch_size, num_heads, sequence_length, sequence_length).
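
A quick shape check (a sketch; the bias is computed once per forward pass and shared by every layer):

import mindspore
from mindspore import ops
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetEncoder

encoder = MPNetEncoder(MPNetConfig())
x = ops.ones((2, 8, 768), mindspore.float32)
bias = encoder.compute_position_bias(x)
print(bias.shape)   # (2, 12, 8, 8): (batch_size, num_heads, seq_len, seq_len)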

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 647-676)
def compute_position_bias(self, x, position_ids=None, num_buckets=32):
    """
    Computes the position bias for relative attention in the MPNet model.

    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, sequence_length, hidden_size).
        position_ids (torch.Tensor, optional): Tensor containing position indices. If provided,
            the position indices are used to compute relative positions; otherwise, indices
            are generated based on the input tensor's sequence length. Defaults to None.
        num_buckets (int, optional): Number of buckets for relative position encoding.
            Defaults to 32.

    Returns:
        torch.Tensor: Position bias tensor of shape (batch_size, num_heads, sequence_length, sequence_length).
    """
    bsz, qlen, klen = x.shape[0], x.shape[1], x.shape[1]
    if position_ids is not None:
        context_position = position_ids[:, :, None]
        memory_position = position_ids[:, None, :]
    else:
        context_position = ops.arange(qlen, dtype=mindspore.int64)[:, None]
        memory_position = ops.arange(klen, dtype=mindspore.int64)[None, :]

    relative_position = memory_position - context_position

    rp_bucket = self.relative_position_bucket(relative_position, num_buckets=num_buckets)
    values = self.relative_attention_bias(rp_bucket)
    values = values.permute([2, 0, 1]).unsqueeze(0)
    values = values.broadcast_to((bsz, -1, qlen, klen))
    return values

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEncoder.forward(hidden_states, attention_mask=None, head_mask=None, output_attentions=False, output_hidden_states=False, return_dict=False, **kwargs)

Forward pass of the MPNetEncoder: applies each MPNetLayer with a shared relative position bias.

PARAMETER DESCRIPTION
self

The instance of the MPNetEncoder class.

hidden_states

The input hidden states to be processed by the encoder.

TYPE: Tensor

attention_mask

An optional tensor specifying which positions should be attended to. Defaults to None.

TYPE: Optional[Tensor] DEFAULT: None

head_mask

An optional tensor specifying which heads to mask out. Defaults to None.

TYPE: Optional[Tensor] DEFAULT: None

output_attentions

A flag indicating whether to output the attention weights. Defaults to False.

TYPE: bool DEFAULT: False

output_hidden_states

A flag indicating whether to output hidden states for all layers. Defaults to False.

TYPE: bool DEFAULT: False

return_dict

A flag indicating whether to return the outputs as a dictionary. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION

A tuple of (last_hidden_state, all_hidden_states, all_attentions) with None entries dropped, or a BaseModelOutput when return_dict=True.

RAISES DESCRIPTION
TypeError

If the input parameters are not of the expected types.

ValueError

If the input parameters are not within the expected ranges.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 583-645)
def forward(
    self,
    hidden_states: mindspore.Tensor,
    attention_mask: Optional[mindspore.Tensor] = None,
    head_mask: Optional[mindspore.Tensor] = None,
    output_attentions: bool = False,
    output_hidden_states: bool = False,
    return_dict: bool = False,
    **kwargs,
):
    """
    Construct method in the MPNetEncoder class.

    Args:
        self: The instance of the MPNetEncoder class.
        hidden_states (mindspore.Tensor): The input hidden states to be processed by the encoder.
        attention_mask (Optional[mindspore.Tensor]): An optional tensor specifying which positions
            should be attended to. Defaults to None.
        head_mask (Optional[mindspore.Tensor]): An optional tensor specifying which heads to mask out.
            Defaults to None.
        output_attentions (bool): A flag indicating whether to output the attention weights. Defaults to False.
        output_hidden_states (bool): A flag indicating whether to output hidden states for all layers.
            Defaults to False.
        return_dict (bool): A flag indicating whether to return the outputs as a dictionary. Defaults to False.

    Returns:
        None

    Raises:
        TypeError: If the input parameters are not of the expected types.
        ValueError: If the input parameters are not within the expected ranges.
    """
    position_bias = self.compute_position_bias(hidden_states)
    all_hidden_states = () if output_hidden_states else None
    all_attentions = () if output_attentions else None
    for i, layer_module in enumerate(self.layer):
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        layer_outputs = layer_module(
            hidden_states,
            attention_mask,
            head_mask[i],
            position_bias,
            output_attentions=output_attentions,
            **kwargs,
        )
        hidden_states = layer_outputs[0]

        if output_attentions:
            all_attentions = all_attentions + (layer_outputs[1],)

    # Add last layer
    if output_hidden_states:
        all_hidden_states = all_hidden_states + (hidden_states,)

    if not return_dict:
        return tuple(v for v in [hidden_states, all_hidden_states, all_attentions] if v is not None)
    return BaseModelOutput(
        last_hidden_state=hidden_states,
        hidden_states=all_hidden_states,
        attentions=all_attentions,
    )

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetEncoder.relative_position_bucket(relative_position, num_buckets=32, max_distance=128) staticmethod

Bucketizes relative positions for relative attention in the MPNet model.

PARAMETER DESCRIPTION
relative_position

Tensor containing relative positions.

TYPE: Tensor

num_buckets

Number of buckets for bucketization. Defaults to 32.

TYPE: int DEFAULT: 32

max_distance

Maximum distance for bucketization. Defaults to 128.

TYPE: int DEFAULT: 128

RETURNS DESCRIPTION

mindspore.Tensor: Bucketized relative positions.
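
Because this is a static method it can be exercised on its own. A small illustration (the expected bucket values are my reading of the code below, not output captured from the library):

import mindspore
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetEncoder

# Relative offsets key_position - query_position from -3 to 3
relative_position = mindspore.Tensor([[-3, -2, -1, 0, 1, 2, 3]], mindspore.int64)
buckets = MPNetEncoder.relative_position_bucket(relative_position, num_buckets=32, max_distance=128)
print(buckets)
# Expected [[3, 2, 1, 0, 17, 18, 19]]: small offsets map to exact buckets, and
# positive (forward) offsets are shifted into the upper half by num_buckets // 2 = 16.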

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 678-708)
@staticmethod
def relative_position_bucket(relative_position, num_buckets=32, max_distance=128):
    """
    Bucketizes relative positions for relative attention in the MPNet model.

    Args:
        relative_position (torch.Tensor): Tensor containing relative positions.
        num_buckets (int, optional): Number of buckets for bucketization. Defaults to 32.
        max_distance (int, optional): Maximum distance for bucketization. Defaults to 128.

    Returns:
        torch.Tensor: Bucketized relative positions.
    """
    ret = 0
    n = -relative_position

    num_buckets //= 2
    ret += (n < 0).to(mindspore.int64) * num_buckets
    n = ops.abs(n)

    max_exact = num_buckets // 2
    is_small = n < max_exact

    val_if_large = max_exact + (
        ops.log(n.float() / max_exact) / math.log(max_distance / max_exact) * (num_buckets - max_exact)
    ).to(mindspore.int64)

    val_if_large_np = np.minimum(val_if_large.asnumpy(), ops.full_like(val_if_large, num_buckets - 1).asnumpy())
    val_if_large = mindspore.Tensor(val_if_large_np)
    ret += ops.where(is_small, n, val_if_large)
    return ret

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForMaskedLM

Bases: MPNetPreTrainedModel

MPNet model for masked language modeling.
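
A hedged end-to-end sketch (assumes the microsoft/mpnet-base checkpoint and MPNetTokenizer are available through mindnlp's from_pretrained machinery, and that the tokenizer accepts return_tensors="ms"):

from mindnlp.transformers import MPNetTokenizer, MPNetForMaskedLM

tokenizer = MPNetTokenizer.from_pretrained("microsoft/mpnet-base")
model = MPNetForMaskedLM.from_pretrained("microsoft/mpnet-base")

inputs = tokenizer("The capital of France is <mask>.", return_tensors="ms")
outputs = model(**inputs)
logits = outputs.logits                                # (1, sequence_length, vocab_size)

# Decode the most likely token at the <mask> position
predicted_ids = logits.argmax(-1)[0].asnumpy().tolist()
input_ids = inputs["input_ids"][0].asnumpy().tolist()
mask_index = input_ids.index(tokenizer.mask_token_id)
print(tokenizer.decode([predicted_ids[mask_index]]))   # e.g. "paris"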

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 923-1029)
class MPNetForMaskedLM(MPNetPreTrainedModel):
    """MPNet model for masked language modeling."""
    _tied_weights_keys = ["lm_head.decoder"]

    def __init__(self, config):
        """
        Initializes an instance of the MPNetForMaskedLM class.

        Args:
            self: The object itself.
            config (MPNetConfig): The configuration object that defines the model architecture and hyperparameters.

        Returns:
            None

        Raises:
            None.
        """
        super().__init__(config)

        self.mpnet = MPNetModel(config, add_pooling_layer=False)
        self.lm_head = MPNetLMHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_output_embeddings(self):
        """
        Retrieve the output embeddings from the decoder of the language model head.

        Args:
            self (MPNetForMaskedLM): An instance of the MPNetForMaskedLM class.
                Represents the model for Masked Language Modeling.

        Returns:
            None: The method returns the output embeddings from the decoder of the language model head.

        Raises:
            None.
        """
        return self.lm_head.decoder

    def set_output_embeddings(self, new_embeddings):
        """
        Set the output embeddings for MPNetForMaskedLM model.

        Args:
            self (MPNetForMaskedLM): The instance of the MPNetForMaskedLM class.
            new_embeddings (torch.nn.Module): The new embeddings to be set as the output embeddings for the model.

        Returns:
            None.

        Raises:
            None.
        """
        self.lm_head.decoder = new_embeddings

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        head_mask: Optional[mindspore.Tensor] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[mindspore.Tensor], MaskedLMOutput]:
        r"""
        Args:
            labels (`mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
                Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
                config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
                loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.mpnet(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]
        prediction_scores = self.lm_head(sequence_output)

        masked_lm_loss = None
        if labels is not None:
            masked_lm_loss = ops.cross_entropy(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))

        if not return_dict:
            output = (prediction_scores,) + outputs[2:]
            return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output

        return MaskedLMOutput(
            loss=masked_lm_loss,
            logits=prediction_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForMaskedLM.__init__(config)

Initializes an instance of the MPNetForMaskedLM class.

PARAMETER DESCRIPTION
self

The object itself.

config

The configuration object that defines the model architecture and hyperparameters.

TYPE: MPNetConfig

RETURNS DESCRIPTION

None

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 927-947)
def __init__(self, config):
    """
    Initializes an instance of the MPNetForMaskedLM class.

    Args:
        self: The object itself.
        config (MPNetConfig): The configuration object that defines the model architecture and hyperparameters.

    Returns:
        None

    Raises:
        None.
    """
    super().__init__(config)

    self.mpnet = MPNetModel(config, add_pooling_layer=False)
    self.lm_head = MPNetLMHead(config)

    # Initialize weights and apply final processing
    self.post_init()

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForMaskedLM.forward(input_ids=None, attention_mask=None, position_ids=None, head_mask=None, inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None)

PARAMETER DESCRIPTION
labels

Labels for computing the masked language modeling loss. Indices should be in [-100, 0, ..., config.vocab_size] (see input_ids docstring) Tokens with indices set to -100 are ignored (masked), the loss is only computed for the tokens with labels in [0, ..., config.vocab_size]

TYPE: `mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional* DEFAULT: None
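
For instance, reusing the model from the sketch above, a toy batch where only position 3 contributes to the loss (the token ids here are hypothetical):

import mindspore

input_ids = mindspore.Tensor([[0, 212, 310, 30526, 415, 2]], mindspore.int64)   # 30526 standing in for <mask>
labels    = mindspore.Tensor([[-100, -100, -100, 874, -100, -100]], mindspore.int64)

outputs = model(input_ids=input_ids, labels=labels)
print(outputs.loss)   # cross-entropy computed only over the single labelled position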

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py (lines 981-1029)
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    head_mask: Optional[mindspore.Tensor] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple[mindspore.Tensor], MaskedLMOutput]:
    r"""
    Args:
        labels (`mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring). Tokens with indices set to `-100` are ignored (masked);
            the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.mpnet(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )

    sequence_output = outputs[0]
    prediction_scores = self.lm_head(sequence_output)

    masked_lm_loss = None
    if labels is not None:
        masked_lm_loss = ops.cross_entropy(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))

    if not return_dict:
        output = (prediction_scores,) + outputs[2:]
        return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output

    return MaskedLMOutput(
        loss=masked_lm_loss,
        logits=prediction_scores,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
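
A minimal usage sketch for the masked-LM head (an assumption-laden example, not taken from the library's docs: it presumes the import paths below match the installed mindnlp package and that calling the module dispatches to `forward`; weights are randomly initialized, so the output only illustrates shapes and the `-100` masking convention):

```python
import numpy as np
import mindspore
from mindnlp.transformers import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetForMaskedLM

config = MPNetConfig()                 # default mpnet-base sized config
model = MPNetForMaskedLM(config)       # randomly initialized, no pretrained weights

batch_size, seq_len = 2, 8
ids = np.random.randint(0, config.vocab_size, (batch_size, seq_len))
labels_np = ids.copy()
labels_np[:, :4] = -100                # positions labelled -100 are ignored by the loss

input_ids = mindspore.Tensor(ids, mindspore.int64)
labels = mindspore.Tensor(labels_np, mindspore.int64)

outputs = model(input_ids, labels=labels)
print(outputs.loss)                    # scalar masked-LM loss
print(outputs.logits.shape)            # (2, 8, config.vocab_size)
```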

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForMaskedLM.get_output_embeddings()

Retrieve the output embeddings from the decoder of the language model head.

PARAMETER DESCRIPTION
self

An instance of the MPNetForMaskedLM class. Represents the model for Masked Language Modeling.

TYPE: MPNetForMaskedLM

RETURNS DESCRIPTION
nn.Linear

The decoder linear layer of the language model head, which serves as the output embeddings.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def get_output_embeddings(self):
    """
    Retrieve the output embeddings from the decoder of the language model head.

    Args:
        self (MPNetForMaskedLM): An instance of the MPNetForMaskedLM class.
            Represents the model for Masked Language Modeling.

    Returns:
        nn.Linear: The decoder linear layer of the language model head, which serves as the output embeddings.

    Raises:
        None.
    """
    return self.lm_head.decoder

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForMaskedLM.set_output_embeddings(new_embeddings)

Set the output embeddings for MPNetForMaskedLM model.

PARAMETER DESCRIPTION
self

The instance of the MPNetForMaskedLM class.

TYPE: MPNetForMaskedLM

new_embeddings

The new embeddings to be set as the output embeddings for the model.

TYPE: Module

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def set_output_embeddings(self, new_embeddings):
    """
    Set the output embeddings for MPNetForMaskedLM model.

    Args:
        self (MPNetForMaskedLM): The instance of the MPNetForMaskedLM class.
        new_embeddings (nn.Module): The new embeddings to be set as the output embeddings for the model.

    Returns:
        None.

    Raises:
        None.
    """
    self.lm_head.decoder = new_embeddings
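
As a quick sanity check (a sketch continuing the `model` built in the masked-LM example above, assuming nothing beyond the two accessors documented here), the getter and setter round-trip the same decoder module:

```python
decoder = model.get_output_embeddings()
print(type(decoder).__name__)          # Linear projecting hidden_size -> vocab_size
model.set_output_embeddings(decoder)   # a real use would pass a resized Linear instead
assert model.get_output_embeddings() is decoder
```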

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForMultipleChoice

Bases: MPNetPreTrainedModel

MPNet model for multiple choice tasks.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetForMultipleChoice(MPNetPreTrainedModel):
    """MPNet model for multiple choice tasks."""
    def __init__(self, config):
        """
        Initializes an instance of the MPNetForMultipleChoice class.

        Args:
            self (MPNetForMultipleChoice): An instance of the MPNetForMultipleChoice class.
            config (object): The configuration object for the MPNetModel.

        Returns:
            None

        Raises:
            None
        """
        super().__init__(config)

        self.mpnet = MPNetModel(config)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, 1)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        head_mask: Optional[mindspore.Tensor] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[mindspore.Tensor], MultipleChoiceModelOutput]:
        r"""
        Args:
            labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
                Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
                num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
                `input_ids` above)
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]

        flat_input_ids = input_ids.view(-1, input_ids.shape[-1]) if input_ids is not None else None
        flat_position_ids = position_ids.view(-1, position_ids.shape[-1]) if position_ids is not None else None
        flat_attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) if attention_mask is not None else None
        flat_inputs_embeds = (
            inputs_embeds.view(-1, inputs_embeds.shape[-2], inputs_embeds.shape[-1])
            if inputs_embeds is not None
            else None
        )

        outputs = self.mpnet(
            flat_input_ids,
            position_ids=flat_position_ids,
            attention_mask=flat_attention_mask,
            head_mask=head_mask,
            inputs_embeds=flat_inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        reshaped_logits = logits.view(-1, num_choices)

        loss = None
        if labels is not None:
            loss = ops.cross_entropy(reshaped_logits, labels)

        if not return_dict:
            output = (reshaped_logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return MultipleChoiceModelOutput(
            loss=loss,
            logits=reshaped_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForMultipleChoice.__init__(config)

Initializes an instance of the MPNetForMultipleChoice class.

PARAMETER DESCRIPTION
self

An instance of the MPNetForMultipleChoice class.

TYPE: MPNetForMultipleChoice

config

The configuration object for the MPNetModel.

TYPE: object

RETURNS DESCRIPTION

None

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initializes an instance of the MPNetForMultipleChoice class.

    Args:
        self (MPNetForMultipleChoice): An instance of the MPNetForMultipleChoice class.
        config (object): The configuration object for the MPNetModel.

    Returns:
        None

    Raises:
        None
    """
    super().__init__(config)

    self.mpnet = MPNetModel(config)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, 1)

    # Initialize weights and apply final processing
    self.post_init()

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForMultipleChoice.forward(input_ids=None, attention_mask=None, position_ids=None, head_mask=None, inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None)

PARAMETER DESCRIPTION
labels

Labels for computing the multiple choice classification loss. Indices should be in [0, ..., num_choices-1] where num_choices is the size of the second dimension of the input tensors. (See input_ids above)

TYPE: `mindspore.Tensor` of shape `(batch_size,)`, *optional* DEFAULT: None

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    head_mask: Optional[mindspore.Tensor] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple[mindspore.Tensor], MultipleChoiceModelOutput]:
    r"""
    Args:
        labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict
    num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]

    flat_input_ids = input_ids.view(-1, input_ids.shape[-1]) if input_ids is not None else None
    flat_position_ids = position_ids.view(-1, position_ids.shape[-1]) if position_ids is not None else None
    flat_attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) if attention_mask is not None else None
    flat_inputs_embeds = (
        inputs_embeds.view(-1, inputs_embeds.shape[-2], inputs_embeds.shape[-1])
        if inputs_embeds is not None
        else None
    )

    outputs = self.mpnet(
        flat_input_ids,
        position_ids=flat_position_ids,
        attention_mask=flat_attention_mask,
        head_mask=head_mask,
        inputs_embeds=flat_inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    pooled_output = outputs[1]

    pooled_output = self.dropout(pooled_output)
    logits = self.classifier(pooled_output)
    reshaped_logits = logits.view(-1, num_choices)

    loss = None
    if labels is not None:
        loss = ops.cross_entropy(reshaped_logits, labels)

    if not return_dict:
        output = (reshaped_logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

    return MultipleChoiceModelOutput(
        loss=loss,
        logits=reshaped_logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
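
The multiple-choice head expects inputs of shape `(batch_size, num_choices, seq_len)`; the forward pass flattens the choice dimension before the encoder and reshapes the classifier output back to one score per choice. A minimal sketch under the same assumptions as the earlier example (random weights, illustrative shapes only):

```python
import numpy as np
import mindspore
from mindnlp.transformers import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetForMultipleChoice

config = MPNetConfig()
model = MPNetForMultipleChoice(config)

batch_size, num_choices, seq_len = 2, 4, 16
input_ids = mindspore.Tensor(
    np.random.randint(0, config.vocab_size, (batch_size, num_choices, seq_len)),
    mindspore.int64,
)
labels = mindspore.Tensor(np.array([1, 3]), mindspore.int64)  # index of the correct choice

outputs = model(input_ids, labels=labels)
print(outputs.logits.shape)  # (2, 4): one score per choice
print(outputs.loss)          # cross-entropy over the choice dimension
```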

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForQuestionAnswering

Bases: MPNetPreTrainedModel

MPNet model for question answering tasks.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetForQuestionAnswering(MPNetPreTrainedModel):
    """MPNet model for question answering tasks."""
    def __init__(self, config):
        """
        Initialize the MPNetForQuestionAnswering class.

        Args:
            self (object): The instance of the MPNetForQuestionAnswering class.
            config (object):
                An object containing configuration settings for the model.

                - Type: Custom class object
                - Purpose: Specifies the configuration parameters for the model initialization.
                - Restrictions: Must contain the 'num_labels' attribute.

        Returns:
            None.

        Raises:
            AttributeError: If the 'config' object does not have the 'num_labels' attribute.
            TypeError: If the 'config' parameter is not of the expected type.
        """
        super().__init__(config)

        self.num_labels = config.num_labels
        self.mpnet = MPNetModel(config, add_pooling_layer=False)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        head_mask: Optional[mindspore.Tensor] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        start_positions: Optional[mindspore.Tensor] = None,
        end_positions: Optional[mindspore.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[mindspore.Tensor], QuestionAnsweringModelOutput]:
        r"""
        Args:
            start_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
                Labels for position (index) of the start of the labelled span for computing the token classification loss.
                Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence
                are not taken into account for computing the loss.
            end_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
                Labels for position (index) of the end of the labelled span for computing the token classification loss.
                Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence
                are not taken into account for computing the loss.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.mpnet(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, axis=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If the start/end positions carry an extra trailing dimension (e.g. on multi-GPU), squeeze it away
            if len(start_positions.shape) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.shape) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.shape[1]
            start_positions = start_positions.clamp(0, ignored_index)
            end_positions = end_positions.clamp(0, ignored_index)

            start_loss = ops.cross_entropy(start_logits, start_positions, ignore_index=ignored_index)
            end_loss = ops.cross_entropy(end_logits, end_positions, ignore_index=ignored_index)
            total_loss = (start_loss + end_loss) / 2

        if not return_dict:
            output = (start_logits, end_logits) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return QuestionAnsweringModelOutput(
            loss=total_loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForQuestionAnswering.__init__(config)

Initialize the MPNetForQuestionAnswering class.

PARAMETER DESCRIPTION
self

The instance of the MPNetForQuestionAnswering class.

TYPE: object

config

An object containing configuration settings for the model.

  • Type: Custom class object
  • Purpose: Specifies the configuration parameters for the model initialization.
  • Restrictions: Must contain the 'num_labels' attribute.

TYPE: object

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
AttributeError

If the 'config' object does not have the 'num_labels' attribute.

TypeError

If the 'config' parameter is not of the expected type.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initialize the MPNetForQuestionAnswering class.

    Args:
        self (object): The instance of the MPNetForQuestionAnswering class.
        config (object):
            An object containing configuration settings for the model.

            - Type: Custom class object
            - Purpose: Specifies the configuration parameters for the model initialization.
            - Restrictions: Must contain the 'num_labels' attribute.

    Returns:
        None.

    Raises:
        AttributeError: If the 'config' object does not have the 'num_labels' attribute.
        TypeError: If the 'config' parameter is not of the expected type.
    """
    super().__init__(config)

    self.num_labels = config.num_labels
    self.mpnet = MPNetModel(config, add_pooling_layer=False)
    self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

    # Initialize weights and apply final processing
    self.post_init()

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForQuestionAnswering.forward(input_ids=None, attention_mask=None, position_ids=None, head_mask=None, inputs_embeds=None, start_positions=None, end_positions=None, output_attentions=None, output_hidden_states=None, return_dict=None)

PARAMETER DESCRIPTION
start_positions

Labels for position (index) of the start of the labelled span for computing the token classification loss. Positions are clamped to the length of the sequence (sequence_length). Positions outside of the sequence are not taken into account for computing the loss.

TYPE: `mindspore.Tensor` of shape `(batch_size,)`, *optional* DEFAULT: None

end_positions

Labels for position (index) of the end of the labelled span for computing the token classification loss. Positions are clamped to the length of the sequence (sequence_length). Positions outside of the sequence are not taken into account for computing the loss.

TYPE: `mindspore.Tensor` of shape `(batch_size,)`, *optional* DEFAULT: None

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    head_mask: Optional[mindspore.Tensor] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    start_positions: Optional[mindspore.Tensor] = None,
    end_positions: Optional[mindspore.Tensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple[mindspore.Tensor], QuestionAnsweringModelOutput]:
    r"""
    Args:
        start_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence
            are not taken into account for computing the loss.
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.mpnet(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )

    sequence_output = outputs[0]

    logits = self.qa_outputs(sequence_output)
    start_logits, end_logits = logits.split(1, axis=-1)
    start_logits = start_logits.squeeze(-1)
    end_logits = end_logits.squeeze(-1)

    total_loss = None
    if start_positions is not None and end_positions is not None:
        # If the start/end positions carry an extra trailing dimension (e.g. on multi-GPU), squeeze it away
        if len(start_positions.shape) > 1:
            start_positions = start_positions.squeeze(-1)
        if len(end_positions.shape) > 1:
            end_positions = end_positions.squeeze(-1)
        # sometimes the start/end positions are outside our model inputs, we ignore these terms
        ignored_index = start_logits.shape[1]
        start_positions = start_positions.clamp(0, ignored_index)
        end_positions = end_positions.clamp(0, ignored_index)

        start_loss = ops.cross_entropy(start_logits, start_positions, ignore_index=ignored_index)
        end_loss = ops.cross_entropy(end_logits, end_positions, ignore_index=ignored_index)
        total_loss = (start_loss + end_loss) / 2

    if not return_dict:
        output = (start_logits, end_logits) + outputs[2:]
        return ((total_loss,) + output) if total_loss is not None else output

    return QuestionAnsweringModelOutput(
        loss=total_loss,
        start_logits=start_logits,
        end_logits=end_logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
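
A minimal sketch of the span-prediction interface (same assumptions as the earlier examples; random weights, shapes only). `start_positions` and `end_positions` are clamped to the sequence length, and out-of-range positions are excluded from the loss via `ignore_index`:

```python
import numpy as np
import mindspore
from mindnlp.transformers import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetForQuestionAnswering

config = MPNetConfig()                     # default num_labels == 2 (start/end logits)
model = MPNetForQuestionAnswering(config)

batch_size, seq_len = 2, 16
input_ids = mindspore.Tensor(
    np.random.randint(0, config.vocab_size, (batch_size, seq_len)), mindspore.int64
)
start_positions = mindspore.Tensor(np.array([3, 5]), mindspore.int64)
end_positions = mindspore.Tensor(np.array([6, 9]), mindspore.int64)

outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
print(outputs.start_logits.shape, outputs.end_logits.shape)  # (2, 16) each
print(outputs.loss)  # average of the start and end cross-entropy losses
```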

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForSequenceClassification

Bases: MPNetPreTrainedModel

MPNet model for sequence classification tasks.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetForSequenceClassification(MPNetPreTrainedModel):
    """MPNet model for sequence classification tasks."""
    def __init__(self, config):
        """
        Initializes an instance of MPNetForSequenceClassification.

        Args:
            self (object): The instance of the class.
            config (object): The configuration object containing settings for the model initialization.
                Must contain the attribute 'num_labels' specifying the number of labels for classification.

        Returns:
            None.

        Raises:
            ValueError: If the 'config' object does not have the 'num_labels' attribute.
        """
        super().__init__(config)

        self.num_labels = config.num_labels
        self.mpnet = MPNetModel(config, add_pooling_layer=False)
        self.classifier = MPNetClassificationHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        head_mask: Optional[mindspore.Tensor] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[mindspore.Tensor], SequenceClassifierOutput]:
        r"""
        Args:
            labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
                Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
                config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss); if
                `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.mpnet(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == mindspore.int64 or labels.dtype == mindspore.int32):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = nn.MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss = ops.cross_entropy(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = nn.BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForSequenceClassification.__init__(config)

Initializes an instance of MPNetForSequenceClassification.

PARAMETER DESCRIPTION
self

The instance of the class.

TYPE: object

config

The configuration object containing settings for the model initialization. Must contain the attribute 'num_labels' specifying the number of labels for classification.

TYPE: object

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
ValueError

If the 'config' object does not have the 'num_labels' attribute.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initializes an instance of MPNetForSequenceClassification.

    Args:
        self (object): The instance of the class.
        config (object): The configuration object containing settings for the model initialization.
            Must contain the attribute 'num_labels' specifying the number of labels for classification.

    Returns:
        None.

    Raises:
        ValueError: If the 'config' object does not have the 'num_labels' attribute.
    """
    super().__init__(config)

    self.num_labels = config.num_labels
    self.mpnet = MPNetModel(config, add_pooling_layer=False)
    self.classifier = MPNetClassificationHead(config)

    # Initialize weights and apply final processing
    self.post_init()

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForSequenceClassification.forward(input_ids=None, attention_mask=None, position_ids=None, head_mask=None, inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None)

PARAMETER DESCRIPTION
labels

Labels for computing the sequence classification/regression loss. Indices should be in [0, ..., config.num_labels - 1]. If config.num_labels == 1 a regression loss is computed (Mean-Square loss); if config.num_labels > 1 a classification loss is computed (Cross-Entropy).

TYPE: `mindspore.Tensor` of shape `(batch_size,)`, *optional* DEFAULT: None

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    head_mask: Optional[mindspore.Tensor] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple[mindspore.Tensor], SequenceClassifierOutput]:
    r"""
    Args:
        labels (`mindspore.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss); if
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.mpnet(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    sequence_output = outputs[0]
    logits = self.classifier(sequence_output)

    loss = None
    if labels is not None:
        if self.config.problem_type is None:
            if self.num_labels == 1:
                self.config.problem_type = "regression"
            elif self.num_labels > 1 and (labels.dtype == mindspore.int64 or labels.dtype == mindspore.int32):
                self.config.problem_type = "single_label_classification"
            else:
                self.config.problem_type = "multi_label_classification"

        if self.config.problem_type == "regression":
            loss_fct = nn.MSELoss()
            if self.num_labels == 1:
                loss = loss_fct(logits.squeeze(), labels.squeeze())
            else:
                loss = loss_fct(logits, labels)
        elif self.config.problem_type == "single_label_classification":
            loss = ops.cross_entropy(logits.view(-1, self.num_labels), labels.view(-1))
        elif self.config.problem_type == "multi_label_classification":
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(logits, labels)
    if not return_dict:
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

    return SequenceClassifierOutput(
        loss=loss,
        logits=logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
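
A minimal sketch of single-label classification (same assumptions as above). Because the labels are integer-typed and `num_labels > 1`, `problem_type` resolves to `single_label_classification` and a cross-entropy loss is used:

```python
import numpy as np
import mindspore
from mindnlp.transformers import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetForSequenceClassification

config = MPNetConfig(num_labels=3)
model = MPNetForSequenceClassification(config)

batch_size, seq_len = 2, 16
input_ids = mindspore.Tensor(
    np.random.randint(0, config.vocab_size, (batch_size, seq_len)), mindspore.int64
)
labels = mindspore.Tensor(np.array([0, 2]), mindspore.int64)  # one class index per sequence

outputs = model(input_ids, labels=labels)
print(outputs.logits.shape)  # (2, 3): one score per class
print(outputs.loss)          # cross-entropy loss
```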

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForTokenClassification

Bases: MPNetPreTrainedModel

MPNet model for token classification tasks.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetForTokenClassification(MPNetPreTrainedModel):
    """MPNet model for token classification tasks."""
    def __init__(self, config):
        """
        Initializes a new instance of the MPNetForTokenClassification class.

        Args:
            self: An instance of the MPNetForTokenClassification class.
            config: An instance of the MPNetConfig class containing the configuration parameters for the model.

        Returns:
            None.

        Raises:
            None.
        """
        super().__init__(config)
        self.num_labels = config.num_labels

        self.mpnet = MPNetModel(config, add_pooling_layer=False)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        head_mask: Optional[mindspore.Tensor] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[mindspore.Tensor], TokenClassifierOutput]:
        r"""
        Args:
            labels (`mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
                Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.mpnet(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss = ops.cross_entropy(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForTokenClassification.__init__(config)

Initializes a new instance of the MPNetForTokenClassification class.

PARAMETER DESCRIPTION
self

An instance of the MPNetForTokenClassification class.

config

An instance of the MPNetConfig class containing the configuration parameters for the model.

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initializes a new instance of the MPNetForTokenClassification class.

    Args:
        self: An instance of the MPNetForTokenClassification class.
        config: An instance of the MPNetConfig class containing the configuration parameters for the model.

    Returns:
        None.

    Raises:
        None.
    """
    super().__init__(config)
    self.num_labels = config.num_labels

    self.mpnet = MPNetModel(config, add_pooling_layer=False)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)

    # Initialize weights and apply final processing
    self.post_init()

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetForTokenClassification.forward(input_ids=None, attention_mask=None, position_ids=None, head_mask=None, inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None)

PARAMETER DESCRIPTION
labels

Labels for computing the token classification loss. Indices should be in [0, ..., config.num_labels - 1].

TYPE: `mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional* DEFAULT: None

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    head_mask: Optional[mindspore.Tensor] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    labels: Optional[mindspore.Tensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple[mindspore.Tensor], TokenClassifierOutput]:
    r"""
    Args:
        labels (`mindspore.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
    """
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    outputs = self.mpnet(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )

    sequence_output = outputs[0]

    sequence_output = self.dropout(sequence_output)
    logits = self.classifier(sequence_output)

    loss = None
    if labels is not None:
        loss = ops.cross_entropy(logits.view(-1, self.num_labels), labels.view(-1))

    if not return_dict:
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

    return TokenClassifierOutput(
        loss=loss,
        logits=logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
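
A minimal sketch of per-token tagging (same assumptions as above); the loss is a token-level cross-entropy over `num_labels` classes:

```python
import numpy as np
import mindspore
from mindnlp.transformers import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetForTokenClassification

config = MPNetConfig(num_labels=5)
model = MPNetForTokenClassification(config)

batch_size, seq_len = 2, 16
input_ids = mindspore.Tensor(
    np.random.randint(0, config.vocab_size, (batch_size, seq_len)), mindspore.int64
)
labels = mindspore.Tensor(
    np.random.randint(0, config.num_labels, (batch_size, seq_len)), mindspore.int64
)

outputs = model(input_ids, labels=labels)
print(outputs.logits.shape)  # (2, 16, 5): one score per token and label
print(outputs.loss)          # token-level cross-entropy
```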

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetIntermediate

Bases: Module

Copied from transformers.models.bert.modeling_bert.BertIntermediate

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetIntermediate(nn.Module):
    """Copied from transformers.models.bert.modeling_bert.BertIntermediate"""
    def __init__(self, config):
        """
        Initializes an instance of the MPNetIntermediate class.

        Args:
            self: The instance of the MPNetIntermediate class.
            config:
                An object containing configuration parameters for the MPNetIntermediate instance.

                - Type: Any
                - Purpose: Contains configuration settings for the MPNetIntermediate instance.
                - Restrictions: None

        Returns:
            None

        Raises:
            TypeError: If the config parameter is not provided.
            ValueError: If the hidden activation function specified in the config is not supported.
        """
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: mindspore.Tensor) -> mindspore.Tensor:
        """
        Constructs the intermediate layer of the MPNet model.

        Args:
            self (MPNetIntermediate): The instance of the MPNetIntermediate class.
            hidden_states (mindspore.Tensor): The input tensor of shape (batch_size, sequence_length, hidden_size)
                representing the hidden states.

        Returns:
            mindspore.Tensor: The output tensor of shape (batch_size, sequence_length, intermediate_size) containing
                the hidden states after the dense projection and activation.

        Raises:
            TypeError: If the input 'hidden_states' is not a mindspore.Tensor.
            ValueError: If the shape of 'hidden_states' is not (batch_size, sequence_length, hidden_size).
        """
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetIntermediate.__init__(config)

Initializes an instance of the MPNetIntermediate class.

PARAMETER DESCRIPTION
self

The instance of the MPNetIntermediate class.

config

An object containing configuration parameters for the MPNetIntermediate instance.

  • Type: Any
  • Purpose: Contains configuration settings for the MPNetIntermediate instance.
  • Restrictions: None

RETURNS DESCRIPTION

None

RAISES DESCRIPTION
TypeError

If the config parameter is not provided.

ValueError

If the hidden activation function specified in the config is not supported.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initializes an instance of the MPNetIntermediate class.

    Args:
        self: The instance of the MPNetIntermediate class.
        config:
            An object containing configuration parameters for the MPNetIntermediate instance.

            - Type: Any
            - Purpose: Contains configuration settings for the MPNetIntermediate instance.
            - Restrictions: None

    Returns:
        None

    Raises:
        TypeError: If the config parameter is not provided.
        ValueError: If the hidden activation function specified in the config is not supported.
    """
    super().__init__()
    self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
    if isinstance(config.hidden_act, str):
        self.intermediate_act_fn = ACT2FN[config.hidden_act]
    else:
        self.intermediate_act_fn = config.hidden_act

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetIntermediate.forward(hidden_states)

Constructs the intermediate layer of the MPNet model.

PARAMETER DESCRIPTION
self

The instance of the MPNetIntermediate class.

TYPE: MPNetIntermediate

hidden_states

The input tensor of shape (batch_size, sequence_length, hidden_size) representing the hidden states.

TYPE: Tensor

RETURNS DESCRIPTION
Tensor

mindspore.Tensor: The output tensor of shape (batch_size, sequence_length, intermediate_size) containing the hidden states after the dense projection and activation.

RAISES DESCRIPTION
TypeError

If the input 'hidden_states' is not a mindspore.Tensor.

ValueError

If the shape of 'hidden_states' is not (batch_size, sequence_length, hidden_size).

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(self, hidden_states: mindspore.Tensor) -> mindspore.Tensor:
    """
    Constructs the intermediate layer of the MPNet model.

    Args:
        self (MPNetIntermediate): The instance of the MPNetIntermediate class.
        hidden_states (mindspore.Tensor): The input tensor of shape (batch_size, sequence_length, hidden_size)
            representing the hidden states.

    Returns:
        mindspore.Tensor: The output tensor of shape (batch_size, sequence_length, intermediate_size) containing
            the hidden states after the dense projection and activation.

    Raises:
        TypeError: If the input 'hidden_states' is not a mindspore.Tensor.
        ValueError: If the shape of 'hidden_states' is not (batch_size, sequence_length, hidden_size).
    """
    hidden_states = self.dense(hidden_states)
    hidden_states = self.intermediate_act_fn(hidden_states)
    return hidden_states
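
A shape-only sketch of the intermediate (feed-forward expansion) layer, under the same import assumptions as the earlier examples: it projects `hidden_size` up to `intermediate_size` and applies the configured activation:

```python
import numpy as np
import mindspore
from mindnlp.transformers import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetIntermediate

config = MPNetConfig()
layer = MPNetIntermediate(config)

hidden = mindspore.Tensor(np.random.randn(2, 8, config.hidden_size).astype(np.float32))
out = layer(hidden)
print(out.shape)  # (2, 8, config.intermediate_size), i.e. (2, 8, 3072)
```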

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetLMHead

Bases: Module

MPNet Head for masked and permuted language modeling.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetLMHead(nn.Module):
    """MPNet Head for masked and permuted language modeling."""
    def __init__(self, config):
        """
        This method initializes an instance of the MPNetLMHead class.

        Args:
            self: The instance of the MPNetLMHead class.
            config:
                An object containing configuration parameters for the MPNetLMHead model.

                - Type: Config object
                - Purpose: Specifies the configuration settings for the MPNetLMHead model.
                - Restrictions: Must be a valid configuration object.

        Returns:
            None

        Raises:
            ValueError: If the configuration object is invalid or missing required parameters.
            TypeError: If the configuration object is not of the expected type.
        """
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

        self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        self.bias = mindspore.Parameter(ops.zeros(config.vocab_size))

        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
        self.decoder.bias = self.bias

    def forward(self, features, **kwargs):
        """
        This method forwards the output by processing the input features through various layers.

        Args:
            self (MPNetLMHead): Instance of the MPNetLMHead class.
            features (tensor): Input features to be processed. Expected to be a tensor data type.

        Returns:
            mindspore.Tensor: The prediction scores over the vocabulary, of shape (batch_size, sequence_length, vocab_size).

        Raises:
            None.
        """
        x = self.dense(features)
        x = gelu(x)
        x = self.layer_norm(x)

        # project back to size of vocabulary with bias
        x = self.decoder(x)

        return x

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetLMHead.__init__(config)

This method initializes an instance of the MPNetLMHead class.

PARAMETER DESCRIPTION
self

The instance of the MPNetLMHead class.

config

An object containing configuration parameters for the MPNetLMHead model.

  • Type: Config object
  • Purpose: Specifies the configuration settings for the MPNetLMHead model.
  • Restrictions: Must be a valid configuration object.

RETURNS DESCRIPTION

None

RAISES DESCRIPTION
ValueError

If the configuration object is invalid or missing required parameters.

TypeError

If the configuration object is not of the expected type.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    This method initializes an instance of the MPNetLMHead class.

    Args:
        self: The instance of the MPNetLMHead class.
        config:
            An object containing configuration parameters for the MPNetLMHead model.

            - Type: Config object
            - Purpose: Specifies the configuration settings for the MPNetLMHead model.
            - Restrictions: Must be a valid configuration object.

    Returns:
        None

    Raises:
        ValueError: If the configuration object is invalid or missing required parameters.
        TypeError: If the configuration object is not of the expected type.
    """
    super().__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
    self.bias = mindspore.Parameter(ops.zeros(config.vocab_size))

    # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
    self.decoder.bias = self.bias

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetLMHead.forward(features, **kwargs)

This method forwards the output by processing the input features through various layers.

PARAMETER DESCRIPTION
self

Instance of the MPNetLMHead class.

TYPE: MPNetLMHead

features

Input features to be processed. Expected to be a tensor data type.

TYPE: tensor

RETURNS DESCRIPTION
Tensor

The prediction scores over the vocabulary, of shape (batch_size, sequence_length, vocab_size).

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(self, features, **kwargs):
    """
    This method forwards the output by processing the input features through various layers.

    Args:
        self (MPNetLMHead): Instance of the MPNetLMHead class.
        features (tensor): Input features to be processed. Expected to be a tensor data type.

    Returns:
        mindspore.Tensor: The prediction scores over the vocabulary, of shape (batch_size, sequence_length, vocab_size).

    Raises:
        None.
    """
    x = self.dense(features)
    x = gelu(x)
    x = self.layer_norm(x)

    # project back to size of vocabulary with bias
    x = self.decoder(x)

    return x
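
A shape-only sketch of the LM head in isolation (same assumptions as above): it maps encoder hidden states to vocabulary logits via dense, GELU, layer norm, and the decoder projection:

```python
import numpy as np
import mindspore
from mindnlp.transformers import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetLMHead

config = MPNetConfig()
head = MPNetLMHead(config)

hidden = mindspore.Tensor(np.random.randn(2, 8, config.hidden_size).astype(np.float32))
scores = head(hidden)
print(scores.shape)  # (2, 8, config.vocab_size)
```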

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetLayer

Bases: Module

Single layer in the MPNet model architecture.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetLayer(nn.Module):
    """Single layer in the MPNet model architecture."""
    def __init__(self, config):
        """
        Initializes an instance of the MPNetLayer class.

        Args:
            self (MPNetLayer): The instance of the MPNetLayer class.
            config (object): The configuration object used to initialize the MPNetLayer.
                This object contains the settings and parameters required for the MPNetLayer.

        Returns:
            None.

        Raises:
            None.
        """
        super().__init__()
        self.attention = MPNetAttention(config)
        self.intermediate = MPNetIntermediate(config)
        self.output = MPNetOutput(config)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        position_bias=None,
        output_attentions=False,
        **kwargs,
    ):
        """
        Constructs an MPNetLayer.

        Args:
            self (object): The object instance.
            hidden_states (tensor): The input hidden states of shape (batch_size, sequence_length, hidden_size).
            attention_mask (tensor, optional): The attention mask of shape (batch_size, sequence_length). Defaults to None.
            head_mask (tensor, optional): The head mask of shape (num_heads). Defaults to None.
            position_bias (tensor, optional): The position bias of shape (num_heads, sequence_length, sequence_length).
                Defaults to None.
            output_attentions (bool, optional): Whether to output attentions. Defaults to False.

        Returns:
            tuple: A tuple containing layer_output of shape (batch_size, sequence_length, hidden_size) and
                additional optional outputs.

        Raises:
            ValueError: If the input dimensions are invalid or incompatible.
            TypeError: If the input types are incorrect.
            RuntimeError: If there is a runtime error during the execution of the method.
            """
        self_attention_outputs = self.attention(
            hidden_states,
            attention_mask,
            head_mask,
            position_bias=position_bias,
            output_attentions=output_attentions,
        )
        attention_output = self_attention_outputs[0]
        outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

        intermediate_output = self.intermediate(attention_output)
        layer_output = self.output(intermediate_output, attention_output)
        outputs = (layer_output,) + outputs
        return outputs
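
A shape-only sketch of a single encoder layer (same assumptions as above; no attention mask or position bias is supplied, both of which the layer treats as optional). The forward pass returns a tuple whose first element is the updated hidden states:

```python
import numpy as np
import mindspore
from mindnlp.transformers import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetLayer

config = MPNetConfig()
layer = MPNetLayer(config)

hidden = mindspore.Tensor(np.random.randn(2, 8, config.hidden_size).astype(np.float32))
layer_output = layer(hidden)[0]   # first tuple element: new hidden states
print(layer_output.shape)         # (2, 8, config.hidden_size)
```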

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetLayer.__init__(config)

Initializes an instance of the MPNetLayer class.

PARAMETER DESCRIPTION
self

The instance of the MPNetLayer class.

TYPE: MPNetLayer

config

The configuration object used to initialize the MPNetLayer. This object contains the settings and parameters required for the MPNetLayer.

TYPE: object

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initializes an instance of the MPNetLayer class.

    Args:
        self (MPNetLayer): The instance of the MPNetLayer class.
        config (object): The configuration object used to initialize the MPNetLayer.
            This object contains the settings and parameters required for the MPNetLayer.

    Returns:
        None.

    Raises:
        None.
    """
    super().__init__()
    self.attention = MPNetAttention(config)
    self.intermediate = MPNetIntermediate(config)
    self.output = MPNetOutput(config)

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetLayer.forward(hidden_states, attention_mask=None, head_mask=None, position_bias=None, output_attentions=False, **kwargs)

Constructs an MPNetLayer.

PARAMETER DESCRIPTION
self

The object instance.

TYPE: object

hidden_states

The input hidden states of shape (batch_size, sequence_length, hidden_size).

TYPE: tensor

attention_mask

The attention mask of shape (batch_size, sequence_length). Defaults to None.

TYPE: tensor DEFAULT: None

head_mask

The head mask of shape (num_heads). Defaults to None.

TYPE: tensor DEFAULT: None

position_bias

The position bias of shape (num_heads, sequence_length, sequence_length). Defaults to None.

TYPE: tensor DEFAULT: None

output_attentions

Whether to output attentions. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
tuple

A tuple containing layer_output of shape (batch_size, sequence_length, hidden_size) and additional optional outputs.

RAISES DESCRIPTION
ValueError

If the input dimensions are invalid or incompatible.

TypeError

If the input types are incorrect.

RuntimeError

If there is a runtime error during the execution of the method.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(
    self,
    hidden_states,
    attention_mask=None,
    head_mask=None,
    position_bias=None,
    output_attentions=False,
    **kwargs,
):
    """
    Constructs an MPNetLayer.

    Args:
        self (object): The object instance.
        hidden_states (tensor): The input hidden states of shape (batch_size, sequence_length, hidden_size).
        attention_mask (tensor, optional): The attention mask of shape (batch_size, sequence_length). Defaults to None.
        head_mask (tensor, optional): The head mask of shape (num_heads). Defaults to None.
        position_bias (tensor, optional): The position bias of shape (num_heads, sequence_length, sequence_length).
            Defaults to None.
        output_attentions (bool, optional): Whether to output attentions. Defaults to False.

    Returns:
        tuple: A tuple containing layer_output of shape (batch_size, sequence_length, hidden_size) and
            additional optional outputs.

    Raises:
        ValueError: If the input dimensions are invalid or incompatible.
        TypeError: If the input types are incorrect.
        RuntimeError: If there is a runtime error during the execution of the method.
        """
    self_attention_outputs = self.attention(
        hidden_states,
        attention_mask,
        head_mask,
        position_bias=position_bias,
        output_attentions=output_attentions,
    )
    attention_output = self_attention_outputs[0]
    outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

    intermediate_output = self.intermediate(attention_output)
    layer_output = self.output(intermediate_output, attention_output)
    outputs = (layer_output,) + outputs
    return outputs

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetModel

Bases: MPNetPreTrainedModel

MPNet model architecture.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetModel(MPNetPreTrainedModel):
    """MPNet model architecture."""
    def __init__(self, config, add_pooling_layer=True):
        """
        Initializes an instance of the MPNetModel class.

        Args:
            self: The instance of the class.
            config (dict): A dictionary containing the configuration parameters for the model.
            add_pooling_layer (bool): A flag indicating whether to include a pooling layer in the model. Defaults to True.

        Returns:
            None.

        Raises:
            None.
        """
        super().__init__(config)
        self.config = config

        self.embeddings = MPNetEmbeddings(config)
        self.encoder = MPNetEncoder(config)
        self.pooler = MPNetPooler(config) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        """
        This method retrieves the input embeddings from the MPNetModel.

        Args:
            self: An instance of the MPNetModel class.

        Returns:
            nn.Embedding: The word-embedding module used as the model's input embeddings.

        Raises:
            None.
        """
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        """
        Method to set the input embeddings in the MPNetModel class.

        Args:
            self (MPNetModel): The instance of the MPNetModel class.
            value: The input value representing the embeddings to be set for the model.
                It should be compatible with the expected format for word embeddings.

        Returns:
            None.

        Raises:
            None.
        """
        self.embeddings.word_embeddings = value

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        head_mask: Optional[mindspore.Tensor] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        **kwargs,
    ) -> Union[Tuple[mindspore.Tensor], BaseModelOutputWithPooling]:
        """
        Constructs the MPNet model.

        Args:
            self (MPNetModel): The instance of the MPNetModel class.
            input_ids (Optional[mindspore.Tensor]): The input tensor containing the indices of input sequence tokens.
            attention_mask (Optional[mindspore.Tensor]): The optional attention mask tensor specifying which tokens
                should be attended to.
            position_ids (Optional[mindspore.Tensor]): The optional input tensor containing the position indices
                of each input token.
            head_mask (Optional[mindspore.Tensor]): The optional tensor specifying which heads should be masked in
                the self-attention layers.
            inputs_embeds (Optional[mindspore.Tensor]): The optional input tensor containing the embeddings of
                each input token.
            output_attentions (Optional[bool]): Whether to return the attentions.
            output_hidden_states (Optional[bool]): Whether to return the hidden states.
            return_dict (Optional[bool]): Whether to return the output as a dictionary.

        Returns:
            Union[Tuple[mindspore.Tensor], BaseModelOutputWithPooling]:
                The output of the MPNet model.

                - If `return_dict` is `False`, a tuple containing the following elements is returned:

                    - sequence_output (mindspore.Tensor): The output tensor of the encoder.
                    - pooled_output (mindspore.Tensor): The pooled output tensor.
                    - hidden_states (Tuple[mindspore.Tensor]): The hidden states of all layers.
                    - attentions (Tuple[mindspore.Tensor]): The attentions of all layers.

                - If `return_dict` is `True`, an instance of BaseModelOutputWithPooling is returned,
                which contains the following attributes:

                    - last_hidden_state (mindspore.Tensor): The output tensor of the encoder.
                    - pooler_output (mindspore.Tensor): The pooled output tensor.
                    - hidden_states (Tuple[mindspore.Tensor]): The hidden states of all layers.
                    - attentions (Tuple[mindspore.Tensor]): The attentions of all layers.

        Raises:
            ValueError: If both `input_ids` and `inputs_embeds` are provided simultaneously.
            ValueError: If neither `input_ids` nor `inputs_embeds` are provided.
            ValueError: If the dimensions of `input_ids` and `attention_mask` do not match.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
            input_shape = input_ids.shape
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.shape[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if attention_mask is None:
            attention_mask = ops.ones(input_shape)
        extended_attention_mask: mindspore.Tensor = self.get_extended_attention_mask(attention_mask, input_shape)

        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
        embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids, inputs_embeds=inputs_embeds)
        encoder_outputs = self.encoder(
            embedding_output,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )
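
A brief construction sketch follows; it assumes the default configuration and the import paths named on this page, and is not taken from the library's own examples.

# Hypothetical sketch: build the bare MPNet encoder with and without the pooler.
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetModel

config = MPNetConfig()                               # mpnet-base sized defaults
model = MPNetModel(config)                           # embeddings + encoder + pooler
headless = MPNetModel(config, add_pooling_layer=False)
assert headless.pooler is None                       # pooled_output will then be None in forward()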

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetModel.__init__(config, add_pooling_layer=True)

Initializes an instance of the MPNetModel class.

PARAMETER DESCRIPTION
self

The instance of the class.

config

A dictionary containing the configuration parameters for the model.

TYPE: dict

add_pooling_layer

A flag indicating whether to include a pooling layer in the model. Defaults to True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config, add_pooling_layer=True):
    """
    Initializes an instance of the MPNetModel class.

    Args:
        self: The instance of the class.
        config (dict): A dictionary containing the configuration parameters for the model.
        add_pooling_layer (bool): A flag indicating whether to include a pooling layer in the model. Defaults to True.

    Returns:
        None.

    Raises:
        None.
    """
    super().__init__(config)
    self.config = config

    self.embeddings = MPNetEmbeddings(config)
    self.encoder = MPNetEncoder(config)
    self.pooler = MPNetPooler(config) if add_pooling_layer else None

    # Initialize weights and apply final processing
    self.post_init()

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetModel.forward(input_ids=None, attention_mask=None, position_ids=None, head_mask=None, inputs_embeds=None, output_attentions=None, output_hidden_states=None, return_dict=None, **kwargs)

Constructs the MPNet model.

PARAMETER DESCRIPTION
self

The instance of the MPNetModel class.

TYPE: MPNetModel

input_ids

The input tensor containing the indices of input sequence tokens.

TYPE: Optional[Tensor] DEFAULT: None

attention_mask

The optional attention mask tensor specifying which tokens should be attended to.

TYPE: Optional[Tensor] DEFAULT: None

position_ids

The optional input tensor containing the position indices of each input token.

TYPE: Optional[Tensor] DEFAULT: None

head_mask

The optional tensor specifying which heads should be masked in the self-attention layers.

TYPE: Optional[Tensor] DEFAULT: None

inputs_embeds

The optional input tensor containing the embeddings of each input token.

TYPE: Optional[Tensor] DEFAULT: None

output_attentions

Whether to return the attentions.

TYPE: Optional[bool] DEFAULT: None

output_hidden_states

Whether to return the hidden states.

TYPE: Optional[bool] DEFAULT: None

return_dict

Whether to return the output as a dictionary.

TYPE: Optional[bool] DEFAULT: None

RETURNS DESCRIPTION
Union[Tuple[Tensor], BaseModelOutputWithPooling]

Union[Tuple[mindspore.Tensor], BaseModelOutputWithPooling]: The output of the MPNet model.

  • If return_dict is False, a tuple containing the following elements is returned:

    • sequence_output (mindspore.Tensor): The output tensor of the encoder.
    • pooled_output (mindspore.Tensor): The pooled output tensor.
    • hidden_states (Tuple[mindspore.Tensor]): The hidden states of all layers.
    • attentions (Tuple[mindspore.Tensor]): The attentions of all layers.
  • If return_dict is True, an instance of BaseModelOutputWithPooling is returned, which contains the following attributes:

    • last_hidden_state (mindspore.Tensor): The output tensor of the encoder.
    • pooler_output (mindspore.Tensor): The pooled output tensor.
    • hidden_states (Tuple[mindspore.Tensor]): The hidden states of all layers.
    • attentions (Tuple[mindspore.Tensor]): The attentions of all layers.
RAISES DESCRIPTION
ValueError

If both input_ids and inputs_embeds are provided simultaneously.

ValueError

If neither input_ids nor inputs_embeds are provided.

ValueError

If the dimensions of input_ids and attention_mask do not match.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(
    self,
    input_ids: Optional[mindspore.Tensor] = None,
    attention_mask: Optional[mindspore.Tensor] = None,
    position_ids: Optional[mindspore.Tensor] = None,
    head_mask: Optional[mindspore.Tensor] = None,
    inputs_embeds: Optional[mindspore.Tensor] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
    **kwargs,
) -> Union[Tuple[mindspore.Tensor], BaseModelOutputWithPooling]:
    """
    Constructs the MPNet model.

    Args:
        self (MPNetModel): The instance of the MPNetModel class.
        input_ids (Optional[mindspore.Tensor]): The input tensor containing the indices of input sequence tokens.
        attention_mask (Optional[mindspore.Tensor]): The optional attention mask tensor specifying which tokens
            should be attended to.
        position_ids (Optional[mindspore.Tensor]): The optional input tensor containing the position indices
            of each input token.
        head_mask (Optional[mindspore.Tensor]): The optional tensor specifying which heads should be masked in
            the self-attention layers.
        inputs_embeds (Optional[mindspore.Tensor]): The optional input tensor containing the embeddings of
            each input token.
        output_attentions (Optional[bool]): Whether to return the attentions.
        output_hidden_states (Optional[bool]): Whether to return the hidden states.
        return_dict (Optional[bool]): Whether to return the output as a dictionary.

    Returns:
        Union[Tuple[mindspore.Tensor], BaseModelOutputWithPooling]:
            The output of the MPNet model.

            - If `return_dict` is `False`, a tuple containing the following elements is returned:

                - sequence_output (mindspore.Tensor): The output tensor of the encoder.
                - pooled_output (mindspore.Tensor): The pooled output tensor.
                - hidden_states (Tuple[mindspore.Tensor]): The hidden states of all layers.
                - attentions (Tuple[mindspore.Tensor]): The attentions of all layers.

            - If `return_dict` is `True`, an instance of BaseModelOutputWithPooling is returned,
            which contains the following attributes:

                - last_hidden_state (mindspore.Tensor): The output tensor of the encoder.
                - pooler_output (mindspore.Tensor): The pooled output tensor.
                - hidden_states (Tuple[mindspore.Tensor]): The hidden states of all layers.
                - attentions (Tuple[mindspore.Tensor]): The attentions of all layers.

    Raises:
        ValueError: If both `input_ids` and `inputs_embeds` are provided simultaneously.
        ValueError: If neither `input_ids` nor `inputs_embeds` are provided.
        ValueError: If the dimensions of `input_ids` and `attention_mask` do not match.
    """
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (
        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
    )
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    if input_ids is not None and inputs_embeds is not None:
        raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
    elif input_ids is not None:
        self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
        input_shape = input_ids.shape
    elif inputs_embeds is not None:
        input_shape = inputs_embeds.shape[:-1]
    else:
        raise ValueError("You have to specify either input_ids or inputs_embeds")

    if attention_mask is None:
        attention_mask = ops.ones(input_shape)
    extended_attention_mask: mindspore.Tensor = self.get_extended_attention_mask(attention_mask, input_shape)

    head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
    embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids, inputs_embeds=inputs_embeds)
    encoder_outputs = self.encoder(
        embedding_output,
        attention_mask=extended_attention_mask,
        head_mask=head_mask,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    sequence_output = encoder_outputs[0]
    pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

    if not return_dict:
        return (sequence_output, pooled_output) + encoder_outputs[1:]

    return BaseModelOutputWithPooling(
        last_hidden_state=sequence_output,
        pooler_output=pooled_output,
        hidden_states=encoder_outputs.hidden_states,
        attentions=encoder_outputs.attentions,
    )
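
The sketch below exercises the forward pass under stated assumptions: the token ids, batch and sequence sizes are made up, and padding uses the default pad_token_id of 1. Only the class names and the return_dict behaviour described above come from this page.

# Hypothetical sketch: one forward pass, reading the outputs both ways.
import numpy as np
import mindspore
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetModel

model = MPNetModel(MPNetConfig())
input_ids = mindspore.Tensor(np.array([[0, 2057, 2003, 6254, 2, 1, 1]]), mindspore.int64)
attention_mask = (input_ids != 1).astype(mindspore.int64)    # mask out the two trailing pad tokens

outputs = model(input_ids, attention_mask=attention_mask, return_dict=True)
sequence_output = outputs.last_hidden_state    # (1, 7, 768)
pooled_output = outputs.pooler_output          # (1, 768)

# With return_dict=False the same tensors come back positionally.
sequence_output, pooled_output = model(input_ids, attention_mask=attention_mask, return_dict=False)[:2]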

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetModel.get_input_embeddings()

This method retrieves the input embeddings from the MPNetModel.

PARAMETER DESCRIPTION
self

An instance of the MPNetModel class.

RETURNS DESCRIPTION
nn.Embedding

The word-embedding module (self.embeddings.word_embeddings) used as the model's input embeddings.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def get_input_embeddings(self):
    """
    This method retrieves the input embeddings from the MPNetModel.

    Args:
        self: An instance of the MPNetModel class.

    Returns:
        nn.Embedding: The word-embedding module used as the model's input embeddings.

    Raises:
        None.
    """
    return self.embeddings.word_embeddings

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetModel.set_input_embeddings(value)

Method to set the input embeddings in the MPNetModel class.

PARAMETER DESCRIPTION
self

The instance of the MPNetModel class.

TYPE: MPNetModel

value

The input value representing the embeddings to be set for the model. It should be compatible with the expected format for word embeddings.

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def set_input_embeddings(self, value):
    """
    Method to set the input embeddings in the MPNetModel class.

    Args:
        self (MPNetModel): The instance of the MPNetModel class.
        value: The input value representing the embeddings to be set for the model.
            It should be compatible with the expected format for word embeddings.

    Returns:
        None.

    Raises:
        None.
    """
    self.embeddings.word_embeddings = value
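
A small sketch of the getter/setter pair, assuming the default configuration; the .weight attribute is assumed per the Embedding handling shown in _init_weights on this page.

# Hypothetical sketch: read the word-embedding table and set it back unchanged.
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetModel

model = MPNetModel(MPNetConfig())
embeddings = model.get_input_embeddings()        # the word-embedding module
print(embeddings.weight.shape)                   # (30527, 768) with the default config
model.set_input_embeddings(embeddings)           # accepts any compatible embedding module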

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetOutput

Bases: Module

Copied from transformers.models.bert.modeling_bert.BertOutput

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetOutput(nn.Module):
    """Copied from transformers.models.bert.modeling_bert.BertOutput"""
    def __init__(self, config):
        """
        Initializes an instance of the MPNetOutput class.

        Args:
            self: The instance of the MPNetOutput class.
            config:
                An object containing configuration parameters.

                - Type: Any
                - Purpose: The configuration object specifying model settings.
                - Restrictions: Must be a valid configuration object.

        Returns:
            None.

        Raises:
            TypeError: If the config parameter is not of the expected type.
            ValueError: If the config parameter does not contain the required attributes.
        """
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)

    def forward(self, hidden_states: mindspore.Tensor, input_tensor: mindspore.Tensor) -> mindspore.Tensor:
        """
        Constructs the MPNetOutput.

        Args:
            self (MPNetOutput): An instance of the MPNetOutput class.
            hidden_states (mindspore.Tensor): A tensor containing the hidden states.
                This tensor is input to the dense layer, which performs a linear transformation on the hidden states.
                The shape of this tensor should be compatible with the dense layer's weight matrix.
            input_tensor (mindspore.Tensor): A tensor containing the input states.
                This tensor is added to the hidden states after the linear transformation and dropout.
                The shape of this tensor should be compatible with the hidden states tensor.

        Returns:
            mindspore.Tensor: A tensor representing the forwarded MPNetOutput.
                The shape of the tensor is the same as the hidden_states tensor.
                The forwarded MPNetOutput is obtained by applying the dense layer, dropout, and LayerNorm operations
                to the hidden states and adding the input tensor.

        Raises:
            None.
        """
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states
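
Shape-wise, this sub-layer projects the intermediate activations back to the hidden size, then applies dropout, the residual addition, and LayerNorm. A hedged sketch with the default sizes (the tensors are placeholders, not real activations):

# Hypothetical sketch: the output sub-layer maps 3072 -> 768 with a residual connection.
import mindspore
from mindspore import ops
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetOutput

output_sublayer = MPNetOutput(MPNetConfig())
intermediate = ops.ones((2, 8, 3072), mindspore.float32)   # from MPNetIntermediate
attention_out = ops.ones((2, 8, 768), mindspore.float32)   # residual branch
result = output_sublayer(intermediate, attention_out)      # LayerNorm(dropout(dense(intermediate)) + attention_out) -> (2, 8, 768)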

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetOutput.__init__(config)

Initializes an instance of the MPNetOutput class.

PARAMETER DESCRIPTION
self

The instance of the MPNetOutput class.

config

An object containing configuration parameters.

  • Type: Any
  • Purpose: The configuration object specifying model settings.
  • Restrictions: Must be a valid configuration object.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
TypeError

If the config parameter is not of the expected type.

ValueError

If the config parameter does not contain the required attributes.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initializes an instance of the MPNetOutput class.

    Args:
        self: The instance of the MPNetOutput class.
        config:
            An object containing configuration parameters.

            - Type: Any
            - Purpose: The configuration object specifying model settings.
            - Restrictions: Must be a valid configuration object.

    Returns:
        None.

    Raises:
        TypeError: If the config parameter is not of the expected type.
        ValueError: If the config parameter does not contain the required attributes.
    """
    super().__init__()
    self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
    self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetOutput.forward(hidden_states, input_tensor)

Constructs the MPNetOutput.

PARAMETER DESCRIPTION
self

An instance of the MPNetOutput class.

TYPE: MPNetOutput

hidden_states

A tensor containing the hidden states. This tensor is input to the dense layer, which performs a linear transformation on the hidden states. The shape of this tensor should be compatible with the dense layer's weight matrix.

TYPE: Tensor

input_tensor

A tensor containing the input states. This tensor is added to the hidden states after the linear transformation and dropout. The shape of this tensor should be compatible with the hidden states tensor.

TYPE: Tensor

RETURNS DESCRIPTION
Tensor

mindspore.Tensor: A tensor representing the forwarded MPNetOutput. The shape of the tensor is the same as the hidden_states tensor. The forwarded MPNetOutput is obtained by applying the dense layer, dropout, and LayerNorm operations to the hidden states and adding the input tensor.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(self, hidden_states: mindspore.Tensor, input_tensor: mindspore.Tensor) -> mindspore.Tensor:
    """
    Constructs the MPNetOutput.

    Args:
        self (MPNetOutput): An instance of the MPNetOutput class.
        hidden_states (mindspore.Tensor): A tensor containing the hidden states.
            This tensor is input to the dense layer, which performs a linear transformation on the hidden states.
            The shape of this tensor should be compatible with the dense layer's weight matrix.
        input_tensor (mindspore.Tensor): A tensor containing the input states.
            This tensor is added to the hidden states after the linear transformation and dropout.
            The shape of this tensor should be compatible with the hidden states tensor.

    Returns:
        mindspore.Tensor: A tensor representing the forwarded MPNetOutput.
            The shape of the tensor is the same as the hidden_states tensor.
            The forwarded MPNetOutput is obtained by applying the dense layer, dropout, and LayerNorm operations
            to the hidden states and adding the input tensor.

    Raises:
        None.
    """
    hidden_states = self.dense(hidden_states)
    hidden_states = self.dropout(hidden_states)
    hidden_states = self.LayerNorm(hidden_states + input_tensor)
    return hidden_states

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetPooler

Bases: Module

Copied from transformers.models.bert.modeling_bert.BertPooler

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetPooler(nn.Module):
    """Copied from transformers.models.bert.modeling_bert.BertPooler"""
    def __init__(self, config):
        """
        Initializes an instance of the MPNetPooler class.

        Args:
            self (MPNetPooler): The current instance of the MPNetPooler class.
            config (object): The configuration object containing parameters for initializing the MPNetPooler.
                The config object should have a 'hidden_size' attribute indicating the size of the hidden layer.

        Returns:
            None.

        Raises:
            None.
        """
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: mindspore.Tensor) -> mindspore.Tensor:
        """
        This method forwards a pooled output from the hidden states of the MPNet model.

        Args:
            self: The instance of the MPNetPooler class.
            hidden_states (mindspore.Tensor): A tensor containing the hidden states of the MPNet model.
                It is expected to have a shape of (batch_size, sequence_length, hidden_size), where batch_size is the
                batch size, sequence_length is the length of the input sequence, and hidden_size is the size of the
                hidden state.

        Returns:
            mindspore.Tensor: The pooled output tensor generated from the hidden states.
                It has a shape of (batch_size, hidden_size).

        Raises:
            None.
        """
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output
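
A short sketch of the pooler in isolation, assuming the default hidden size; the input tensor is a stand-in for real encoder output.

# Hypothetical sketch: pooling takes the first token's hidden state through dense + tanh.
import mindspore
from mindspore import ops
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetPooler

pooler = MPNetPooler(MPNetConfig())
hidden_states = ops.ones((2, 8, 768), mindspore.float32)   # (batch, seq, hidden)
pooled = pooler(hidden_states)                             # tanh(dense(hidden_states[:, 0])) -> (2, 768)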

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetPooler.__init__(config)

Initializes an instance of the MPNetPooler class.

PARAMETER DESCRIPTION
self

The current instance of the MPNetPooler class.

TYPE: MPNetPooler

config

The configuration object containing parameters for initializing the MPNetPooler. The config object should have a 'hidden_size' attribute indicating the size of the hidden layer.

TYPE: object

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initializes an instance of the MPNetPooler class.

    Args:
        self (MPNetPooler): The current instance of the MPNetPooler class.
        config (object): The configuration object containing parameters for initializing the MPNetPooler.
            The config object should have a 'hidden_size' attribute indicating the size of the hidden layer.

    Returns:
        None.

    Raises:
        None.
    """
    super().__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.activation = nn.Tanh()

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetPooler.forward(hidden_states)

This method forwards a pooled output from the hidden states of the MPNet model.

PARAMETER DESCRIPTION
self

The instance of the MPNetPooler class.

hidden_states

A tensor containing the hidden states of the MPNet model. It is expected to have a shape of (batch_size, sequence_length, hidden_size), where batch_size is the batch size, sequence_length is the length of the input sequence, and hidden_size is the size of the hidden state.

TYPE: Tensor

RETURNS DESCRIPTION
Tensor

mindspore.Tensor: The pooled output tensor generated from the hidden states. It has a shape of (batch_size, hidden_size).

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(self, hidden_states: mindspore.Tensor) -> mindspore.Tensor:
    """
    This method forwards a pooled output from the hidden states of the MPNet model.

    Args:
        self: The instance of the MPNetPooler class.
        hidden_states (mindspore.Tensor): A tensor containing the hidden states of the MPNet model.
            It is expected to have a shape of (batch_size, sequence_length, hidden_size), where batch_size is the
            batch size, sequence_length is the length of the input sequence, and hidden_size is the size of the
            hidden state.

    Returns:
        mindspore.Tensor: The pooled output tensor generated from the hidden states.
            It has a shape of (batch_size, hidden_size).

    Raises:
        None.
    """
    # We "pool" the model by simply taking the hidden state corresponding
    # to the first token.
    first_token_tensor = hidden_states[:, 0]
    pooled_output = self.dense(first_token_tensor)
    pooled_output = self.activation(pooled_output)
    return pooled_output

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetPreTrainedModel

Bases: PreTrainedModel

An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """
    config_class = MPNetConfig
    pretrained_model_archive_map = MPNET_PRETRAINED_MODEL_ARCHIVE_LIST
    base_model_prefix = "mpnet"

    def _init_weights(self, cell):
        """Initialize the weights"""
        if isinstance(cell, nn.Linear):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            cell.weight.set_data(initializer(Normal(self.config.initializer_range),
                                                    cell.weight.shape, cell.weight.dtype))
            if cell.bias:
                cell.bias.set_data(initializer('zeros', cell.bias.shape, cell.bias.dtype))
        elif isinstance(cell, nn.Embedding):
            weight = np.random.normal(0.0, self.config.initializer_range, cell.weight.shape)
            if cell.padding_idx:
                weight[cell.padding_idx] = 0

            cell.weight.set_data(mindspore.Tensor(weight, cell.weight.dtype))
        elif isinstance(cell, nn.LayerNorm):
            cell.weight.set_data(initializer('ones', cell.weight.shape, cell.weight.dtype))
            cell.bias.set_data(initializer('zeros', cell.bias.shape, cell.bias.dtype))

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetSelfAttention

Bases: Module

SelfAttention Model

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
class MPNetSelfAttention(nn.Module):
    """SelfAttention Model"""
    def __init__(self, config):
        """
        Initializes a new instance of the MPNetSelfAttention class.

        Args:
            self: The instance of the MPNetSelfAttention class.
            config (object):
                An object containing configuration parameters for the self-attention mechanism.

                - hidden_size (int): The size of the hidden layers.
                - num_attention_heads (int): The number of attention heads.
                - embedding_size (int): The size of the embeddings.
                - attention_probs_dropout_prob (float): The dropout probability for attention probabilities.

        Returns:
            None.

        Raises:
            ValueError: If the hidden size is not a multiple of the number of attention heads or if the 'embedding_size'
                attribute is not present in the config object.
        """
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
                f"heads ({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.q = nn.Linear(config.hidden_size, self.all_head_size)
        self.k = nn.Linear(config.hidden_size, self.all_head_size)
        self.v = nn.Linear(config.hidden_size, self.all_head_size)
        self.o = nn.Linear(config.hidden_size, config.hidden_size)

        self.dropout = nn.Dropout(p=config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x):
        """
        Transposes the input tensor `x` to prepare it for multi-head attention scoring.
        """
        new_x_shape = x.shape[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        position_bias=None,
        output_attentions=False,
        **kwargs,
    ):
        """
        This method forwards self-attention mechanism for MPNetSelfAttention.

        Args:
            self: The instance of the class.
            hidden_states: Tensor containing the input hidden states. Shape: (batch_size, sequence_length, hidden_size).
            attention_mask: Optional tensor to mask out attention scores.
                Shape: (batch_size, sequence_length, sequence_length).
            head_mask: Optional tensor to mask out attention heads.
                Shape: (num_attention_heads, sequence_length, sequence_length).
            position_bias: Optional tensor containing positional bias.
                Shape: (batch_size, num_attention_heads, sequence_length, sequence_length).
            output_attentions: Boolean indicating whether to output attention probabilities.

        Returns:
            Tuple:
                Tuple containing output tensor 'o' and attention probabilities tensor.
                    If output_attentions is False, returns tuple with only 'o'.

        Raises:
            ValueError: If the dimensions of input tensors are incompatible for matrix operations.
            TypeError: If the input parameters are not of expected types.
            RuntimeError: If any runtime error occurs during the execution of the method.
        """
        q = self.q(hidden_states)
        k = self.k(hidden_states)
        v = self.v(hidden_states)

        q = self.transpose_for_scores(q)
        k = self.transpose_for_scores(k)
        v = self.transpose_for_scores(v)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = ops.matmul(q, k.swapaxes(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)

        # Apply relative position embedding (precomputed in MPNetEncoder) if provided.
        if position_bias is not None:
            attention_scores += position_bias

        if attention_mask is not None:
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = ops.softmax(attention_scores, axis=-1)

        attention_probs = self.dropout(attention_probs)

        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        c = ops.matmul(attention_probs, v)

        c = c.permute(0, 2, 1, 3)
        new_c_shape = c.shape[:-2] + (self.all_head_size,)
        c = c.view(*new_c_shape)

        o = self.o(c)

        outputs = (o, attention_probs) if output_attentions else (o,)
        return outputs
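
To make the tensor shapes concrete, here is a hedged walkthrough with the default configuration (12 heads, head size 64). The batch size, sequence length, and input values are arbitrary assumptions; only the class names and the shape logic come from the source above.

# Hypothetical sketch of the shapes inside MPNetSelfAttention.
import mindspore
from mindspore import ops
from mindnlp.transformers.models.mpnet.configuration_mpnet import MPNetConfig
from mindnlp.transformers.models.mpnet.modeling_mpnet import MPNetSelfAttention

attn = MPNetSelfAttention(MPNetConfig())
x = ops.ones((2, 8, 768), mindspore.float32)       # (batch, seq, hidden)

# Internally: q/k/v projections keep (2, 8, 768); transpose_for_scores reshapes each
# to (2, 12, 8, 64); the raw scores are (2, 12, 8, 8), scaled by sqrt(64) before softmax.
o, probs = attn(x, output_attentions=True)
print(o.shape, probs.shape)                        # (2, 8, 768) (2, 12, 8, 8)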

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetSelfAttention.__init__(config)

Initializes a new instance of the MPNetSelfAttention class.

PARAMETER DESCRIPTION
self

The instance of the MPNetSelfAttention class.

config

An object containing configuration parameters for the self-attention mechanism.

  • hidden_size (int): The size of the hidden layers.
  • num_attention_heads (int): The number of attention heads.
  • embedding_size (int): The size of the embeddings.
  • attention_probs_dropout_prob (float): The dropout probability for attention probabilities.

TYPE: object

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
ValueError

If the hidden size is not a multiple of the number of attention heads or if the 'embedding_size' attribute is not present in the config object.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def __init__(self, config):
    """
    Initializes a new instance of the MPNetSelfAttention class.

    Args:
        self: The instance of the MPNetSelfAttention class.
        config (object):
            An object containing configuration parameters for the self-attention mechanism.

            - hidden_size (int): The size of the hidden layers.
            - num_attention_heads (int): The number of attention heads.
            - embedding_size (int): The size of the embeddings.
            - attention_probs_dropout_prob (float): The dropout probability for attention probabilities.

    Returns:
        None.

    Raises:
        ValueError: If the hidden size is not a multiple of the number of attention heads or if the 'embedding_size'
            attribute is not present in the config object.
    """
    super().__init__()
    if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
        raise ValueError(
            f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
            f"heads ({config.num_attention_heads})"
        )

    self.num_attention_heads = config.num_attention_heads
    self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
    self.all_head_size = self.num_attention_heads * self.attention_head_size

    self.q = nn.Linear(config.hidden_size, self.all_head_size)
    self.k = nn.Linear(config.hidden_size, self.all_head_size)
    self.v = nn.Linear(config.hidden_size, self.all_head_size)
    self.o = nn.Linear(config.hidden_size, config.hidden_size)

    self.dropout = nn.Dropout(p=config.attention_probs_dropout_prob)

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetSelfAttention.forward(hidden_states, attention_mask=None, head_mask=None, position_bias=None, output_attentions=False, **kwargs)

This method forwards self-attention mechanism for MPNetSelfAttention.

PARAMETER DESCRIPTION
self

The instance of the class.

hidden_states

Tensor containing the input hidden states. Shape: (batch_size, sequence_length, hidden_size).

attention_mask

Optional tensor to mask out attention scores. Shape: (batch_size, sequence_length, sequence_length).

DEFAULT: None

head_mask

Optional tensor to mask out attention heads. Shape: (num_attention_heads, sequence_length, sequence_length).

DEFAULT: None

position_bias

Optional tensor containing positional bias. Shape: (batch_size, num_attention_heads, sequence_length, sequence_length).

DEFAULT: None

output_attentions

Boolean indicating whether to output attention probabilities.

DEFAULT: False

RETURNS DESCRIPTION
Tuple

Tuple containing output tensor 'o' and attention probabilities tensor. If output_attentions is False, returns tuple with only 'o'.

RAISES DESCRIPTION
ValueError

If the dimensions of input tensors are incompatible for matrix operations.

TypeError

If the input parameters are not of expected types.

RuntimeError

If any runtime error occurs during the execution of the method.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def forward(
    self,
    hidden_states,
    attention_mask=None,
    head_mask=None,
    position_bias=None,
    output_attentions=False,
    **kwargs,
):
    """
    This method forwards self-attention mechanism for MPNetSelfAttention.

    Args:
        self: The instance of the class.
        hidden_states: Tensor containing the input hidden states. Shape: (batch_size, sequence_length, hidden_size).
        attention_mask: Optional tensor to mask out attention scores.
            Shape: (batch_size, sequence_length, sequence_length).
        head_mask: Optional tensor to mask out attention heads.
            Shape: (num_attention_heads, sequence_length, sequence_length).
        position_bias: Optional tensor containing positional bias.
            Shape: (batch_size, num_attention_heads, sequence_length, sequence_length).
        output_attentions: Boolean indicating whether to output attention probabilities.

    Returns:
        Tuple:
            Tuple containing output tensor 'o' and attention probabilities tensor.
                If output_attentions is False, returns tuple with only 'o'.

    Raises:
        ValueError: If the dimensions of input tensors are incompatible for matrix operations.
        TypeError: If the input parameters are not of expected types.
        RuntimeError: If any runtime error occurs during the execution of the method.
    """
    q = self.q(hidden_states)
    k = self.k(hidden_states)
    v = self.v(hidden_states)

    q = self.transpose_for_scores(q)
    k = self.transpose_for_scores(k)
    v = self.transpose_for_scores(v)

    # Take the dot product between "query" and "key" to get the raw attention scores.
    attention_scores = ops.matmul(q, k.swapaxes(-1, -2))
    attention_scores = attention_scores / math.sqrt(self.attention_head_size)

    # Apply relative position embedding (precomputed in MPNetEncoder) if provided.
    if position_bias is not None:
        attention_scores += position_bias

    if attention_mask is not None:
        attention_scores = attention_scores + attention_mask

    # Normalize the attention scores to probabilities.
    attention_probs = ops.softmax(attention_scores, axis=-1)

    attention_probs = self.dropout(attention_probs)

    if head_mask is not None:
        attention_probs = attention_probs * head_mask

    c = ops.matmul(attention_probs, v)

    c = c.permute(0, 2, 1, 3)
    new_c_shape = c.shape[:-2] + (self.all_head_size,)
    c = c.view(*new_c_shape)

    o = self.o(c)

    outputs = (o, attention_probs) if output_attentions else (o,)
    return outputs

mindnlp.transformers.models.mpnet.modeling_mpnet.MPNetSelfAttention.transpose_for_scores(x)

Transposes the input tensor x to prepare it for multi-head attention scoring.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def transpose_for_scores(self, x):
    """
    Transposes the input tensor `x` to prepare it for multi-head attention scoring.
    """
    new_x_shape = x.shape[:-1] + (self.num_attention_heads, self.attention_head_size)
    x = x.view(*new_x_shape)
    return x.permute(0, 2, 1, 3)

mindnlp.transformers.models.mpnet.modeling_mpnet.create_position_ids_from_input_ids(input_ids, padding_idx)

Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx + 1; padding symbols are ignored. This is modified from fairseq's utils.make_positions: it takes a tensor of input ids and returns a tensor of position ids.

Source code in mindnlp/transformers/models/mpnet/modeling_mpnet.py
def create_position_ids_from_input_ids(input_ids, padding_idx):
    """
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`. :param torch.Tensor x: :return torch.Tensor:
    """
    # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
    mask = input_ids.ne(padding_idx).int()
    incremental_indices = ops.cumsum(mask, axis=1).type_as(mask) * mask
    return incremental_indices.long() + padding_idx
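
A worked example may help. With MPNet's pad token id of 1, non-padding positions count up from padding_idx + 1 while padding stays at padding_idx; the token ids below are illustrative only.

# mask = input_ids.ne(1)        -> [[1, 1, 1, 0, 0]]
# cumsum(mask, axis=1) * mask   -> [[1, 2, 3, 0, 0]]
# + padding_idx                 -> [[2, 3, 4, 1, 1]]
import numpy as np
import mindspore
from mindnlp.transformers.models.mpnet.modeling_mpnet import create_position_ids_from_input_ids

input_ids = mindspore.Tensor(np.array([[0, 2057, 2003, 1, 1]]), mindspore.int64)
print(create_position_ids_from_input_ids(input_ids, padding_idx=1))   # [[2 3 4 1 1]]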

mindnlp.transformers.models.mpnet.tokenization_mpnet

Tokenization classes for MPNet.

mindnlp.transformers.models.mpnet.tokenization_mpnet.BasicTokenizer

Bases: object

Constructs a BasicTokenizer that will run basic tokenization (punctuation splitting, lower casing, etc.).

PARAMETER DESCRIPTION
do_lower_case

Whether or not to lowercase the input when tokenizing.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

never_split

Collection of tokens which will never be split during tokenization. Only has an effect when do_basic_tokenize=True

TYPE: `Iterable`, *optional* DEFAULT: None

tokenize_chinese_chars

Whether or not to tokenize Chinese characters.

This should likely be deactivated for Japanese (see https://github.com/huggingface/transformers/issues/328).

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

strip_accents

Whether or not to strip all accents. If this option is not specified, then it will be determined by the value for lowercase (as in the original BERT).

TYPE: `bool`, *optional* DEFAULT: None

do_split_on_punc

In some instances we want to skip the basic punctuation splitting so that later tokenization can capture the full context of the words, such as contractions.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True
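
A minimal usage sketch, assuming the default options; the sample text and the outputs shown as comments are illustrative, derived by tracing the tokenize method listed below.

# Hypothetical sketch: basic tokenization lowercases and splits punctuation.
from mindnlp.transformers.models.mpnet.tokenization_mpnet import BasicTokenizer

tokenizer = BasicTokenizer(do_lower_case=True)
print(tokenizer.tokenize("Hello, MPNet!"))                            # ['hello', ',', 'mpnet', '!']
print(tokenizer.tokenize("Hello, MPNet!", never_split=["MPNet!"]))    # ['hello', ',', 'MPNet!']
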

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
class BasicTokenizer(object):
    """
    Constructs a BasicTokenizer that will run basic tokenization (punctuation splitting, lower casing, etc.).

    Args:
        do_lower_case (`bool`, *optional*, defaults to `True`):
            Whether or not to lowercase the input when tokenizing.
        never_split (`Iterable`, *optional*):
            Collection of tokens which will never be split during tokenization. Only has an effect when
            `do_basic_tokenize=True`
        tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
            Whether or not to tokenize Chinese characters.

            This should likely be deactivated for Japanese (see this
            [issue](https://github.com/huggingface/transformers/issues/328)).
        strip_accents (`bool`, *optional*):
            Whether or not to strip all accents. If this option is not specified, then it will be determined by the
            value for `lowercase` (as in the original BERT).
        do_split_on_punc (`bool`, *optional*, defaults to `True`):
            In some instances we want to skip the basic punctuation splitting so that later tokenization can capture
            the full context of the words, such as contractions.
    """
    def __init__(
        self,
        do_lower_case=True,
        never_split=None,
        tokenize_chinese_chars=True,
        strip_accents=None,
        do_split_on_punc=True,
    ):
        """
        Initializes a BasicTokenizer object with the specified parameters.

        Args:
            self: The instance of the BasicTokenizer class.
            do_lower_case (bool): A flag indicating whether text should be converted to lowercase. Default is True.
            never_split (list): A list of tokens that should never be split during tokenization. Default is an empty list.
            tokenize_chinese_chars (bool): A flag indicating whether to tokenize Chinese characters. Default is True.
            strip_accents (None): Not used in the current implementation.
            do_split_on_punc (bool): A flag indicating whether to split on punctuation marks. Default is True.

        Returns:
            None.

        Raises:
            None.
        """
        if never_split is None:
            never_split = []
        self.do_lower_case = do_lower_case
        self.never_split = set(never_split)
        self.tokenize_chinese_chars = tokenize_chinese_chars
        self.strip_accents = strip_accents
        self.do_split_on_punc = do_split_on_punc

    def tokenize(self, text, never_split=None):
        """
        Basic Tokenization of a piece of text. For sub-word tokenization, see WordPieceTokenizer.

        Args:
            never_split (`List[str]`, *optional*)
                Kept for backward compatibility purposes. Now implemented directly at the base class level (see
                [`PreTrainedTokenizer.tokenize`]) List of token not to split.
        """
        # union() returns a new set by concatenating the two sets.
        never_split = self.never_split.union(set(never_split)) if never_split else self.never_split
        text = self._clean_text(text)

        # This was added on November 1st, 2018 for the multilingual and Chinese
        # models. This is also applied to the English models now, but it doesn't
        # matter since the English models were not trained on any Chinese data
        # and generally don't have any Chinese data in them (there are Chinese
        # characters in the vocabulary because Wikipedia does have some Chinese
        # words in the English Wikipedia.).
        if self.tokenize_chinese_chars:
            text = self._tokenize_chinese_chars(text)
        # prevents treating the same character with different unicode codepoints as different characters
        unicode_normalized_text = unicodedata.normalize("NFC", text)
        orig_tokens = whitespace_tokenize(unicode_normalized_text)
        split_tokens = []
        for token in orig_tokens:
            if token not in never_split:
                if self.do_lower_case:
                    token = token.lower()
                    if self.strip_accents is not False:
                        token = self._run_strip_accents(token)
                elif self.strip_accents:
                    token = self._run_strip_accents(token)
            split_tokens.extend(self._run_split_on_punc(token, never_split))

        output_tokens = whitespace_tokenize(" ".join(split_tokens))
        return output_tokens

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        text = unicodedata.normalize("NFD", text)
        output = []
        for char in text:
            cat = unicodedata.category(char)
            if cat == "Mn":
                continue
            output.append(char)
        return "".join(output)

    def _run_split_on_punc(self, text, never_split=None):
        """Splits punctuation on a piece of text."""
        if not self.do_split_on_punc or (never_split is not None and text in never_split):
            return [text]
        chars = list(text)
        i = 0
        start_new_word = True
        output = []
        while i < len(chars):
            char = chars[i]
            if _is_punctuation(char):
                output.append([char])
                start_new_word = True
            else:
                if start_new_word:
                    output.append([])
                start_new_word = False
                output[-1].append(char)
            i += 1

        return ["".join(x) for x in output]

    def _tokenize_chinese_chars(self, text):
        """Adds whitespace around any CJK character."""
        output = []
        for char in text:
            cp = ord(char)
            if self._is_chinese_char(cp):
                output.append(" ")
                output.append(char)
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and handled
        # like the all of the other languages.
        if (
            (cp >= 0x4E00 and cp <= 0x9FFF)
            or (cp >= 0x3400 and cp <= 0x4DBF)  #
            or (cp >= 0x20000 and cp <= 0x2A6DF)  #
            or (cp >= 0x2A700 and cp <= 0x2B73F)  #
            or (cp >= 0x2B740 and cp <= 0x2B81F)  #
            or (cp >= 0x2B820 and cp <= 0x2CEAF)  #
            or (cp >= 0xF900 and cp <= 0xFAFF)
            or (cp >= 0x2F800 and cp <= 0x2FA1F)  #
        ):  #
            return True

        return False

    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        output = []
        for char in text:
            cp = ord(char)
            if cp == 0 or cp == 0xFFFD or _is_control(char):
                continue
            if _is_whitespace(char):
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)

mindnlp.transformers.models.mpnet.tokenization_mpnet.BasicTokenizer.__init__(do_lower_case=True, never_split=None, tokenize_chinese_chars=True, strip_accents=None, do_split_on_punc=True)

Initializes a BasicTokenizer object with the specified parameters.

PARAMETER DESCRIPTION
self

The instance of the BasicTokenizer class.

do_lower_case

A flag indicating whether text should be converted to lowercase. Default is True.

TYPE: bool DEFAULT: True

never_split

A list of tokens that should never be split during tokenization. Default is an empty list.

TYPE: list DEFAULT: None

tokenize_chinese_chars

A flag indicating whether to tokenize Chinese characters. Default is True.

TYPE: bool DEFAULT: True

strip_accents

Whether or not to strip all accents. If not specified, this is determined by the value of do_lower_case (as in the original BERT).

TYPE: bool DEFAULT: None

do_split_on_punc

A flag indicating whether to split on punctuation marks. Default is True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def __init__(
    self,
    do_lower_case=True,
    never_split=None,
    tokenize_chinese_chars=True,
    strip_accents=None,
    do_split_on_punc=True,
):
    """
    Initializes a BasicTokenizer object with the specified parameters.

    Args:
        self: The instance of the BasicTokenizer class.
        do_lower_case (bool): A flag indicating whether text should be converted to lowercase. Default is True.
        never_split (list): A list of tokens that should never be split during tokenization. Default is an empty list.
        tokenize_chinese_chars (bool): A flag indicating whether to tokenize Chinese characters. Default is True.
        strip_accents (bool, optional): Whether to strip all accents. If None, this is determined by the value of do_lower_case (as in the original BERT). Defaults to None.
        do_split_on_punc (bool): A flag indicating whether to split on punctuation marks. Default is True.

    Returns:
        None.

    Raises:
        None.
    """
    if never_split is None:
        never_split = []
    self.do_lower_case = do_lower_case
    self.never_split = set(never_split)
    self.tokenize_chinese_chars = tokenize_chinese_chars
    self.strip_accents = strip_accents
    self.do_split_on_punc = do_split_on_punc

mindnlp.transformers.models.mpnet.tokenization_mpnet.BasicTokenizer.tokenize(text, never_split=None)

Basic Tokenization of a piece of text. For sub-word tokenization, see WordPieceTokenizer.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def tokenize(self, text, never_split=None):
    """
    Basic Tokenization of a piece of text. For sub-word tokenization, see WordPieceTokenizer.

    Args:
        never_split (`List[str]`, *optional*):
            Kept for backward compatibility purposes. Now implemented directly at the base class level (see
            [`PreTrainedTokenizer.tokenize`]). List of tokens not to split.
    """
    # union() returns a new set by concatenating the two sets.
    never_split = self.never_split.union(set(never_split)) if never_split else self.never_split
    text = self._clean_text(text)

    # This was added on November 1st, 2018 for the multilingual and Chinese
    # models. This is also applied to the English models now, but it doesn't
    # matter since the English models were not trained on any Chinese data
    # and generally don't have any Chinese data in them (there are Chinese
    # characters in the vocabulary because Wikipedia does have some Chinese
    # words in the English Wikipedia.).
    if self.tokenize_chinese_chars:
        text = self._tokenize_chinese_chars(text)
    # prevents treating the same character with different unicode codepoints as different characters
    unicode_normalized_text = unicodedata.normalize("NFC", text)
    orig_tokens = whitespace_tokenize(unicode_normalized_text)
    split_tokens = []
    for token in orig_tokens:
        if token not in never_split:
            if self.do_lower_case:
                token = token.lower()
                if self.strip_accents is not False:
                    token = self._run_strip_accents(token)
            elif self.strip_accents:
                token = self._run_strip_accents(token)
        split_tokens.extend(self._run_split_on_punc(token, never_split))

    output_tokens = whitespace_tokenize(" ".join(split_tokens))
    return output_tokens
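
A minimal usage sketch of the behaviour documented above (it assumes the mindnlp package is importable; the expected outputs in the comments follow from the default settings of BasicTokenizer and are illustrative):

from mindnlp.transformers.models.mpnet.tokenization_mpnet import BasicTokenizer

basic = BasicTokenizer(do_lower_case=True)

# Punctuation is split into separate tokens, text is lowercased and accents are stripped.
print(basic.tokenize("Héllo, World!"))                        # ['hello', ',', 'world', '!']

# Tokens passed via never_split are kept intact (no lowercasing, no punctuation splitting).
print(basic.tokenize("[CLS] Héllo!", never_split=["[CLS]"]))  # ['[CLS]', 'hello', '!']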

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer

Bases: PreTrainedTokenizer

This tokenizer inherits from [BertTokenizer] which contains most of the methods. Users should refer to the superclass for more information regarding methods.

PARAMETER DESCRIPTION
vocab_file

Path to the vocabulary file.

TYPE: `str`

do_lower_case

Whether or not to lowercase the input when tokenizing.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

do_basic_tokenize

Whether or not to do basic tokenization before WordPiece.

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

never_split

Collection of tokens which will never be split during tokenization. Only has an effect when do_basic_tokenize=True

TYPE: `Iterable`, *optional* DEFAULT: None

bos_token

The beginning of sequence token that was used during pre-training. Can be used as a sequence classifier token.

When building a sequence using special tokens, this is not the token that is used for the beginning of sequence. The token used is the cls_token.

TYPE: `str`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

eos_token

The end of sequence token.

When building a sequence using special tokens, this is not the token that is used for the end of sequence. The token used is the sep_token.

TYPE: `str`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

sep_token

The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for sequence classification or for a text and a question for question answering. It is also used as the last token of a sequence built with special tokens.

TYPE: `str`, *optional*, defaults to `"</s>"` DEFAULT: '</s>'

cls_token

The classifier token which is used when doing sequence classification (classification of the whole sequence instead of per-token classification). It is the first token of the sequence when built with special tokens.

TYPE: `str`, *optional*, defaults to `"<s>"` DEFAULT: '<s>'

unk_token

The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead.

TYPE: `str`, *optional*, defaults to `"[UNK]"` DEFAULT: '[UNK]'

pad_token

The token used for padding, for example when batching sequences of different lengths.

TYPE: `str`, *optional*, defaults to `"<pad>"` DEFAULT: '<pad>'

mask_token

The token used for masking values. This is the token used when training this model with masked language modeling. This is the token which the model will try to predict.

TYPE: `str`, *optional*, defaults to `"<mask>"` DEFAULT: '<mask>'

tokenize_chinese_chars

Whether or not to tokenize Chinese characters.

This should likely be deactivated for Japanese (see this issue).

TYPE: `bool`, *optional*, defaults to `True` DEFAULT: True

strip_accents

Whether or not to strip all accents. If this option is not specified, then it will be determined by the value for lowercase (as in the original BERT).

TYPE: `bool`, *optional* DEFAULT: None

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
class MPNetTokenizer(PreTrainedTokenizer):
    """

    This tokenizer inherits from [`BertTokenizer`] which contains most of the methods. Users should refer to the
    superclass for more information regarding methods.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        do_lower_case (`bool`, *optional*, defaults to `True`):
            Whether or not to lowercase the input when tokenizing.
        do_basic_tokenize (`bool`, *optional*, defaults to `True`):
            Whether or not to do basic tokenization before WordPiece.
        never_split (`Iterable`, *optional*):
            Collection of tokens which will never be split during tokenization. Only has an effect when
            `do_basic_tokenize=True`
        bos_token (`str`, *optional*, defaults to `"<s>"`):
            The beginning of sequence token that was used during pre-training. Can be used as a sequence classifier token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the beginning of
            sequence. The token used is the `cls_token`.

            </Tip>

        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the end of sequence.
            The token used is the `sep_token`.

            </Tip>

        sep_token (`str`, *optional*, defaults to `"</s>"`):
            The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
            sequence classification or for a text and a question for question answering. It is also used as the last
            token of a sequence built with special tokens.
        cls_token (`str`, *optional*, defaults to `"<s>"`):
            The classifier token which is used when doing sequence classification (classification of the whole sequence
            instead of per-token classification). It is the first token of the sequence when built with special tokens.
        unk_token (`str`, *optional*, defaults to `"[UNK]"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        pad_token (`str`, *optional*, defaults to `"<pad>"`):
            The token used for padding, for example when batching sequences of different lengths.
        mask_token (`str`, *optional*, defaults to `"<mask>"`):
            The token used for masking values. This is the token used when training this model with masked language
            modeling. This is the token which the model will try to predict.
        tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
            Whether or not to tokenize Chinese characters.

            This should likely be deactivated for Japanese (see this
            [issue](https://github.com/huggingface/transformers/issues/328)).
        strip_accents (`bool`, *optional*):
            Whether or not to strip all accents. If this option is not specified, then it will be determined by the
            value for `lowercase` (as in the original BERT).
    """
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        do_lower_case=True,
        do_basic_tokenize=True,
        never_split=None,
        bos_token="<s>",
        eos_token="</s>",
        sep_token="</s>",
        cls_token="<s>",
        unk_token="[UNK]",
        pad_token="<pad>",
        mask_token="<mask>",
        tokenize_chinese_chars=True,
        strip_accents=None,
        **kwargs,
    ):
        """
        This method initializes an instance of the MPNetTokenizer class.

        Args:
            self: The instance of the class.
            vocab_file (str): Path to the vocabulary file.
            do_lower_case (bool, optional): Whether to convert tokens to lowercase. Defaults to True.
            do_basic_tokenize (bool, optional): Whether to perform basic tokenization. Defaults to True.
            never_split (list, optional): List of tokens that should not be split. Defaults to None.
            bos_token (str, optional): Beginning of sequence token. Defaults to '<s>'.
            eos_token (str, optional): End of sequence token. Defaults to '</s>'.
            sep_token (str, optional): Separator token. Defaults to '</s>'.
            cls_token (str, optional): Classification token. Defaults to '<s>'.
            unk_token (str, optional): Token for unknown words. Defaults to '[UNK]'.
            pad_token (str, optional): Padding token. Defaults to '<pad>'.
            mask_token (str, optional): Mask token. Defaults to '<mask>'.
            tokenize_chinese_chars (bool, optional): Whether to tokenize Chinese characters. Defaults to True.
            strip_accents (bool, optional): Whether to strip all accents. Defaults to None, in which case it is determined by the value of do_lower_case.
            **kwargs: Additional keyword arguments.

        Returns:
            None.

        Raises:
            ValueError: If the vocabulary file specified by 'vocab_file' cannot be found.
        """
        bos_token = AddedToken(bos_token, special=True) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, special=True) if isinstance(eos_token, str) else eos_token
        sep_token = AddedToken(sep_token, special=True) if isinstance(sep_token, str) else sep_token
        cls_token = AddedToken(cls_token, special=True) if isinstance(cls_token, str) else cls_token
        unk_token = AddedToken(unk_token, special=True) if isinstance(unk_token, str) else unk_token
        pad_token = AddedToken(pad_token, special=True) if isinstance(pad_token, str) else pad_token

        # Mask token behave like a normal word, i.e. include the space before it
        mask_token = AddedToken(mask_token, lstrip=True, special=True) if isinstance(mask_token, str) else mask_token

        if not os.path.isfile(vocab_file):
            raise ValueError(
                f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained"
                " model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
            )
        self.vocab = load_vocab(vocab_file)
        self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
        self.do_basic_tokenize = do_basic_tokenize
        if do_basic_tokenize:
            self.basic_tokenizer = BasicTokenizer(
                do_lower_case=do_lower_case,
                never_split=never_split,
                tokenize_chinese_chars=tokenize_chinese_chars,
                strip_accents=strip_accents,
            )
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab, unk_token=str(unk_token))

        super().__init__(
            do_lower_case=do_lower_case,
            do_basic_tokenize=do_basic_tokenize,
            never_split=never_split,
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            sep_token=sep_token,
            cls_token=cls_token,
            pad_token=pad_token,
            mask_token=mask_token,
            tokenize_chinese_chars=tokenize_chinese_chars,
            strip_accents=strip_accents,
            **kwargs,
        )

    @property
    def do_lower_case(self):
        """
        Property 'do_lower_case' of the 'MPNetTokenizer' class.
        Returns whether the underlying basic tokenizer lowercases the input text.

        Args:
            self: An instance of the MPNetTokenizer class.

        Returns:
            bool: True if the basic tokenizer lowercases the input text, False otherwise.

        Raises:
            None
        """
        return self.basic_tokenizer.do_lower_case

    @property
    def vocab_size(self):
        """
        Returns the size of the vocabulary.

        Args:
            self (MPNetTokenizer): An instance of the MPNetTokenizer class.

        Returns:
            int: The size of the vocabulary.

        Raises:
            None.
        """
        return len(self.vocab)

    def get_vocab(self):
        """
        Method to retrieve the vocabulary from the MPNetTokenizer.

        Args:
            self: The instance of the MPNetTokenizer class.

        Returns:
            dict: A dictionary containing the combined vocabulary of added tokens and the original vocabulary.

        Raises:
            None
        """
        # "<mask>" is part of the vocab, but was wrongfully added at a wrong index in the fast saved version
        vocab = self.added_tokens_encoder.copy()
        vocab.update(self.vocab)
        return vocab

    def _tokenize(self, text):
        """
        Method to tokenize the input text using basic or wordpiece tokenizer.

        Args:
            self (MPNetTokenizer): An instance of the MPNetTokenizer class.
            text (str): The input text to be tokenized.

        Returns:
            list: A list of tokens after tokenization. If basic tokenization is enabled, the text is first split by the
                basic tokenizer and each resulting token (except those in never_split) is further split by the
                wordpiece tokenizer. If basic tokenization is disabled, the whole text is passed directly to the wordpiece tokenizer.

        Raises:
            None.
        """
        split_tokens = []
        if self.do_basic_tokenize:
            for token in self.basic_tokenizer.tokenize(text, never_split=self.all_special_tokens):
                # If the token is part of the never_split set
                if token in self.basic_tokenizer.never_split:
                    split_tokens.append(token)
                else:
                    split_tokens += self.wordpiece_tokenizer.tokenize(token)
        else:
            split_tokens = self.wordpiece_tokenizer.tokenize(text)
        return split_tokens

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.vocab.get(token, self.vocab.get(self.unk_token))

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.ids_to_tokens.get(index, self.unk_token)

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        out_string = " ".join(tokens).replace(" ##", "").strip()
        return out_string

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
        adding special tokens. A MPNet sequence has the following format:

        - single sequence: `<s> X </s>`
        - pair of sequences: `<s> A </s></s> B </s>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        return cls + token_ids_0 + sep + sep + token_ids_1 + sep

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` methods.

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Set to True if the token list is already formatted with special tokens for the model

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        if token_ids_1 is None:
            return [1] + ([0] * len(token_ids_0)) + [1]
        return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task. MPNet does not
        make use of token type ids, therefore a list of zeros is returned.

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        """
        sep = [self.sep_token_id]
        cls = [self.cls_token_id]

        if token_ids_1 is None:
            return len(cls + token_ids_0 + sep) * [0]
        return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary to a file in the specified directory with an optional filename prefix.

        Args:
            self (MPNetTokenizer): The instance of the MPNetTokenizer class.
            save_directory (str): The directory path where the vocabulary file will be saved.
            filename_prefix (Optional[str]): An optional prefix to be added to the filename. Default is None.

        Returns:
            Tuple[str]: A tuple containing the path to the saved vocabulary file.

        Raises:
            IOError: If an error occurs while writing the vocabulary file.
        """
        index = 0
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(
                save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
            )
        else:
            vocab_file = (filename_prefix + "-" if filename_prefix else "") + save_directory
        with open(vocab_file, "w", encoding="utf-8") as writer:
            for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
                if index != token_index:
                    logger.warning(
                        f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive."
                        " Please check that the vocabulary is not corrupted!"
                    )
                    index = token_index
                writer.write(token + "\n")
                index += 1
        return (vocab_file,)

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.do_lower_case property

Property 'do_lower_case' of the MPNetTokenizer class. Returns whether the underlying basic tokenizer lowercases the input text.

PARAMETER DESCRIPTION
self

An instance of the MPNetTokenizer class.

RETURNS DESCRIPTION

bool: True if the basic tokenizer lowercases the input text, False otherwise.

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.vocab_size property

Returns the size of the vocabulary.

PARAMETER DESCRIPTION
self

An instance of the MPNetTokenizer class.

TYPE: MPNetTokenizer

RETURNS DESCRIPTION
int

The size of the vocabulary.

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.__init__(vocab_file, do_lower_case=True, do_basic_tokenize=True, never_split=None, bos_token='<s>', eos_token='</s>', sep_token='</s>', cls_token='<s>', unk_token='[UNK]', pad_token='<pad>', mask_token='<mask>', tokenize_chinese_chars=True, strip_accents=None, **kwargs)

This method initializes an instance of the MPNetTokenizer class.

PARAMETER DESCRIPTION
self

The instance of the class.

vocab_file

Path to the vocabulary file.

TYPE: str

do_lower_case

Whether to convert tokens to lowercase. Defaults to True.

TYPE: bool DEFAULT: True

do_basic_tokenize

Whether to perform basic tokenization. Defaults to True.

TYPE: bool DEFAULT: True

never_split

List of tokens that should not be split. Defaults to None.

TYPE: list DEFAULT: None

bos_token

Beginning of sequence token. Defaults to '<s>'.

TYPE: str DEFAULT: '<s>'

eos_token

End of sequence token. Defaults to '</s>'.

TYPE: str DEFAULT: '</s>'

sep_token

Separator token. Defaults to '</s>'.

TYPE: str DEFAULT: '</s>'

cls_token

Classification token. Defaults to '<s>'.

TYPE: str DEFAULT: '<s>'

unk_token

Token for unknown words. Defaults to '[UNK]'.

TYPE: str DEFAULT: '[UNK]'

pad_token

Padding token. Defaults to '<pad>'.

TYPE: str DEFAULT: '<pad>'

mask_token

Mask token. Defaults to '<mask>'.

TYPE: str DEFAULT: '<mask>'

tokenize_chinese_chars

Whether to tokenize Chinese characters. Defaults to True.

TYPE: bool DEFAULT: True

strip_accents

Whether or not to strip all accents. Defaults to None, in which case it is determined by the value of do_lower_case.

TYPE: bool DEFAULT: None

**kwargs

Additional keyword arguments.

DEFAULT: {}

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
ValueError

If the vocabulary file specified by 'vocab_file' cannot be found.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def __init__(
    self,
    vocab_file,
    do_lower_case=True,
    do_basic_tokenize=True,
    never_split=None,
    bos_token="<s>",
    eos_token="</s>",
    sep_token="</s>",
    cls_token="<s>",
    unk_token="[UNK]",
    pad_token="<pad>",
    mask_token="<mask>",
    tokenize_chinese_chars=True,
    strip_accents=None,
    **kwargs,
):
    """
    This method initializes an instance of the MPNetTokenizer class.

    Args:
        self: The instance of the class.
        vocab_file (str): Path to the vocabulary file.
        do_lower_case (bool, optional): Whether to convert tokens to lowercase. Defaults to True.
        do_basic_tokenize (bool, optional): Whether to perform basic tokenization. Defaults to True.
        never_split (list, optional): List of tokens that should not be split. Defaults to None.
        bos_token (str, optional): Beginning of sequence token. Defaults to '<s>'.
        eos_token (str, optional): End of sequence token. Defaults to '</s>'.
        sep_token (str, optional): Separator token. Defaults to '</s>'.
        cls_token (str, optional): Classification token. Defaults to '<s>'.
        unk_token (str, optional): Token for unknown words. Defaults to '[UNK]'.
        pad_token (str, optional): Padding token. Defaults to '<pad>'.
        mask_token (str, optional): Mask token. Defaults to '<mask>'.
        tokenize_chinese_chars (bool, optional): Whether to tokenize Chinese characters. Defaults to True.
        strip_accents (bool, optional): Whether to strip all accents. Defaults to None, in which case it is determined by the value of do_lower_case.
        **kwargs: Additional keyword arguments.

    Returns:
        None.

    Raises:
        ValueError: If the vocabulary file specified by 'vocab_file' cannot be found.
    """
    bos_token = AddedToken(bos_token, special=True) if isinstance(bos_token, str) else bos_token
    eos_token = AddedToken(eos_token, special=True) if isinstance(eos_token, str) else eos_token
    sep_token = AddedToken(sep_token, special=True) if isinstance(sep_token, str) else sep_token
    cls_token = AddedToken(cls_token, special=True) if isinstance(cls_token, str) else cls_token
    unk_token = AddedToken(unk_token, special=True) if isinstance(unk_token, str) else unk_token
    pad_token = AddedToken(pad_token, special=True) if isinstance(pad_token, str) else pad_token

    # Mask token behave like a normal word, i.e. include the space before it
    mask_token = AddedToken(mask_token, lstrip=True, special=True) if isinstance(mask_token, str) else mask_token

    if not os.path.isfile(vocab_file):
        raise ValueError(
            f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained"
            " model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
        )
    self.vocab = load_vocab(vocab_file)
    self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
    self.do_basic_tokenize = do_basic_tokenize
    if do_basic_tokenize:
        self.basic_tokenizer = BasicTokenizer(
            do_lower_case=do_lower_case,
            never_split=never_split,
            tokenize_chinese_chars=tokenize_chinese_chars,
            strip_accents=strip_accents,
        )
    self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab, unk_token=str(unk_token))

    super().__init__(
        do_lower_case=do_lower_case,
        do_basic_tokenize=do_basic_tokenize,
        never_split=never_split,
        bos_token=bos_token,
        eos_token=eos_token,
        unk_token=unk_token,
        sep_token=sep_token,
        cls_token=cls_token,
        pad_token=pad_token,
        mask_token=mask_token,
        tokenize_chinese_chars=tokenize_chinese_chars,
        strip_accents=strip_accents,
        **kwargs,
    )
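
As a hedged usage sketch: the tokenizer is typically created with `from_pretrained` rather than by passing `vocab_file` directly. The snippet assumes the mindnlp package is installed and that the "microsoft/mpnet-base" checkpoint referenced in the configuration docs is reachable; the printed values are illustrative only.

from mindnlp.transformers.models.mpnet.tokenization_mpnet import MPNetTokenizer

tokenizer = MPNetTokenizer.from_pretrained("microsoft/mpnet-base")

# Basic tokenization followed by WordPiece, then special tokens are added around the sequence.
encoded = tokenizer("MPNet uses WordPiece after basic tokenization.")
print(encoded["input_ids"])
print(tokenizer.convert_ids_to_tokens(encoded["input_ids"]))  # first token is '<s>', last token is '</s>'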

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.build_inputs_with_special_tokens(token_ids_0, token_ids_1=None)

Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and adding special tokens. A MPNet sequence has the following format:

  • single sequence: <s> X </s>
  • pair of sequences: <s> A </s></s> B </s>
PARAMETER DESCRIPTION
token_ids_0

List of IDs to which the special tokens will be added

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: list of input IDs with the appropriate special tokens.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def build_inputs_with_special_tokens(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
    adding special tokens. A MPNet sequence has the following format:

    - single sequence: `<s> X </s>`
    - pair of sequences: `<s> A </s></s> B </s>`

    Args:
        token_ids_0 (`List[int]`):
            List of IDs to which the special tokens will be added
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
    """
    if token_ids_1 is None:
        return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
    cls = [self.cls_token_id]
    sep = [self.sep_token_id]
    return cls + token_ids_0 + sep + sep + token_ids_1 + sep
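
A short sketch of these layouts, assuming `tokenizer` is the MPNetTokenizer instance loaded in the earlier `from_pretrained` example:

ids_a = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("hello world"))
ids_b = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("how are you"))

single = tokenizer.build_inputs_with_special_tokens(ids_a)       # <s> A </s>
pair = tokenizer.build_inputs_with_special_tokens(ids_a, ids_b)  # <s> A </s></s> B </s>

assert single[0] == tokenizer.cls_token_id and single[-1] == tokenizer.sep_token_id
assert pair.count(tokenizer.sep_token_id) == 3                   # two between A and B plus the trailing one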

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.convert_tokens_to_string(tokens)

Converts a sequence of tokens (string) in a single string.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def convert_tokens_to_string(self, tokens):
    """Converts a sequence of tokens (string) in a single string."""
    out_string = " ".join(tokens).replace(" ##", "").strip()
    return out_string
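
For example, WordPiece continuation pieces marked with "##" are merged back into their parent word (reusing the `tokenizer` instance from the earlier sketch):

print(tokenizer.convert_tokens_to_string(["un", "##aff", "##able", "words"]))  # "unaffable words"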

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.create_token_type_ids_from_sequences(token_ids_0, token_ids_1=None)

Creates a mask from the two sequences passed to be used in a sequence-pair classification task. MPNet does not make use of token type ids, therefore a list of zeros is returned.

PARAMETER DESCRIPTION
token_ids_0

List of ids.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: List of zeros.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def create_token_type_ids_from_sequences(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Creates a mask from the two sequences passed to be used in a sequence-pair classification task. MPNet does not
    make use of token type ids, therefore a list of zeros is returned.

    Args:
        token_ids_0 (`List[int]`):
            List of ids.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: List of zeros.
    """
    sep = [self.sep_token_id]
    cls = [self.cls_token_id]

    if token_ids_1 is None:
        return len(cls + token_ids_0 + sep) * [0]
    return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
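
A quick check of that behaviour, reusing `ids_a` and `ids_b` from the build_inputs_with_special_tokens sketch above:

type_ids = tokenizer.create_token_type_ids_from_sequences(ids_a, ids_b)

# The mask is all zeros and matches the length of the corresponding input with special tokens.
assert type_ids == [0] * len(tokenizer.build_inputs_with_special_tokens(ids_a, ids_b))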

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.get_special_tokens_mask(token_ids_0, token_ids_1=None, already_has_special_tokens=False)

Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding special tokens using the tokenizer prepare_for_model methods.

PARAMETER DESCRIPTION
token_ids_0

List of ids.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

already_has_special_tokens

Set to True if the token list is already formatted with special tokens for the model

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

RETURNS DESCRIPTION
List[int]

List[int]: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def get_special_tokens_mask(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
) -> List[int]:
    """
    Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
    special tokens using the tokenizer `prepare_for_model` methods.

    Args:
        token_ids_0 (`List[int]`):
            List of ids.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.
        already_has_special_tokens (`bool`, *optional*, defaults to `False`):
            Set to True if the token list is already formatted with special tokens for the model

    Returns:
        `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
    """
    if already_has_special_tokens:
        return super().get_special_tokens_mask(
            token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
        )

    if token_ids_1 is None:
        return [1] + ([0] * len(token_ids_0)) + [1]
    return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]
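
A small sketch of the expected mask layout, again reusing `ids_a` and `ids_b` from the earlier sketch:

mask = tokenizer.get_special_tokens_mask(ids_a, ids_b)

# 1 marks the positions of <s> / </s>, 0 marks ordinary sequence tokens.
assert mask == [1] + [0] * len(ids_a) + [1, 1] + [0] * len(ids_b) + [1]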

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.get_vocab()

Method to retrieve the vocabulary from the MPNetTokenizer.

PARAMETER DESCRIPTION
self

The instance of the MPNetTokenizer class.

RETURNS DESCRIPTION
dict

A dictionary containing the combined vocabulary of added tokens and the original vocabulary.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def get_vocab(self):
    """
    Method to retrieve the vocabulary from the MPNetTokenizer.

    Args:
        self: The instance of the MPNetTokenizer class.

    Returns:
        dict: A dictionary containing the combined vocabulary of added tokens and the original vocabulary.

    Raises:
        None
    """
    # "<mask>" is part of the vocab, but was wrongfully added at a wrong index in the fast saved version
    vocab = self.added_tokens_encoder.copy()
    vocab.update(self.vocab)
    return vocab

mindnlp.transformers.models.mpnet.tokenization_mpnet.MPNetTokenizer.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary to a file in the specified directory with an optional filename prefix.

PARAMETER DESCRIPTION
self

The instance of the MPNetTokenizer class.

TYPE: MPNetTokenizer

save_directory

The directory path where the vocabulary file will be saved.

TYPE: str

filename_prefix

An optional prefix to be added to the filename. Default is None.

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
Tuple[str]

Tuple[str]: A tuple containing the path to the saved vocabulary file.

RAISES DESCRIPTION
IOError

If an error occurs while writing the vocabulary file.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary to a file in the specified directory with an optional filename prefix.

    Args:
        self (MPNetTokenizer): The instance of the MPNetTokenizer class.
        save_directory (str): The directory path where the vocabulary file will be saved.
        filename_prefix (Optional[str]): An optional prefix to be added to the filename. Default is None.

    Returns:
        Tuple[str]: A tuple containing the path to the saved vocabulary file.

    Raises:
        IOError: If an error occurs while writing the vocabulary file.
    """
    index = 0
    if os.path.isdir(save_directory):
        vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )
    else:
        vocab_file = (filename_prefix + "-" if filename_prefix else "") + save_directory
    with open(vocab_file, "w", encoding="utf-8") as writer:
        for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
            if index != token_index:
                logger.warning(
                    f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive."
                    " Please check that the vocabulary is not corrupted!"
                )
                index = token_index
            writer.write(token + "\n")
            index += 1
    return (vocab_file,)
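
A hedged sketch of saving the vocabulary, assuming `tokenizer` is the MPNetTokenizer instance from the earlier example (the exact file name depends on VOCAB_FILES_NAMES):

import os
import tempfile

save_dir = tempfile.mkdtemp()
(vocab_path,) = tokenizer.save_vocabulary(save_dir, filename_prefix="mpnet")
print(os.path.basename(vocab_path))  # e.g. "mpnet-vocab.txt"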

mindnlp.transformers.models.mpnet.tokenization_mpnet.WordpieceTokenizer

Bases: object

Runs WordPiece tokenization.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""
    def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
        """
        Initializes a new instance of the WordpieceTokenizer class.

        Args:
            self: The instance of the WordpieceTokenizer class.
            vocab (dict): Vocabulary mapping tokens to ids, as produced by load_vocab.
            unk_token (str): The token to be used for representing unknown words.
            max_input_chars_per_word (int): The maximum number of characters allowed per input word. Defaults to 100.

        Returns:
            None.

        Raises:
            None.
        """
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word

    def tokenize(self, text):
        """
        Tokenizes a piece of text into its word pieces. This uses a greedy longest-match-first algorithm to perform
        tokenization using the given vocabulary.

        For example, `input = "unaffable"` will return as output `["un", "##aff", "##able"]`.

        Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through *BasicTokenizer*.

        Returns:
            A list of wordpiece tokens.
        """
        output_tokens = []
        for token in whitespace_tokenize(text):
            chars = list(token)
            if len(chars) > self.max_input_chars_per_word:
                output_tokens.append(self.unk_token)
                continue

            is_bad = False
            start = 0
            sub_tokens = []
            while start < len(chars):
                end = len(chars)
                cur_substr = None
                while start < end:
                    substr = "".join(chars[start:end])
                    if start > 0:
                        substr = "##" + substr
                    if substr in self.vocab:
                        cur_substr = substr
                        break
                    end -= 1
                if cur_substr is None:
                    is_bad = True
                    break
                sub_tokens.append(cur_substr)
                start = end

            if is_bad:
                output_tokens.append(self.unk_token)
            else:
                output_tokens.extend(sub_tokens)
        return output_tokens

mindnlp.transformers.models.mpnet.tokenization_mpnet.WordpieceTokenizer.__init__(vocab, unk_token, max_input_chars_per_word=100)

Initializes a new instance of the WordpieceTokenizer class.

PARAMETER DESCRIPTION
self

The instance of the WordpieceTokenizer class.

vocab

Vocabulary mapping tokens to ids, as produced by load_vocab.

TYPE: dict

unk_token

The token to be used for representing unknown words.

TYPE: str

max_input_chars_per_word

The maximum number of characters allowed per input word. Defaults to 100.

TYPE: int DEFAULT: 100

RETURNS DESCRIPTION

None.


Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
    """
    Initializes a new instance of the WordpieceTokenizer class.

    Args:
        self: The instance of the WordpieceTokenizer class.
        vocab (dict): Vocabulary mapping tokens to ids, as produced by load_vocab.
        unk_token (str): The token to be used for representing unknown words.
        max_input_chars_per_word (int): The maximum number of characters allowed per input word. Defaults to 100.

    Returns:
        None.

    Raises:
        None.
    """
    self.vocab = vocab
    self.unk_token = unk_token
    self.max_input_chars_per_word = max_input_chars_per_word

mindnlp.transformers.models.mpnet.tokenization_mpnet.WordpieceTokenizer.tokenize(text)

Tokenizes a piece of text into its word pieces. This uses a greedy longest-match-first algorithm to perform tokenization using the given vocabulary.

For example, input = "unaffable" will return as output ["un", "##aff", "##able"].

PARAMETER DESCRIPTION
text

A single token or whitespace separated tokens. This should have already been passed through BasicTokenizer.

RETURNS DESCRIPTION

A list of wordpiece tokens.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def tokenize(self, text):
    """
    Tokenizes a piece of text into its word pieces. This uses a greedy longest-match-first algorithm to perform
    tokenization using the given vocabulary.

    For example, `input = "unaffable"` will return as output `["un", "##aff", "##able"]`.

    Args:
        text: A single token or whitespace separated tokens. This should have
            already been passed through *BasicTokenizer*.

    Returns:
        A list of wordpiece tokens.
    """
    output_tokens = []
    for token in whitespace_tokenize(text):
        chars = list(token)
        if len(chars) > self.max_input_chars_per_word:
            output_tokens.append(self.unk_token)
            continue

        is_bad = False
        start = 0
        sub_tokens = []
        while start < len(chars):
            end = len(chars)
            cur_substr = None
            while start < end:
                substr = "".join(chars[start:end])
                if start > 0:
                    substr = "##" + substr
                if substr in self.vocab:
                    cur_substr = substr
                    break
                end -= 1
            if cur_substr is None:
                is_bad = True
                break
            sub_tokens.append(cur_substr)
            start = end

        if is_bad:
            output_tokens.append(self.unk_token)
        else:
            output_tokens.extend(sub_tokens)
    return output_tokens
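
A small, self-contained sketch of the greedy longest-match-first behaviour; the toy vocabulary below is illustrative and assumes the mindnlp package is importable:

from mindnlp.transformers.models.mpnet.tokenization_mpnet import WordpieceTokenizer

# A toy vocabulary is enough to show how words are split into known pieces.
vocab = {"un": 0, "##aff": 1, "##able": 2, "runn": 3, "##ing": 4, "[UNK]": 5}
wp = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]")

print(wp.tokenize("unaffable"))  # ['un', '##aff', '##able']
print(wp.tokenize("running"))    # ['runn', '##ing']
print(wp.tokenize("xyz"))        # ['[UNK]'] -- no matching piece, so the whole word falls back to unk_token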

mindnlp.transformers.models.mpnet.tokenization_mpnet.load_vocab(vocab_file)

Loads a vocabulary file into a dictionary.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def load_vocab(vocab_file):
    """Loads a vocabulary file into a dictionary."""
    vocab = collections.OrderedDict()
    with open(vocab_file, "r", encoding="utf-8") as reader:
        tokens = reader.readlines()
    for index, token in enumerate(tokens):
        token = token.rstrip("\n")
        vocab[token] = index
    return vocab
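
A minimal sketch, assuming the mindnlp package is importable; the vocabulary file is a throwaway temporary file created just for illustration:

import os
import tempfile

from mindnlp.transformers.models.mpnet.tokenization_mpnet import load_vocab

# One token per line; ids are assigned by line order.
path = os.path.join(tempfile.mkdtemp(), "vocab.txt")
with open(path, "w", encoding="utf-8") as f:
    f.write("<s>\n<pad>\n</s>\n[UNK]\nhello\n")

vocab = load_vocab(path)
print(vocab["hello"])  # 4 -- the zero-based line index becomes the token id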

mindnlp.transformers.models.mpnet.tokenization_mpnet.whitespace_tokenize(text)

Runs basic whitespace cleaning and splitting on a piece of text.

Source code in mindnlp/transformers/models/mpnet/tokenization_mpnet.py
def whitespace_tokenize(text):
    """Runs basic whitespace cleaning and splitting on a piece of text."""
    text = text.strip()
    if not text:
        return []
    tokens = text.split()
    return tokens
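
A minimal sketch of the documented behaviour, assuming the mindnlp package is importable:

from mindnlp.transformers.models.mpnet.tokenization_mpnet import whitespace_tokenize

print(whitespace_tokenize("  hello   world \n"))  # ['hello', 'world']
print(whitespace_tokenize("   "))                 # [] -- pure whitespace yields an empty list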