
baichuan

mindnlp.transformers.models.baichuan.configuration_baichuan.BaiChuanConfig

Bases: PretrainedConfig

Configurations for BaiChuan

Source code in mindnlp/transformers/models/baichuan/configuration_baichuan.py
class BaiChuanConfig(PretrainedConfig):
    """
    Configurations for BaiChuan
    """
    model_type = "baichuan"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
            self,
            vocab_size=64000,
            hidden_size=4096,
            intermediate_size=11008,
            num_hidden_layers=32,
            num_attention_heads=32,
            hidden_act="silu",
            max_position_embeddings=4096,
            model_max_length=4096,
            initializer_range=0.02,
            rms_norm_eps=1e-6,
            use_cache=True,
            pad_token_id=0,
            bos_token_id=1,
            eos_token_id=2,
            tie_word_embeddings=False,
            **kwargs,
    ):
        """Constructs BaiChuanConfig."""
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.model_max_length = model_max_length
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

mindnlp.transformers.models.baichuan.configuration_baichuan.BaiChuanConfig.__init__(vocab_size=64000, hidden_size=4096, intermediate_size=11008, num_hidden_layers=32, num_attention_heads=32, hidden_act='silu', max_position_embeddings=4096, model_max_length=4096, initializer_range=0.02, rms_norm_eps=1e-06, use_cache=True, pad_token_id=0, bos_token_id=1, eos_token_id=2, tie_word_embeddings=False, **kwargs)

Constructs BaiChuanConfig.

Source code in mindnlp/transformers/models/baichuan/configuration_baichuan.py
def __init__(
        self,
        vocab_size=64000,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        hidden_act="silu",
        max_position_embeddings=4096,
        model_max_length=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        **kwargs,
):
    """Constructs BaiChuanConfig."""
    self.vocab_size = vocab_size
    self.max_position_embeddings = max_position_embeddings
    self.model_max_length = model_max_length
    self.hidden_size = hidden_size
    self.intermediate_size = intermediate_size
    self.num_hidden_layers = num_hidden_layers
    self.num_attention_heads = num_attention_heads
    self.hidden_act = hidden_act
    self.initializer_range = initializer_range
    self.rms_norm_eps = rms_norm_eps
    self.use_cache = use_cache
    super().__init__(
        pad_token_id=pad_token_id,
        bos_token_id=bos_token_id,
        eos_token_id=eos_token_id,
        tie_word_embeddings=tie_word_embeddings,
        **kwargs,
    )
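
A minimal usage sketch of the configuration class. It relies only on the constructor shown above; the overridden values in the second example are arbitrary illustrative numbers, not a released checkpoint configuration.

from mindnlp.transformers.models.baichuan.configuration_baichuan import BaiChuanConfig

# Default configuration: the values match the defaults listed in __init__ above.
config = BaiChuanConfig()
print(config.hidden_size)        # 4096
print(config.num_hidden_layers)  # 32

# A deliberately tiny configuration, e.g. for unit tests or quick experiments.
tiny_config = BaiChuanConfig(
    vocab_size=1000,
    hidden_size=128,
    intermediate_size=256,
    num_hidden_layers=2,
    num_attention_heads=4,
    max_position_embeddings=512,
    model_max_length=512,
)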

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanPreTrainedModel

Bases: PreTrainedModel

BaiChuanPreTrainedModel

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
class BaiChuanPreTrainedModel(PreTrainedModel):
    """
    BaiChuanPreTrainedModel
    """
    config_class = BaiChuanConfig
    base_model_prefix = "model"
    _no_split_modules = ["DecoderLayer", "BaiChuanLayer"]
    _keys_to_ignore_on_load_unexpected = [r"decoder\.version"]

    def _init_weights(self, cell):
        """
        Initializes the weights for the given cell.

        Args:
            self (BaiChuanPreTrainedModel): The instance of the BaiChuanPreTrainedModel class.
            cell: The cell for which the weights need to be initialized.

        Returns:
            None.

        Raises:
            None.
        """
        std = self.config.initializer_range
        if isinstance(cell, nn.Linear):
            cell.weight.set_data(initializer(Normal(
                sigma=std, mean=0.0), cell.weight.shape, cell.weight.dtype))
            if cell.bias is not None:
                cell.bias.set_data(initializer('zeros', cell.bias.shape, cell.bias.dtype))
        elif isinstance(cell, nn.Embedding):
            weight = np.random.normal(0.0, std, cell.weight.shape)
            if cell.padding_idx:
                weight[cell.padding_idx] = 0

            cell.weight.set_data(Tensor(weight, cell.weight.dtype))

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan7bModel

Bases: BaiChuanPreTrainedModel

Transformer decoder consisting of config.num_hidden_layers layers. Each layer is a [DecoderLayer] Args: config: BaiChuanConfig

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
class BaiChuan7bModel(BaiChuanPreTrainedModel):
    """
    Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`DecoderLayer`]
    Args:
        config: BaiChuanConfig
    """
    def __init__(self, config: BaiChuanConfig):
        """
        Initializes a new instance of the BaiChuan7bModel class.

        Args:
            self: The instance of the BaiChuan7bModel class.
            config (BaiChuanConfig):
                An instance of BaiChuanConfig containing configuration parameters.

                - Purpose: Specifies the configuration settings for the model.
                - Restrictions: Must be an instance of BaiChuanConfig.

        Returns:
            None.

        Raises:
            None
        """
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=self.padding_idx)
        self.layers = nn.ModuleList([DecoderLayer(config) for _ in range(config.num_hidden_layers)])
        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        """
        Retrieves the input embeddings for the BaiChuan7bModel.

        Args:
            self: The instance of BaiChuan7bModel.

        Returns:
            nn.Embedding: The token embedding layer (`embed_tokens`).

        Raises:
            None.

        This method retrieves the input embeddings for the BaiChuan7bModel.
        The input embeddings are stored in the 'embed_tokens' attribute of the instance.
        """
        return self.embed_tokens

    def set_input_embeddings(self, new_embeddings):
        """
        Sets the input embeddings for the BaiChuan7bModel.

        Args:
            self (BaiChuan7bModel): The instance of the BaiChuan7bModel class.
            new_embeddings (Any): The new embeddings to be set. This can be of any type.

        Returns:
            None.

        Raises:
            None.
        """
        self.embed_tokens = new_embeddings

    # Copied from transformers.models.bart.modeling_bart.BartDecoder._prepare_decoder_attention_mask
    def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
        """
        This method prepares the decoder attention mask based on the provided parameters.

        Args:
            self (BaiChuan7bModel): The instance of the BaiChuan7bModel class.
            attention_mask (Tensor): The attention mask tensor to be applied during decoding.
                If None, no attention mask will be applied.
            input_shape (tuple): The shape of the input as (batch_size, sequence_length).
            inputs_embeds (Tensor): The embedded input tensor of shape (batch_size, sequence_length, hidden_size).
            past_key_values_length (int): The length of past key values to consider for the attention mask.

        Returns:
            Optional[Tensor]: The combined attention mask, or None if no attention mask is applied.

        Raises:
            ValueError: If the input_shape[-1] is less than or equal to 1.
            TypeError: If the input data types are incompatible for mask operations.
        """
        # create causal mask
        # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
        combined_attention_mask = None
        if input_shape[-1] > 1:
            combined_attention_mask = _make_causal_mask(
                input_shape,
                inputs_embeds.dtype,
                past_key_values_length=past_key_values_length,
            )

        if attention_mask is not None:
            # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
            expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1])
            combined_attention_mask = (
                expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
            )

        return combined_attention_mask

    def forward(
            self,
            input_ids: Tensor = None,
            attention_mask: Optional[Tensor] = None,
            position_ids: Optional[Tensor] = None,
            past_key_values: Optional[List[Tensor]] = None,
            inputs_embeds: Optional[Tensor] = None,
            use_cache: Optional[bool] = None,
            output_attentions: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """
        Runs the forward pass of the BaiChuan7bModel, processing the input data and generating the model outputs.

        Args:
            self (object): The instance of the class BaiChuan7bModel.
            input_ids (Tensor): The input tensor containing token indices representing the input sequence. Default is None.
            attention_mask (Optional[Tensor]): Optional tensor specifying the attention mask for the input sequence. Default is None.
            position_ids (Optional[Tensor]): Optional tensor specifying the position indices for the input sequence. Default is None.
            past_key_values (Optional[List[Tensor]]): Optional list of tensors containing past key values for the model. Default is None.
            inputs_embeds (Optional[Tensor]): Optional tensor containing the embeddings of the input tokens. Default is None.
            use_cache (Optional[bool]): Optional boolean flag indicating whether to use cache during model computation. Default is None.
            output_attentions (Optional[bool]): Optional boolean flag indicating whether to output attentions. Default is None.
            output_hidden_states (Optional[bool]): Optional boolean flag indicating whether to output hidden states. Default is None.
            return_dict (Optional[bool]): Optional boolean flag indicating whether to return the output as a dictionary. Default is None.

        Returns:
            Union[Tuple, BaseModelOutputWithPast]: Returns a tuple or BaseModelOutputWithPast object containing the model outputs.

        Raises:
            ValueError:
                Raised if both input_ids and inputs_embeds are specified simultaneously,
                if neither input_ids nor inputs_embeds is specified,
                or if an invalid configuration is encountered during the forward pass.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # retrieve input_ids and inputs_embeds
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
        if input_ids is not None:
            batch_size, seq_length = input_ids.shape
        elif inputs_embeds is not None:
            batch_size, seq_length, _ = inputs_embeds.shape
        else:
            raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")

        seq_length_with_past = seq_length
        past_key_values_length = 0

        if past_key_values is not None:
            past_key_values_length = past_key_values[0][0].shape[2]
            seq_length_with_past = seq_length_with_past + past_key_values_length

        if position_ids is None:
            position_ids = ops.arange(
                past_key_values_length, seq_length + past_key_values_length, dtype=mindspore.int64
            )
            position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
        else:
            position_ids = position_ids.view(-1, seq_length).long()

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)
        # embed positions
        if attention_mask is None:
            attention_mask = ops.ones(
                batch_size, seq_length_with_past, dtype=mindspore.bool_
            )
        attention_mask = self._prepare_decoder_attention_mask(
            attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
        )

        hidden_states = inputs_embeds

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = () if use_cache else None

        for idx, decoder_layer in enumerate(self.layers):
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            past_key_value = past_key_values[idx] if past_key_values is not None else None

            # TODO: how checkpoint
            layer_outputs = decoder_layer(
                hidden_states,
                attention_mask=attention_mask,
                position_ids=position_ids,
                past_key_value=past_key_value,
                output_attentions=output_attentions,
                use_cache=use_cache,
            )

            hidden_states = layer_outputs[0]

            if use_cache:
                next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None
        if not return_dict:
            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )
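
The following is a minimal, hedged usage sketch for the 7B architecture, assuming MindSpore and mindnlp are installed. The tiny configuration values are placeholders chosen so the example runs quickly; they do not correspond to the real Baichuan-7B checkpoint, and calling the model instance is assumed to dispatch to forward as elsewhere in mindnlp.

import mindspore
from mindspore import ops
from mindnlp.transformers.models.baichuan.configuration_baichuan import BaiChuanConfig
from mindnlp.transformers.models.baichuan.modeling_baichuan import BaiChuan7bModel

# Tiny, randomly initialized model (illustrative sizes only).
config = BaiChuanConfig(
    vocab_size=1000,
    hidden_size=64,
    intermediate_size=128,
    num_hidden_layers=2,
    num_attention_heads=4,
)
model = BaiChuan7bModel(config)

# Dummy batch of token ids: (batch_size=2, seq_length=8).
input_ids = ops.randint(0, config.vocab_size, (2, 8), dtype=mindspore.int64)

outputs = model(input_ids, use_cache=True, return_dict=True)
print(outputs.last_hidden_state.shape)  # (2, 8, 64)
print(len(outputs.past_key_values))     # 2: one (key, value) pair per decoder layer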

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan7bModel.__init__(config)

Initializes a new instance of the BaiChuan7bModel class.

PARAMETER DESCRIPTION
  self: The instance of the BaiChuan7bModel class.
  config (BaiChuanConfig): An instance of BaiChuanConfig containing configuration parameters.
    • Purpose: Specifies the configuration settings for the model.
    • Restrictions: Must be an instance of BaiChuanConfig.

RETURNS DESCRIPTION
  None.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
def __init__(self, config: BaiChuanConfig):
    """
    Initializes a new instance of the BaiChuan7bModel class.

    Args:
        self: The instance of the BaiChuan7bModel class.
        config (BaiChuanConfig):
            An instance of BaiChuanConfig containing configuration parameters.

            - Purpose: Specifies the configuration settings for the model.
            - Restrictions: Must be an instance of BaiChuanConfig.

    Returns:
        None.

    Raises:
        None
    """
    super().__init__(config)
    self.padding_idx = config.pad_token_id
    self.vocab_size = config.vocab_size

    self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=self.padding_idx)
    self.layers = nn.ModuleList([DecoderLayer(config) for _ in range(config.num_hidden_layers)])
    self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    # Initialize weights and apply final processing
    self.post_init()

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan7bModel.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None)

Runs the forward pass of the BaiChuan7bModel, processing the input data and generating the model outputs.

PARAMETER DESCRIPTION
  self (object): The instance of the class BaiChuan7bModel.
  input_ids (Tensor, default None): The input tensor containing token indices representing the input sequence.
  attention_mask (Optional[Tensor], default None): Optional tensor specifying the attention mask for the input sequence.
  position_ids (Optional[Tensor], default None): Optional tensor specifying the position indices for the input sequence.
  past_key_values (Optional[List[Tensor]], default None): Optional list of tensors containing past key values for the model.
  inputs_embeds (Optional[Tensor], default None): Optional tensor containing the embeddings of the input tokens.
  use_cache (Optional[bool], default None): Whether to use the cache during model computation.
  output_attentions (Optional[bool], default None): Whether to output attention weights.
  output_hidden_states (Optional[bool], default None): Whether to output hidden states.
  return_dict (Optional[bool], default None): Whether to return the output as a dictionary.

RETURNS DESCRIPTION
  Union[Tuple, BaseModelOutputWithPast]: A tuple or BaseModelOutputWithPast object containing the model outputs.

RAISES DESCRIPTION
  ValueError: Raised if both input_ids and inputs_embeds are specified simultaneously, if neither is specified, or if an invalid configuration is encountered during the forward pass.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
def forward(
        self,
        input_ids: Tensor = None,
        attention_mask: Optional[Tensor] = None,
        position_ids: Optional[Tensor] = None,
        past_key_values: Optional[List[Tensor]] = None,
        inputs_embeds: Optional[Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutputWithPast]:
    """
    Runs the forward pass of the BaiChuan7bModel, processing the input data and generating the model outputs.

    Args:
        self (object): The instance of the class BaiChuan7bModel.
        input_ids (Tensor): The input tensor containing token indices representing the input sequence. Default is None.
        attention_mask (Optional[Tensor]): Optional tensor specifying the attention mask for the input sequence. Default is None.
        position_ids (Optional[Tensor]): Optional tensor specifying the position indices for the input sequence. Default is None.
        past_key_values (Optional[List[Tensor]]): Optional list of tensors containing past key values for the model. Default is None.
        inputs_embeds (Optional[Tensor]): Optional tensor containing the embeddings of the input tokens. Default is None.
        use_cache (Optional[bool]): Optional boolean flag indicating whether to use cache during model computation. Default is None.
        output_attentions (Optional[bool]): Optional boolean flag indicating whether to output attentions. Default is None.
        output_hidden_states (Optional[bool]): Optional boolean flag indicating whether to output hidden states. Default is None.
        return_dict (Optional[bool]): Optional boolean flag indicating whether to return the output as a dictionary. Default is None.

    Returns:
        Union[Tuple, BaseModelOutputWithPast]: Returns a tuple or BaseModelOutputWithPast object containing the model outputs.

    Raises:
        ValueError:
            Raised if both input_ids and inputs_embeds are specified simultaneously,
            if neither input_ids nor inputs_embeds is specified,
            or if an invalid configuration is encountered during the forward pass.
    """
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (
        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
    )
    use_cache = use_cache if use_cache is not None else self.config.use_cache

    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    # retrieve input_ids and inputs_embeds
    if input_ids is not None and inputs_embeds is not None:
        raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
    if input_ids is not None:
        batch_size, seq_length = input_ids.shape
    elif inputs_embeds is not None:
        batch_size, seq_length, _ = inputs_embeds.shape
    else:
        raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")

    seq_length_with_past = seq_length
    past_key_values_length = 0

    if past_key_values is not None:
        past_key_values_length = past_key_values[0][0].shape[2]
        seq_length_with_past = seq_length_with_past + past_key_values_length

    if position_ids is None:
        position_ids = ops.arange(
            past_key_values_length, seq_length + past_key_values_length, dtype=mindspore.int64
        )
        position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
    else:
        position_ids = position_ids.view(-1, seq_length).long()

    if inputs_embeds is None:
        inputs_embeds = self.embed_tokens(input_ids)
    # embed positions
    if attention_mask is None:
        attention_mask = ops.ones(
            batch_size, seq_length_with_past, dtype=mindspore.bool_
        )
    attention_mask = self._prepare_decoder_attention_mask(
        attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
    )

    hidden_states = inputs_embeds

    # decoder layers
    all_hidden_states = () if output_hidden_states else None
    all_self_attns = () if output_attentions else None
    next_decoder_cache = () if use_cache else None

    for idx, decoder_layer in enumerate(self.layers):
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        past_key_value = past_key_values[idx] if past_key_values is not None else None

        # TODO: how checkpoint
        layer_outputs = decoder_layer(
            hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
        )

        hidden_states = layer_outputs[0]

        if use_cache:
            next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

        if output_attentions:
            all_self_attns += (layer_outputs[1],)

    hidden_states = self.norm(hidden_states)

    # add hidden states from the last decoder layer
    if output_hidden_states:
        all_hidden_states += (hidden_states,)

    next_cache = next_decoder_cache if use_cache else None
    if not return_dict:
        return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
    return BaseModelOutputWithPast(
        last_hidden_state=hidden_states,
        past_key_values=next_cache,
        hidden_states=all_hidden_states,
        attentions=all_self_attns,
    )
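
Continuing the tiny-model sketch shown after the class listing above (same hypothetical config, model and outputs variables), the cached key/value states returned when use_cache=True can be fed back in so that only the newly appended token is processed on the next call:

# Incremental decoding: pass the cache from the previous call together with
# just the new token; forward() derives position_ids from the cache length.
next_token = ops.randint(0, config.vocab_size, (2, 1), dtype=mindspore.int64)
step = model(
    next_token,
    past_key_values=outputs.past_key_values,
    use_cache=True,
    return_dict=True,
)
print(step.last_hidden_state.shape)  # (2, 1, 64): only the new position is computed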

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan7bModel.get_input_embeddings()

Retrieves the input embeddings for the BaiChuan7bModel.

PARAMETER DESCRIPTION
  self: The instance of BaiChuan7bModel.

RETURNS DESCRIPTION
  nn.Embedding: The token embedding layer stored in the 'embed_tokens' attribute of the instance.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
def get_input_embeddings(self):
    """
    Retrieves the input embeddings for the BaiChuan7bModel.

    Args:
        self: The instance of BaiChuan7bModel.

    Returns:
        nn.Embedding: The token embedding layer (`embed_tokens`).

    Raises:
        None.

    This method retrieves the input embeddings for the BaiChuan7bModel.
    The input embeddings are stored in the 'embed_tokens' attribute of the instance.
    """
    return self.embed_tokens

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan7bModel.set_input_embeddings(new_embeddings)

Sets the input embeddings for the BaiChuan7bModel.

PARAMETER DESCRIPTION
  self (BaiChuan7bModel): The instance of the BaiChuan7bModel class.
  new_embeddings (Any): The new embeddings to be set. This can be of any type.

RETURNS DESCRIPTION
  None.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
def set_input_embeddings(self, new_embeddings):
    """
    Sets the input embeddings for the BaiChuan7bModel.

    Args:
        self (BaiChuan7bModel): The instance of the BaiChuan7bModel class.
        new_embeddings (Any): The new embeddings to be set. This can be of any type.

    Returns:
        None.

    Raises:
        None.
    """
    self.embed_tokens = new_embeddings

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan13bModel

Bases: BaiChuanPreTrainedModel

This class represents a BaiChuan13b model for natural language processing tasks. It is a subclass of BaiChuanPreTrainedModel and provides methods for initializing the model, getting and setting input embeddings, generating an alibi mask, and running the forward pass.

ATTRIBUTE DESCRIPTION
  padding_idx (int): The index used for padding tokens in the embedding layer.
  vocab_size (int): The size of the vocabulary.
  n_head (int): The number of attention heads.
  embed_tokens (Embedding): The embedding layer for input tokens.
  layers (ModuleList): A list of BaiChuanLayer instances representing the layers of the model.
  norm (RMSNorm): The normalization layer applied after the model layers.
  max_cache_pos (int): The maximum position of past key values for caching.
  first_run (bool): A flag indicating if it is the first run of the model.
  alibi_mask (Optional[Tensor]): A tensor representing the alibi mask.

METHOD DESCRIPTION
  __init__(config: BaiChuanConfig): Initializes the BaiChuan13bModel instance with a configuration.
  get_input_embeddings(): Returns the input embeddings of the model.
  set_input_embeddings(value): Sets the input embeddings of the model.
  get_alibi_mask(tensor, seq_length_with_past): Generates an alibi mask based on the tensor and sequence length.
  forward(input_ids, attention_mask, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict): Runs the forward pass with the given inputs and returns the model output.

Note
  • The BaiChuan13bModel class is designed to be used for natural language processing tasks, such as text classification or language generation.
  • The model architecture follows the BaiChuan13b configuration, which includes embedding layers, multiple layers of BaiChuanLayer, and normalization layers.
  • The alibi mask is used for attention calculations and is generated based on the input tensor and sequence length.
  • The forward method is the main entry point for using the model, which takes various inputs and returns the model output.
Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
class BaiChuan13bModel(BaiChuanPreTrainedModel):

    """
    This class represents a BaiChuan13b model for natural language processing tasks. It is a subclass of the BaiChuanPreTrainedModel class.
    The BaiChuan13bModel class contains methods for initializing the model, getting and setting input embeddings,
    generating an alibi mask, and running the forward pass.

    Attributes:
        padding_idx (int): The index used for padding tokens in the embedding layer.
        vocab_size (int): The size of the vocabulary.
        n_head (int): The number of attention heads.
        embed_tokens (nn.Embedding): The embedding layer for input tokens.
        layers (nn.ModuleList): A list of BaiChuanLayer instances representing the layers of the model.
        norm (RMSNorm): The normalization layer applied after the model layers.
        max_cache_pos (int): The maximum position of past key values for caching.
        first_run (bool): A flag indicating if it is the first run of the model.
        alibi_mask (Optional[mindspore.Tensor]): A tensor representing the alibi mask.

    Methods:
        __init__(self, config: BaiChuanConfig): Initializes the BaiChuan13bModel instance with a configuration.
        get_input_embeddings(self): Returns the input embeddings of the model.
        set_input_embeddings(self, value): Sets the input embeddings of the model.
        get_alibi_mask(self, tensor, seq_length_with_past): Generates an alibi mask based on the tensor and sequence length.
        forward(self, input_ids, attention_mask, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict):
            Runs the forward pass with the given inputs and returns the model output.

    Note:
        - The BaiChuan13bModel class is designed to be used for natural language processing tasks, such as text classification or language generation.
        - The model architecture follows the BaiChuan13b configuration, which includes embedding layers, multiple layers of BaiChuanLayer, and normalization layers.
        - The alibi mask is used for attention calculations and is generated based on the input tensor and sequence length.
        - The forward method is the main entry point for using the model, which takes various inputs and returns the model output.
    """
    def __init__(self, config: BaiChuanConfig):
        """
        __init__

        This method initializes an instance of the BaiChuan13bModel class.

        Args:
            self: The instance of the BaiChuan13bModel class.
            config (BaiChuanConfig):
                An object of type BaiChuanConfig containing configuration parameters for the model.
                It specifies the configuration parameters such as pad_token_id, vocab_size,
                num_attention_heads, hidden_size, num_hidden_layers, rms_norm_eps, and model_max_length.

        Returns:
            None.

        Raises:
            None.
        """
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size
        self.n_head = config.num_attention_heads
        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=self.padding_idx)
        self.layers = nn.ModuleList([BaiChuanLayer(config) for _ in range(config.num_hidden_layers)])
        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

        self.post_init()
        self.max_cache_pos = config.model_max_length
        self.first_run = True
        self.alibi_mask = None

    def get_input_embeddings(self):
        """
        This method returns the input embeddings for the BaiChuan13bModel.

        Args:
            self: The instance of the BaiChuan13bModel class.

        Returns:
            nn.Embedding: The input embeddings of the BaiChuan13bModel, i.e. the 'embed_tokens' layer.

        Raises:
            None
        """
        return self.embed_tokens

    def set_input_embeddings(self, value):
        """
        Method to set the input embeddings for the BaiChuan13bModel.

        Args:
            self (BaiChuan13bModel): The instance of the BaiChuan13bModel class.
            value:
                The input embeddings to be set for the model.

                - Type: Any
                - Purpose: Represents the new input embeddings to be assigned to the model.
                - Restrictions: None

        Returns:
            None.

        Raises:
            None
        """
        self.embed_tokens = value

    def get_alibi_mask(self, tensor, seq_length_with_past):
        """
        This method is a member of the 'BaiChuan13bModel' class and is used to obtain an alibi mask
        based on the input tensor and sequence length with past information.

        Args:
            self (object): The instance of the class.
            tensor (Tensor): The input tensor used to derive the alibi mask.
            seq_length_with_past (int): The length of the sequence with past information.

        Returns:
            Tensor: The alibi attention mask used by the attention layers.

        Raises:
            ValueError: If the 'seq_length_with_past' parameter is not an integer.
            RuntimeError: If the method encounters issues during execution.
        """
        if self.training:
            slopes = mindspore.Tensor(_get_interleave(self.n_head))
            alibi = slopes.unsqueeze(1).unsqueeze(1) * ops.arange(seq_length_with_past).unsqueeze(0).unsqueeze(0).broadcast_to(
                (self.n_head, -1, -1))
            alibi = alibi.view(self.n_head, 1, seq_length_with_past)
            mask = _buffered_future_mask(tensor, seq_length_with_past, alibi, self.n_head)
        else:
            if self.first_run:
                self.first_run = False
                self.future_mask = _gen_alibi_mask(self.n_head, self.max_cache_pos)
            if seq_length_with_past > self.max_cache_pos:
                self.max_cache_pos = seq_length_with_past
                self.future_mask = _gen_alibi_mask(self.n_head, self.max_cache_pos)
            mask = self.future_mask[:self.n_head, :seq_length_with_past, :seq_length_with_past]
        return mask

    def forward(
            self,
            input_ids: mindspore.Tensor = None,
            attention_mask: Optional[mindspore.Tensor] = None,
            past_key_values: Optional[List[mindspore.Tensor]] = None,
            inputs_embeds: Optional[mindspore.Tensor] = None,
            use_cache: Optional[bool] = False,
            output_attentions: Optional[bool] = False,
            output_hidden_states: Optional[bool] = False,
            return_dict: Optional[bool] = True,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """
        Runs the forward pass of the BaiChuan13bModel.

        Args:
            self: The object instance.
            input_ids (mindspore.Tensor, optional): The input tensor of shape [batch_size, sequence_length].
            attention_mask (mindspore.Tensor, optional): The attention mask tensor of shape [batch_size, sequence_length].
            past_key_values (List[mindspore.Tensor], optional): The list of past key value tensors.
            inputs_embeds (mindspore.Tensor, optional): The input embeddings tensor of shape [batch_size, sequence_length, hidden_size].
            use_cache (bool, optional): Whether to use cache for decoding.
            output_attentions (bool, optional): Whether to output attention weights.
            output_hidden_states (bool, optional): Whether to output hidden states.
            return_dict (bool, optional): Whether to return a dictionary instead of a tuple.

        Returns:
            Union[Tuple, BaseModelOutputWithPast]:
                The output tuple or BaseModelOutputWithPast object containing the last hidden state,
                past key values, hidden states, and attentions.

        Raises:
            ValueError: If both input_ids and inputs_embeds are provided simultaneously.
            ValueError: If neither input_ids nor inputs_embeds are provided.
        """
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot provide both input_ids and inputs_embeds simultaneously")
        if input_ids is not None:
            _, seq_length = input_ids.shape
        elif inputs_embeds is not None:
            _, seq_length, _ = inputs_embeds.shape
        else:
            raise ValueError("You need to provide input_ids or inputs_embeds")

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        seq_length_with_past = seq_length

        if past_key_values is not None:
            past_key_values_length = past_key_values[0][0].shape[2]
            seq_length_with_past = seq_length_with_past + past_key_values_length

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if self.training:
            if self.alibi_mask is None or self.alibi_mask.shape[-1] != seq_length_with_past:
                self.alibi_mask = self.get_alibi_mask(inputs_embeds, seq_length_with_past)
            alibi_mask = self.alibi_mask
        else:
            alibi_mask = self.get_alibi_mask(inputs_embeds, seq_length_with_past)

        if attention_mask is not None:
            if len(attention_mask.shape) == 2:
                expanded_mask = attention_mask.to(alibi_mask.dtype)
                expanded_mask = ops.tril(ops.gt(expanded_mask[:, :, None] * expanded_mask[:, None, :], 0)
                                ) * ops.eq(expanded_mask[:, :, None] - expanded_mask[:, None, :], 0)
            else:
                expanded_mask = attention_mask
            bsz = inputs_embeds.size(0)
            src_len, tgt_len = alibi_mask.shape[-2:]
            expanded_mask = expanded_mask.unsqueeze(1).broadcast_to((bsz, 1, src_len, tgt_len)).to(alibi_mask.dtype)
            inverted_mask = 1.0 - expanded_mask
            inverted_mask = inverted_mask.masked_fill(inverted_mask.to(mindspore.bool_), np.finfo(mindspore.dtype_to_nptype(alibi_mask.dtype)).min)
            attention_mask = inverted_mask + alibi_mask.unsqueeze(0)
        else:
            attention_mask = alibi_mask

        hidden_states = inputs_embeds

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = () if use_cache else None

        for idx, decoder_layer in enumerate(self.layers):
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            past_key_value = past_key_values[idx] if past_key_values is not None else None

            layer_outputs = decoder_layer(
                hidden_states,
                attention_mask=attention_mask,
                past_key_value=past_key_value,
                output_attentions=output_attentions,
                use_cache=use_cache,
            )

            hidden_states = layer_outputs[0]

            if use_cache:
                next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None
        if not return_dict:
            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )
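
A usage sketch analogous to the 7B one, again with placeholder sizes and random weights, assuming MindSpore and mindnlp are installed. Note that, unlike BaiChuan7bModel, the 13B forward signature has no position_ids argument: positional information comes from the ALiBi bias produced by get_alibi_mask.

import mindspore
from mindspore import ops
from mindnlp.transformers.models.baichuan.configuration_baichuan import BaiChuanConfig
from mindnlp.transformers.models.baichuan.modeling_baichuan import BaiChuan13bModel

# Illustrative tiny configuration; a real Baichuan-13B checkpoint is far larger.
config = BaiChuanConfig(
    vocab_size=1000,
    hidden_size=64,
    intermediate_size=128,
    num_hidden_layers=2,
    num_attention_heads=4,
    model_max_length=128,
)
model = BaiChuan13bModel(config)

input_ids = ops.randint(0, config.vocab_size, (2, 8), dtype=mindspore.int64)
attention_mask = ops.ones((2, 8), dtype=mindspore.int64)  # 1 = real token, 0 = padding

outputs = model(input_ids, attention_mask=attention_mask, return_dict=True)
print(outputs.last_hidden_state.shape)  # (2, 8, 64)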

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan13bModel.__init__(config)

This method initializes an instance of the BaiChuan13bModel class.

PARAMETER DESCRIPTION
  self: The instance of the BaiChuan13bModel class.
  config (BaiChuanConfig): An object of type BaiChuanConfig containing configuration parameters for the model, such as pad_token_id, vocab_size, num_attention_heads, hidden_size, num_hidden_layers, rms_norm_eps, and model_max_length.

RETURNS DESCRIPTION
  None.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
def __init__(self, config: BaiChuanConfig):
    """
    __init__

    This method initializes an instance of the BaiChuan13bModel class.

    Args:
        self: The instance of the BaiChuan13bModel class.
        config (BaiChuanConfig):
            An object of type BaiChuanConfig containing configuration parameters for the model.
            It specifies the configuration parameters such as pad_token_id, vocab_size,
            num_attention_heads, hidden_size, num_hidden_layers, rms_norm_eps, and model_max_length.

    Returns:
        None.

    Raises:
        None.
    """
    super().__init__(config)
    self.padding_idx = config.pad_token_id
    self.vocab_size = config.vocab_size
    self.n_head = config.num_attention_heads
    self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=self.padding_idx)
    self.layers = nn.ModuleList([BaiChuanLayer(config) for _ in range(config.num_hidden_layers)])
    self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    self.post_init()
    self.max_cache_pos = config.model_max_length
    self.first_run = True
    self.alibi_mask = None

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan13bModel.forward(input_ids=None, attention_mask=None, past_key_values=None, inputs_embeds=None, use_cache=False, output_attentions=False, output_hidden_states=False, return_dict=True)

Runs the forward pass of the BaiChuan13bModel.

PARAMETER DESCRIPTION
  self: The object instance.
  input_ids (Tensor, default None): The input tensor of shape [batch_size, sequence_length].
  attention_mask (Tensor, default None): The attention mask tensor of shape [batch_size, sequence_length].
  past_key_values (List[Tensor], default None): The list of past key value tensors.
  inputs_embeds (Tensor, default None): The input embeddings tensor of shape [batch_size, sequence_length, hidden_size].
  use_cache (bool, default False): Whether to use cache for decoding.
  output_attentions (bool, default False): Whether to output attention weights.
  output_hidden_states (bool, default False): Whether to output hidden states.
  return_dict (bool, default True): Whether to return a dictionary instead of a tuple.

RETURNS DESCRIPTION
  Union[Tuple, BaseModelOutputWithPast]: The output tuple or BaseModelOutputWithPast object containing the last hidden state, past key values, hidden states, and attentions.

RAISES DESCRIPTION
  ValueError: If both input_ids and inputs_embeds are provided simultaneously.
  ValueError: If neither input_ids nor inputs_embeds are provided.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
def forward(
        self,
        input_ids: mindspore.Tensor = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[List[mindspore.Tensor]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = False,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
) -> Union[Tuple, BaseModelOutputWithPast]:
    """
    Runs the forward pass of the BaiChuan13bModel.

    Args:
        self: The object instance.
        input_ids (mindspore.Tensor, optional): The input tensor of shape [batch_size, sequence_length].
        attention_mask (mindspore.Tensor, optional): The attention mask tensor of shape [batch_size, sequence_length].
        past_key_values (List[mindspore.Tensor], optional): The list of past key value tensors.
        inputs_embeds (mindspore.Tensor, optional): The input embeddings tensor of shape [batch_size, sequence_length, hidden_size].
        use_cache (bool, optional): Whether to use cache for decoding.
        output_attentions (bool, optional): Whether to output attention weights.
        output_hidden_states (bool, optional): Whether to output hidden states.
        return_dict (bool, optional): Whether to return a dictionary instead of a tuple.

    Returns:
        Union[Tuple, BaseModelOutputWithPast]:
            The output tuple or BaseModelOutputWithPast object containing the last hidden state,
            past key values, hidden states, and attentions.

    Raises:
        ValueError: If both input_ids and inputs_embeds are provided simultaneously.
        ValueError: If neither input_ids nor inputs_embeds are provided.
    """
    if input_ids is not None and inputs_embeds is not None:
        raise ValueError("You cannot provide both input_ids and inputs_embeds simultaneously")
    if input_ids is not None:
        _, seq_length = input_ids.shape
    elif inputs_embeds is not None:
        _, seq_length, _ = inputs_embeds.shape
    else:
        raise ValueError("You need to provide input_ids or inputs_embeds")

    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    seq_length_with_past = seq_length

    if past_key_values is not None:
        past_key_values_length = past_key_values[0][0].shape[2]
        seq_length_with_past = seq_length_with_past + past_key_values_length

    if inputs_embeds is None:
        inputs_embeds = self.embed_tokens(input_ids)

    if self.training:
        if self.alibi_mask is None or self.alibi_mask.shape[-1] != seq_length_with_past:
            self.alibi_mask = self.get_alibi_mask(inputs_embeds, seq_length_with_past)
        alibi_mask = self.alibi_mask
    else:
        alibi_mask = self.get_alibi_mask(inputs_embeds, seq_length_with_past)

    if attention_mask is not None:
        if len(attention_mask.shape) == 2:
            expanded_mask = attention_mask.to(alibi_mask.dtype)
            expanded_mask = ops.tril(ops.gt(expanded_mask[:, :, None] * expanded_mask[:, None, :], 0)
                            ) * ops.eq(expanded_mask[:, :, None] - expanded_mask[:, None, :], 0)
        else:
            expanded_mask = attention_mask
        bsz = inputs_embeds.size(0)
        src_len, tgt_len = alibi_mask.shape[-2:]
        expanded_mask = expanded_mask.unsqueeze(1).broadcast_to((bsz, 1, src_len, tgt_len)).to(alibi_mask.dtype)
        inverted_mask = 1.0 - expanded_mask
        inverted_mask = inverted_mask.masked_fill(inverted_mask.to(mindspore.bool_), np.finfo(mindspore.dtype_to_nptype(alibi_mask.dtype)).min)
        attention_mask = inverted_mask + alibi_mask.unsqueeze(0)
    else:
        attention_mask = alibi_mask

    hidden_states = inputs_embeds

    # decoder layers
    all_hidden_states = () if output_hidden_states else None
    all_self_attns = () if output_attentions else None
    next_decoder_cache = () if use_cache else None

    for idx, decoder_layer in enumerate(self.layers):
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        past_key_value = past_key_values[idx] if past_key_values is not None else None

        layer_outputs = decoder_layer(
            hidden_states,
            attention_mask=attention_mask,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
        )

        hidden_states = layer_outputs[0]

        if use_cache:
            next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

        if output_attentions:
            all_self_attns += (layer_outputs[1],)

    hidden_states = self.norm(hidden_states)

    # add hidden states from the last decoder layer
    if output_hidden_states:
        all_hidden_states += (hidden_states,)

    next_cache = next_decoder_cache if use_cache else None
    if not return_dict:
        return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
    return BaseModelOutputWithPast(
        last_hidden_state=hidden_states,
        past_key_values=next_cache,
        hidden_states=all_hidden_states,
        attentions=all_self_attns,
    )
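
The 2-D attention_mask branch above can be hard to read in tensor form. The following standalone numpy sketch (batch dimension omitted) shows what the tril/gt/eq combination computes: position (i, j) is kept only when j <= i and tokens i and j are both non-padding; the result is then inverted, filled with a large negative value, and added to the ALiBi bias.

import numpy as np

m = np.array([1, 1, 1, 0, 0], dtype=np.float32)   # 1 = real token, 0 = padding

pair_valid = (m[:, None] * m[None, :]) > 0        # both positions are real tokens
same_state = (m[:, None] - m[None, :]) == 0       # same padding state
keep = np.tril(pair_valid) & same_state           # causal AND both real
print(keep.astype(np.int32))
# [[1 0 0 0 0]
#  [1 1 0 0 0]
#  [1 1 1 0 0]
#  [0 0 0 0 0]
#  [0 0 0 0 0]]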

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan13bModel.get_alibi_mask(tensor, seq_length_with_past)

This method is a member of the 'BaiChuan13bModel' class and is used to obtain an alibi mask based on the input tensor and sequence length with past information.

PARAMETER DESCRIPTION
  self (object): The instance of the class.
  tensor (Tensor): The input tensor used to derive the alibi mask.
  seq_length_with_past (int): The length of the sequence with past information.

RETURNS DESCRIPTION
  Tensor: The alibi attention mask used by the attention layers.

RAISES DESCRIPTION
  ValueError: If the 'seq_length_with_past' parameter is not an integer.
  RuntimeError: If the method encounters issues during execution.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
def get_alibi_mask(self, tensor, seq_length_with_past):
    """
    This method is a member of the 'BaiChuan13bModel' class and is used to obtain an alibi mask
    based on the input tensor and sequence length with past information.

    Args:
        self (object): The instance of the class.
        tensor (Tensor): The input tensor used to derive the alibi mask.
        seq_length_with_past (int): The length of the sequence with past information.

    Returns:
        Tensor: The alibi attention mask used by the attention layers.

    Raises:
        ValueError: If the 'seq_length_with_past' parameter is not an integer.
        RuntimeError: If the method encounters issues during execution.
    """
    if self.training:
        slopes = mindspore.Tensor(_get_interleave(self.n_head))
        alibi = slopes.unsqueeze(1).unsqueeze(1) * ops.arange(seq_length_with_past).unsqueeze(0).unsqueeze(0).broadcast_to(
            (self.n_head, -1, -1))
        alibi = alibi.view(self.n_head, 1, seq_length_with_past)
        mask = _buffered_future_mask(tensor, seq_length_with_past, alibi, self.n_head)
    else:
        if self.first_run:
            self.first_run = False
            self.future_mask = _gen_alibi_mask(self.n_head, self.max_cache_pos)
        if seq_length_with_past > self.max_cache_pos:
            self.max_cache_pos = seq_length_with_past
            self.future_mask = _gen_alibi_mask(self.n_head, self.max_cache_pos)
        mask = self.future_mask[:self.n_head, :seq_length_with_past, :seq_length_with_past]
    return mask
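
For intuition, here is a conceptual numpy sketch of how an ALiBi-style mask of the kind returned above is typically constructed: each head gets a slope from a geometric sequence, the bias grows linearly with the key position, and future positions are blocked with -inf. The helper names and exact values inside _get_interleave and _gen_alibi_mask may differ in detail; this is an illustration, not the library implementation.

import numpy as np

def alibi_slopes(n_heads: int) -> np.ndarray:
    # Standard ALiBi slopes for a power-of-two head count:
    # a geometric sequence 2**(-8/n), 2**(-16/n), ...
    start = 2.0 ** (-8.0 / n_heads)
    return start ** np.arange(1, n_heads + 1)

n_head, seq_len = 4, 5
slopes = alibi_slopes(n_head)                                      # (n_head,)
bias = slopes[:, None, None] * np.arange(seq_len)[None, None, :]   # (n_head, 1, seq_len)

# Causal part: future key positions are masked out with -inf.
causal = np.triu(np.full((seq_len, seq_len), -np.inf), k=1)
mask = bias + causal[None, :, :]                                   # (n_head, seq_len, seq_len)
print(mask.shape)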

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan13bModel.get_input_embeddings()

This method returns the input embeddings for the BaiChuan13bModel.

PARAMETER DESCRIPTION
  self: The instance of the BaiChuan13bModel class.

RETURNS DESCRIPTION
  nn.Embedding: The input embeddings of the BaiChuan13bModel, i.e. the 'embed_tokens' layer.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py
def get_input_embeddings(self):
    """
    This method returns the input embeddings for the BaiChuan13bModel.

    Args:
        self: The instance of the BaiChuan13bModel class.

    Returns:
        nn.Embedding: The input embeddings of the BaiChuan13bModel, i.e. the 'embed_tokens' layer.

    Raises:
        None
    """
    return self.embed_tokens

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuan13bModel.set_input_embeddings(value)

Method to set the input embeddings for the BaiChuan13bModel.

PARAMETER DESCRIPTION
self

The instance of the BaiChuan13bModel class.

TYPE: BaiChuan13bModel

value

The input embeddings to be set for the model.

  • Type: Any
  • Purpose: Represents the new input embeddings to be assigned to the model.
  • Restrictions: None

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1455-1474
def set_input_embeddings(self, value):
    """
    Method to set the input embeddings for the BaiChuan13bModel.

    Args:
        self (BaiChuan13bModel): The instance of the BaiChuan13bModel class.
        value:
            The input embeddings to be set for the model.

            - Type: Any
            - Purpose: Represents the new input embeddings to be assigned to the model.
            - Restrictions: None

    Returns:
        None.

    Raises:
        None
    """
    self.embed_tokens = value

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM

Bases: BaiChuanPreTrainedModel

BaiChuanForCausalLM

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1632-1960
class BaiChuanForCausalLM(BaiChuanPreTrainedModel):
    """
    BaiChuanForCausalLM
    """
    def __init__(self, config, size=None):
        """
        Initializes a new instance of BaiChuanForCausalLM.

        Args:
            self: The instance of the class.
            config: The configuration for the model.
            size (str, optional): The size of the model. Defaults to None. Must be either '7b' or '13b'.

        Returns:
            None.

        Raises:
            ValueError:
                If the size parameter is not '7b' or '13b', a ValueError is raised with the message 'BaiChuan model
                only supports 7b and 13b, please check your config.'
        """
        super().__init__(config)
        if size == '7b':
            self.model = BaiChuan7bModel(config)
        elif size == '13b':
            self.model = BaiChuan13bModel(config)
        else:
            self.model = BaiChuan7bModel(config)
            raise ValueError('BaiChuan model only supports 7b and 13b, please check your config.')

        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        """
        Method to retrieve the input embeddings from the model for the BaiChuanForCausalLM class.

        Args:
            self: The instance of the BaiChuanForCausalLM class. It is used to access the model's embed_tokens.

        Returns:
            nn.Embedding: The `embed_tokens` embedding layer of the underlying BaiChuan model.

        Raises:
            None.
        """
        return self.model.embed_tokens

    def set_input_embeddings(self, new_embeddings):
        """
        Set the input embeddings for the BaiChuanForCausalLM model.

        Args:
            self (BaiChuanForCausalLM): The instance of the BaiChuanForCausalLM class.
            new_embeddings (nn.Embedding): The new embedding module to assign to the model's `embed_tokens`.

        Returns:
            None.

        Raises:
            None.
        """
        self.model.embed_tokens = new_embeddings

    def get_output_embeddings(self):
        """
        This method retrieves the output embeddings from the BaiChuanForCausalLM model.

        Args:
            self: An instance of the BaiChuanForCausalLM class.

        Returns:
            lm_head: The method returns the lm_head attribute which contains the output embeddings.

        Raises:
            None.
        """
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        """
        Set the output embeddings for BaiChuanForCausalLM model.

        Args:
            self (BaiChuanForCausalLM): The instance of BaiChuanForCausalLM class.
            new_embeddings (Any): The new embeddings to be set as the output embeddings for the model.
                This can be of any type.

        Returns:
            None.

        Raises:
            None.
        """
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        """
        set_decoder
        """
        self.model = decoder

    def get_decoder(self):
        """
        get_decoder
        """
        return self.model

    def forward(
            self,
            input_ids: Tensor = None,
            attention_mask: Optional[Tensor] = None,
            position_ids: Optional[Tensor] = None,
            past_key_values: Optional[List[Tensor]] = None,
            inputs_embeds: Optional[Tensor] = None,
            labels: Optional[Tensor] = None,
            use_cache: Optional[bool] = None,
            output_attentions: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        """
        Runs the forward pass of the BaiChuan causal language model.

        Args:
            self (BaiChuanForCausalLM): The instance of the BaiChuanForCausalLM class.
            input_ids (Tensor, optional): The input tensor containing the token IDs. Default: None.
            attention_mask (Optional[Tensor], optional): The attention mask tensor. Default: None.
            position_ids (Optional[Tensor], optional): The position IDs tensor. Default: None.
            past_key_values (Optional[List[Tensor]], optional): The list of past key values tensor. Default: None.
            inputs_embeds (Optional[Tensor], optional): The input embeddings tensor. Default: None.
            labels (Optional[Tensor], optional): The tensor containing the labels. Default: None.
            use_cache (Optional[bool], optional): Whether to use cache. Default: None.
            output_attentions (Optional[bool], optional): Whether to output attentions. Default: None.
            output_hidden_states (Optional[bool], optional): Whether to output hidden states. Default: None.
            return_dict (Optional[bool], optional): Whether to return a dictionary. Default: None.

        Returns:
            Union[Tuple, CausalLMOutputWithPast]:
                The model outputs.

                - If `return_dict` is False, returns a tuple containing the logits and the various model outputs.
                - If `return_dict` is True, returns an instance of `CausalLMOutputWithPast` containing the loss,
                logits, past key values, hidden states, and attentions.

        Raises:
            ValueError: If the BaiChuan model is not of type BaiChuan7bModel or BaiChuan13bModel.

        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
        if isinstance(self.model, BaiChuan7bModel):
            outputs = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                position_ids=position_ids,
                past_key_values=past_key_values,
                inputs_embeds=inputs_embeds,
                use_cache=use_cache,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )
        elif isinstance(self.model, BaiChuan13bModel):
            outputs = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                past_key_values=past_key_values,
                inputs_embeds=inputs_embeds,
                use_cache=use_cache,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )
        else:
            raise ValueError('BaiChuan model only supports 7b and 13b, please check your config.')

        hidden_states = outputs[0]
        logits = self.lm_head(hidden_states)

        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :]
            shift_labels = labels[..., 1:]
            # Flatten the tokens
            shift_logits = shift_logits.view(-1, self.config.vocab_size)
            shift_labels = shift_labels.view(-1)
            # Enable model parallelism
            loss = ops.cross_entropy(shift_logits, shift_labels)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def prepare_inputs_for_generation(
            self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
    ):
        """
        This method prepares inputs for generation in the BaiChuanForCausalLM class.

        Args:
            self (object): The instance of the class.
            input_ids (mindspore.Tensor): The input token IDs. Shape (batch_size, sequence_length).
            past_key_values (tuple, optional):
                Tuple of tensors containing cached key and value projection states of the model. Default is None.
            attention_mask (mindspore.Tensor, optional):
                Mask to avoid performing attention on padding token indices. Shape (batch_size, sequence_length).
            inputs_embeds (mindspore.Tensor, optional):
                The embedded representation of the input tokens. Shape (batch_size, sequence_length, hidden_size).

        Returns:
            dict:
                A dictionary containing model inputs for generation, including 'input_ids', 'position_ids',
                'past_key_values', 'use_cache', and 'attention_mask'.

        Raises:
            ValueError: If attention_mask and position_ids are both provided and have mismatched shapes.
            ValueError: If inputs_embeds and past_key_values are both provided.
        """
        if past_key_values:
            input_ids = input_ids[:, -1:]

        position_ids = kwargs.get("position_ids", None)
        if attention_mask is not None and position_ids is None:
            # create position_ids on the fly for batch generation
            position_ids = attention_mask.long().cumsum(-1) - 1
            position_ids = position_ids.masked_fill(attention_mask == 0, 1)
            if past_key_values:
                position_ids = position_ids[:, -1].unsqueeze(-1)

        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and past_key_values is None:
            model_inputs = {"inputs_embeds": inputs_embeds}
        else:
            model_inputs = {"input_ids": input_ids}

        model_inputs.update(
            {
                "position_ids": position_ids,
                "past_key_values": past_key_values,
                "use_cache": kwargs.get("use_cache"),
                "attention_mask": attention_mask,
            }
        )
        return model_inputs

    @staticmethod
    def _reorder_cache(past_key_values, beam_idx):
        """
        Reorders the cache for the given beam indices.

        Args:
            past_key_values (tuple): A tuple containing the past key and value tensors for each layer.
                Each element in the tuple is a tuple of tensors representing the past states for the corresponding layer.
            beam_idx (tensor): A 1-D tensor containing the indices of the beams for reordering the cache.

        Returns:
            tuple: A new tuple of per-layer past states with their batch dimension reordered according to 'beam_idx'.

        Raises:
            ValueError: If the length of 'past_key_values' does not match the number of layers in the model.
            IndexError: If the 'beam_idx' contains indices that are out of range for the dimensions of
                the tensors in 'past_key_values'.
        """
        reordered_past = ()
        for layer_past in past_key_values:
            reordered_past += (tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),)
        return reordered_past

    def chat(self, tokenizer, messages: List[dict], stream=False,
             generation_config: Optional[GenerationConfig]=None):
        """
        Method:
            chat

        Description:
            This method allows for conducting a chat conversation using the BaiChuanForCausalLM model.
            It takes in the necessary input parameters and returns the response generated by the model.

        Args:
            self: The instance of the BaiChuanForCausalLM class.
            tokenizer: An object of the tokenizer class used for tokenizing the input messages.
            messages: A list of dictionaries representing the chat messages.
                Each dictionary contains the following keys:

                - 'role': The role of the message sender (e.g., 'system', 'user', 'assistant').
                - 'content': The content of the message.
            stream: A boolean value indicating whether the chat conversation should be streamed or not. Default is False.
            generation_config: An optional object of the GenerationConfig class that specifies the generation configurations.
                If not provided, the instance's generation_config will be used.

        Returns:
            TextIterStreamer if `stream` is True, otherwise the decoded response string.

        Raises:
            None
        """
        generation_config = generation_config or self.generation_config
        input_ids = build_chat_input(self, tokenizer, messages, generation_config.max_new_tokens)
        if stream:
            streamer = TextIterStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
            Thread(target=self.generate, kwargs={
                                                    "inputs": input_ids,
                                                    "streamer": streamer,
                                                    "generation_config": generation_config
                                                }
            ).start()
            return streamer

        outputs = self.generate(input_ids, generation_config=generation_config)
        response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)
        return response
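
As a quick smoke test of the class interface, the sketch below builds a deliberately tiny, randomly initialised model and runs one forward pass. The reduced configuration values are assumptions chosen only to keep the example small (real checkpoints use the defaults), and it presumes a tiny `BaiChuanConfig` is enough to construct the 7b-style variant.

```python
# Hedged smoke-test sketch: tiny random weights, no pretrained checkpoint involved.
import numpy as np
import mindspore
from mindnlp.transformers.models.baichuan.configuration_baichuan import BaiChuanConfig
from mindnlp.transformers.models.baichuan.modeling_baichuan import BaiChuanForCausalLM

config = BaiChuanConfig(
    vocab_size=1000, hidden_size=64, intermediate_size=128,
    num_hidden_layers=2, num_attention_heads=4, max_position_embeddings=128,
)
model = BaiChuanForCausalLM(config, size="7b")   # size must be '7b' or '13b'

input_ids = mindspore.Tensor(np.random.randint(0, 1000, (1, 8)), mindspore.int32)
out = model(input_ids=input_ids, return_dict=True)
print(out.logits.shape)   # expected (1, 8, 1000): one distribution over the vocab per position
```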

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.__init__(config, size=None)

Initializes a new instance of BaiChuanForCausalLM.

PARAMETER DESCRIPTION
self

The instance of the class.

config

The configuration for the model.

size

The size of the model. Defaults to None. Must be either '7b' or '13b'.

TYPE: str DEFAULT: None

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
ValueError

If the size parameter is not '7b' or '13b', a ValueError is raised with the message 'BaiChuan model only supports 7b and 13b, please check your config.'

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1636-1665
def __init__(self, config, size=None):
    """
    Initializes a new instance of BaiChuanForCausalLM.

    Args:
        self: The instance of the class.
        config: The configuration for the model.
        size (str, optional): The size of the model. Defaults to None. Must be either '7b' or '13b'.

    Returns:
        None.

    Raises:
        ValueError:
            If the size parameter is not '7b' or '13b', a ValueError is raised with the message 'BaiChuan model
            only supports 7b and 13b, please check your config.'
    """
    super().__init__(config)
    if size == '7b':
        self.model = BaiChuan7bModel(config)
    elif size == '13b':
        self.model = BaiChuan13bModel(config)
    else:
        self.model = BaiChuan7bModel(config)
        raise ValueError('BaiChuan model only supports 7b and 13b, please check your config.')

    self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

    # Initialize weights and apply final processing
    self.post_init()

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.chat(tokenizer, messages, stream=False, generation_config=None)

Conducts a chat conversation with the BaiChuanForCausalLM model and returns the generated response.

PARAMETER DESCRIPTION
self

The instance of the BaiChuanForCausalLM class.

tokenizer

An object of the tokenizer class used for tokenizing the input messages.

messages

A list of dictionaries representing the chat messages. Each dictionary contains the following keys:

  • 'role': The role of the message sender (e.g., 'system', 'user', 'assistant').
  • 'content': The content of the message.

TYPE: List[dict]

stream

A boolean value indicating whether the chat conversation should be streamed or not. Default is False.

DEFAULT: False

generation_config

An optional object of the GenerationConfig class that specifies the generation configurations. If not provided, the instance's generation_config will be used.

TYPE: Optional[GenerationConfig] DEFAULT: None

RETURNS DESCRIPTION

TextIterStreamer if `stream` is True, otherwise the decoded response string generated by the model.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1918-1960
def chat(self, tokenizer, messages: List[dict], stream=False,
         generation_config: Optional[GenerationConfig]=None):
    """
    Method:
        chat

    Description:
        This method allows for conducting a chat conversation using the BaiChuanForCausalLM model.
        It takes in the necessary input parameters and returns the response generated by the model.

    Args:
        self: The instance of the BaiChuanForCausalLM class.
        tokenizer: An object of the tokenizer class used for tokenizing the input messages.
        messages: A list of dictionaries representing the chat messages.
            Each dictionary contains the following keys:

            - 'role': The role of the message sender (e.g., 'system', 'user', 'assistant').
            - 'content': The content of the message.
        stream: A boolean value indicating whether the chat conversation should be streamed or not. Default is False.
        generation_config: An optional object of the GenerationConfig class that specifies the generation configurations.
            If not provided, the instance's generation_config will be used.

    Returns:
        TextIterStreamer if `stream` is True, otherwise the decoded response string.

    Raises:
        None
    """
    generation_config = generation_config or self.generation_config
    input_ids = build_chat_input(self, tokenizer, messages, generation_config.max_new_tokens)
    if stream:
        streamer = TextIterStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        Thread(target=self.generate, kwargs={
                                                "inputs": input_ids,
                                                "streamer": streamer,
                                                "generation_config": generation_config
                                            }
        ).start()
        return streamer

    outputs = self.generate(input_ids, generation_config=generation_config)
    response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)
    return response
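
A hedged usage sketch for `chat`: it assumes `model` and `tokenizer` were already loaded from a Baichuan chat checkpoint whose `generation_config` defines `max_new_tokens`, and that `TextIterStreamer` is iterable in the same way as Hugging Face's `TextIteratorStreamer`.

```python
# Assumes `model` (BaiChuanForCausalLM) and `tokenizer` are already loaded from a chat
# checkpoint; the message content is arbitrary example text.
messages = [{"role": "user", "content": "Introduce the city of Hangzhou in one sentence."}]

# Blocking call: returns the decoded assistant reply as a string.
reply = model.chat(tokenizer, messages)
print(reply)

# Streaming call: returns the TextIterStreamer, which yields decoded text chunks
# while generation runs in a background thread.
for chunk in model.chat(tokenizer, messages, stream=True):
    print(chunk, end="", flush=True)
```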

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.forward(input_ids=None, attention_mask=None, position_ids=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None)

Runs the forward pass of the BaiChuan causal language model.

PARAMETER DESCRIPTION
self

The instance of the BaiChuanForCausalLM class.

TYPE: BaiChuanForCausalLM

input_ids

The input tensor containing the token IDs. Default: None.

TYPE: Tensor DEFAULT: None

attention_mask

The attention mask tensor. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

position_ids

The position IDs tensor. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

past_key_values

The list of past key values tensor. Default: None.

TYPE: Optional[List[Tensor]] DEFAULT: None

inputs_embeds

The input embeddings tensor. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

labels

The tensor containing the labels. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

use_cache

Whether to use cache. Default: None.

TYPE: Optional[bool] DEFAULT: None

output_attentions

Whether to output attentions. Default: None.

TYPE: Optional[bool] DEFAULT: None

output_hidden_states

Whether to output hidden states. Default: None.

TYPE: Optional[bool] DEFAULT: None

return_dict

Whether to return a dictionary. Default: None.

TYPE: Optional[bool] DEFAULT: None

RETURNS DESCRIPTION
Union[Tuple, CausalLMOutputWithPast]

Union[Tuple, CausalLMOutputWithPast]: The model outputs.

  • If return_dict is False, returns a tuple containing the logits and the various model outputs.
  • If return_dict is True, returns an instance of CausalLMOutputWithPast containing the loss, logits, past key values, hidden states, and attentions.
RAISES DESCRIPTION
ValueError

If the BaiChuan model is not of type BaiChuan7bModel or BaiChuan13bModel.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1743-1841
def forward(
        self,
        input_ids: Tensor = None,
        attention_mask: Optional[Tensor] = None,
        position_ids: Optional[Tensor] = None,
        past_key_values: Optional[List[Tensor]] = None,
        inputs_embeds: Optional[Tensor] = None,
        labels: Optional[Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
) -> Union[Tuple, CausalLMOutputWithPast]:
    """
    Runs the forward pass of the BaiChuan causal language model.

    Args:
        self (BaiChuanForCausalLM): The instance of the BaiChuanForCausalLM class.
        input_ids (Tensor, optional): The input tensor containing the token IDs. Default: None.
        attention_mask (Optional[Tensor], optional): The attention mask tensor. Default: None.
        position_ids (Optional[Tensor], optional): The position IDs tensor. Default: None.
        past_key_values (Optional[List[Tensor]], optional): The list of past key values tensor. Default: None.
        inputs_embeds (Optional[Tensor], optional): The input embeddings tensor. Default: None.
        labels (Optional[Tensor], optional): The tensor containing the labels. Default: None.
        use_cache (Optional[bool], optional): Whether to use cache. Default: None.
        output_attentions (Optional[bool], optional): Whether to output attentions. Default: None.
        output_hidden_states (Optional[bool], optional): Whether to output hidden states. Default: None.
        return_dict (Optional[bool], optional): Whether to return a dictionary. Default: None.

    Returns:
        Union[Tuple, CausalLMOutputWithPast]:
            The model outputs.

            - If `return_dict` is False, returns a tuple containing the logits and the various model outputs.
            - If `return_dict` is True, returns an instance of `CausalLMOutputWithPast` containing the loss,
            logits, past key values, hidden states, and attentions.

    Raises:
        ValueError: If the BaiChuan model is not of type BaiChuan7bModel or BaiChuan13bModel.

    """
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (
        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
    )
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
    if isinstance(self.model, BaiChuan7bModel):
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
    elif isinstance(self.model, BaiChuan13bModel):
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
    else:
        raise ValueError('BaiChuan model only supports 7b and 13b, please check your config.')

    hidden_states = outputs[0]
    logits = self.lm_head(hidden_states)

    loss = None
    if labels is not None:
        # Shift so that tokens < n predict n
        shift_logits = logits[..., :-1, :]
        shift_labels = labels[..., 1:]
        # Flatten the tokens
        shift_logits = shift_logits.view(-1, self.config.vocab_size)
        shift_labels = shift_labels.view(-1)
        # Enable model parallelism
        loss = ops.cross_entropy(shift_logits, shift_labels)

    if not return_dict:
        output = (logits,) + outputs[1:]
        return (loss,) + output if loss is not None else output

    return CausalLMOutputWithPast(
        loss=loss,
        logits=logits,
        past_key_values=outputs.past_key_values,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )
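
To make the label handling above concrete, this NumPy sketch reproduces the shift-by-one alignment and the mean cross-entropy on toy data; it mirrors the logic of the code above rather than calling `ops.cross_entropy`.

```python
# Position t's logits are scored against token t+1, so the last logit row and the
# first label are dropped before flattening.
import numpy as np

vocab_size = 5
logits = np.random.randn(1, 4, vocab_size)        # (batch, seq_len, vocab)
labels = np.array([[2, 3, 1, 4]])                 # (batch, seq_len)

shift_logits = logits[:, :-1, :].reshape(-1, vocab_size)   # predictions for positions 0..2
shift_labels = labels[:, 1:].reshape(-1)                   # targets are tokens 1..3

# Mean token-level cross-entropy over the flattened pairs.
log_probs = shift_logits - np.log(np.exp(shift_logits).sum(axis=-1, keepdims=True))
loss = -log_probs[np.arange(len(shift_labels)), shift_labels].mean()
print(loss)
```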

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.get_decoder()

get_decoder

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1737-1741
def get_decoder(self):
    """
    get_decoder
    """
    return self.model

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.get_input_embeddings()

Method to retrieve the input embeddings from the model for the BaiChuanForCausalLM class.

PARAMETER DESCRIPTION
self

The instance of the BaiChuanForCausalLM class. It is used to access the model's embed_tokens.

RETURNS DESCRIPTION
nn.Embedding

The `embed_tokens` embedding layer of the underlying BaiChuan model.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1667-1680
def get_input_embeddings(self):
    """
    Method to retrieve the input embeddings from the model for the BaiChuanForCausalLM class.

    Args:
        self: The instance of the BaiChuanForCausalLM class. It is used to access the model's embed_tokens.

    Returns:
        nn.Embedding: The `embed_tokens` embedding layer of the underlying BaiChuan model.

    Raises:
        None.
    """
    return self.model.embed_tokens

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.get_output_embeddings()

This method retrieves the output embeddings from the BaiChuanForCausalLM model.

PARAMETER DESCRIPTION
self

An instance of the BaiChuanForCausalLM class.

RETURNS DESCRIPTION
lm_head

The method returns the lm_head attribute which contains the output embeddings.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1699-1712
def get_output_embeddings(self):
    """
    This method retrieves the output embeddings from the BaiChuanForCausalLM model.

    Args:
        self: An instance of the BaiChuanForCausalLM class.

    Returns:
        lm_head: The method returns the lm_head attribute which contains the output embeddings.

    Raises:
        None.
    """
    return self.lm_head

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.prepare_inputs_for_generation(input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs)

This method prepares inputs for generation in the BaiChuanForCausalLM class.

PARAMETER DESCRIPTION
self

The instance of the class.

TYPE: object

input_ids

The input token IDs. Shape (batch_size, sequence_length).

TYPE: Tensor

past_key_values

Tuple of tensors containing cached key and value projection states of the model. Default is None.

TYPE: tuple DEFAULT: None

attention_mask

Mask to avoid performing attention on padding token indices. Shape (batch_size, sequence_length).

TYPE: Tensor DEFAULT: None

inputs_embeds

The embedded representation of the input tokens. Shape (batch_size, sequence_length, hidden_size).

TYPE: Tensor DEFAULT: None

RETURNS DESCRIPTION
dict

A dictionary containing model inputs for generation, including 'input_ids', 'position_ids', 'past_key_values', 'use_cache', and 'attention_mask'.

RAISES DESCRIPTION
ValueError

If attention_mask and position_ids are both provided and have mismatched shapes.

ValueError

If inputs_embeds and past_key_values are both provided.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1843-1893
def prepare_inputs_for_generation(
        self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
):
    """
    This method prepares inputs for generation in the BaiChuanForCausalLM class.

    Args:
        self (object): The instance of the class.
        input_ids (mindspore.Tensor): The input token IDs. Shape (batch_size, sequence_length).
        past_key_values (tuple, optional):
            Tuple of tensors containing cached key and value projection states of the model. Default is None.
        attention_mask (mindspore.Tensor, optional):
            Mask to avoid performing attention on padding token indices. Shape (batch_size, sequence_length).
        inputs_embeds (mindspore.Tensor, optional):
            The embedded representation of the input tokens. Shape (batch_size, sequence_length, hidden_size).

    Returns:
        dict:
            A dictionary containing model inputs for generation, including 'input_ids', 'position_ids',
            'past_key_values', 'use_cache', and 'attention_mask'.

    Raises:
        ValueError: If attention_mask and position_ids are both provided and have mismatched shapes.
        ValueError: If inputs_embeds and past_key_values are both provided.
    """
    if past_key_values:
        input_ids = input_ids[:, -1:]

    position_ids = kwargs.get("position_ids", None)
    if attention_mask is not None and position_ids is None:
        # create position_ids on the fly for batch generation
        position_ids = attention_mask.long().cumsum(-1) - 1
        position_ids = position_ids.masked_fill(attention_mask == 0, 1)
        if past_key_values:
            position_ids = position_ids[:, -1].unsqueeze(-1)

    # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
    if inputs_embeds is not None and past_key_values is None:
        model_inputs = {"inputs_embeds": inputs_embeds}
    else:
        model_inputs = {"input_ids": input_ids}

    model_inputs.update(
        {
            "position_ids": position_ids,
            "past_key_values": past_key_values,
            "use_cache": kwargs.get("use_cache"),
            "attention_mask": attention_mask,
        }
    )
    return model_inputs
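
The NumPy sketch below mirrors the position-id logic above for a left-padded sequence: padded positions are clamped to 1, and once a cache is present only the newest position is kept. The stand-in cache object is an illustrative assumption.

```python
import numpy as np

attention_mask = np.array([[0, 0, 1, 1, 1]])      # one left-padded sequence
position_ids = attention_mask.cumsum(-1) - 1      # [[-1, -1, 0, 1, 2]]
position_ids[attention_mask == 0] = 1             # [[ 1,  1, 0, 1, 2]]

past_key_values = object()                        # stand-in for a non-empty cache
if past_key_values:
    position_ids = position_ids[:, -1:]           # [[2]]: only the new token's position
print(position_ids)
```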

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.set_decoder(decoder)

set_decoder

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1731-1735
def set_decoder(self, decoder):
    """
    set_decoder
    """
    self.model = decoder

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.set_input_embeddings(new_embeddings)

Set the input embeddings for the BaiChuanForCausalLM model.

PARAMETER DESCRIPTION
self

The instance of the BaiChuanForCausalLM class.

TYPE: BaiChuanForCausalLM

new_embeddings

The new embedding module to assign to the model's `embed_tokens`.

TYPE: nn.Embedding

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1682-1697
def set_input_embeddings(self, new_embeddings):
    """
    Set the input embeddings for the BaiChuanForCausalLM model.

    Args:
        self (BaiChuanForCausalLM): The instance of the BaiChuanForCausalLM class.
        new_embeddings (nn.Embedding): The new embedding module to assign to the model's `embed_tokens`.

    Returns:
        None.

    Raises:
        None.
    """
    self.model.embed_tokens = new_embeddings

mindnlp.transformers.models.baichuan.modeling_baichuan.BaiChuanForCausalLM.set_output_embeddings(new_embeddings)

Set the output embeddings for BaiChuanForCausalLM model.

PARAMETER DESCRIPTION
self

The instance of BaiChuanForCausalLM class.

TYPE: BaiChuanForCausalLM

new_embeddings

The new embeddings to be set as the output embeddings for the model. This can be of any type.

TYPE: Any

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/baichuan/modeling_baichuan.py, lines 1714-1729
def set_output_embeddings(self, new_embeddings):
    """
    Set the output embeddings for BaiChuanForCausalLM model.

    Args:
        self (BaiChuanForCausalLM): The instance of BaiChuanForCausalLM class.
        new_embeddings (Any): The new embeddings to be set as the output embeddings for the model.
            This can be of any type.

    Returns:
        None.

    Raises:
        None.
    """
    self.lm_head = new_embeddings

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer

Bases: PreTrainedTokenizer

Construct a BaiChuan tokenizer. Based on byte-level Byte-Pair-Encoding.

PARAMETER DESCRIPTION
vocab_file

Path to the vocabulary file.

TYPE: `str`

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py, lines 42-339
class BaiChuanTokenizer(PreTrainedTokenizer):
    """
    Construct a BaiChuan tokenizer. Based on byte-level Byte-Pair-Encoding.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
    """
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        pad_token=None,
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        **kwargs,
    ):
        """
        __init__

        Initializes a new instance of the BaiChuanTokenizer class.

        Args:
            self: The instance of the class.
            vocab_file (str): The path to the vocabulary file.
            unk_token (str, optional): The unknown token. Defaults to '<unk>'.
            bos_token (str, optional): The beginning of sentence token. Defaults to '<s>'.
            eos_token (str, optional): The end of sentence token. Defaults to '</s>'.
            pad_token (str, optional): The padding token. Defaults to None.
            sp_model_kwargs (Optional[Dict[str, Any]], optional): Optional arguments for SentencePieceProcessor. Defaults to None.
            add_bos_token (bool, optional): Whether to add the beginning of sentence token. Defaults to True.
            add_eos_token (bool, optional): Whether to add the end of sentence token. Defaults to False.
            clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces. Defaults to False.
            **kwargs: Additional keyword arguments.

        Returns:
            None.

        Raises:
            TypeError: If vocab_file is not a string.
            TypeError: If unk_token is not a string.
            TypeError: If bos_token is not a string.
            TypeError: If eos_token is not a string.
            TypeError: If pad_token is not a string.
            TypeError: If sp_model_kwargs is not a dictionary or None.
            TypeError: If add_bos_token is not a boolean.
            TypeError: If add_eos_token is not a boolean.
            TypeError: If clean_up_tokenization_spaces is not a boolean.
        """
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)

        bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
        pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            sp_model_kwargs=self.sp_model_kwargs,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token

    def __getstate__(self):
        """Return a dictionary representing the state of the BaiChuanTokenizer instance.

        This method takes no additional parameters.

        Args:
            self: The instance of the BaiChuanTokenizer class.

        Returns:
            dict: A copy of the instance's `__dict__` with `sp_model` set to None so the tokenizer can be pickled.

        Raises:
            None.
        """
        state = self.__dict__.copy()
        state["sp_model"] = None
        return state

    def __setstate__(self, d):
        """
        __setstate__ method in the BaiChuanTokenizer class.

        Args:
            self (BaiChuanTokenizer): An instance of the BaiChuanTokenizer class.
                Represents the current object that the method is called on.
            d (dict): A dictionary containing the state information to be set.
                This dictionary is used to update the internal state of the object.

        Returns:
            None.

        Raises:
            TypeError: If the input parameters are not of the expected types.
            ValueError: If there is an issue with the values passed as parameters.
            AttributeError: If there are issues related to attribute access or assignment.
            RuntimeError: If there is a runtime issue during the method execution.
        """
        self.__dict__ = d
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(self.vocab_file)

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Returns a tokenized string."""
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        current_sub_tokens = []
        out_string = ""
        prev_is_special = False
        for i, token in enumerate(tokens):
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                if not prev_is_special and i != 0:
                    out_string += " "
                out_string += self.sp_model.decode(current_sub_tokens) + token
                prev_is_special = True
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
                prev_is_special = False
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Builds input sequences with special tokens for the BaiChuanTokenizer class.

        Args:
            self (BaiChuanTokenizer): An instance of the BaiChuanTokenizer class.
            token_ids_0 (List[int]): A list of token IDs representing the first input sequence.
            token_ids_1 (List[int], optional): A list of token IDs representing the second input sequence. Defaults to None.

        Returns:
            List[int]: A list of token IDs representing the input sequences with special tokens added.

        Raises:
            None.

        This method takes in two input sequences token_ids_0 and token_ids_1 (optional) and builds input sequences with special tokens.
        If the add_bos_token parameter is True, the method adds the beginning of sequence token (bos_token) at the beginning of the sequences.
        If the add_eos_token parameter is True, the method adds the end of sequence token (eos_token) at the end of the sequences.
        The method then concatenates the special tokens with the input sequences and returns the result.

        If a second input sequence (token_ids_1) is provided, the method also adds special tokens to it and concatenates it
        with the first input sequence.

        Note:
            The bos_token_id and eos_token_id are specific token IDs for the beginning and end of sequence tokens, respectively,
            as defined in the BaiChuanTokenizer class.
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_token_id = [1] if self.add_bos_token else []
        eos_token_id = [1] if self.add_eos_token else []

        if token_ids_1 is None:
            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
        return (
            bos_token_id
            + ([0] * len(token_ids_0))
            + eos_token_id
            + bos_token_id
            + ([0] * len(token_ids_1))
            + eos_token_id
        )

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
        sequence pair mask has the following format:

        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```

        if token_ids_1 is None, only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

        if token_ids_1 is not None:
            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

        return output
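
A hedged usage sketch: it assumes the SentencePiece model from a Baichuan checkpoint has been downloaded locally as `tokenizer.model`; the file name and sample text are placeholders.

```python
from mindnlp.transformers.models.baichuan.tokenization_baichuan import BaiChuanTokenizer

tokenizer = BaiChuanTokenizer("tokenizer.model")          # add_bos_token=True, add_eos_token=False

ids = tokenizer.encode("今天天气不错")                      # bos id is prepended by default
print(tokenizer.convert_ids_to_tokens(ids))
print(tokenizer.decode(ids, skip_special_tokens=True))

# Special-token helpers defined above:
print(tokenizer.build_inputs_with_special_tokens([10, 11, 12]))   # [bos_id, 10, 11, 12]
print(tokenizer.get_special_tokens_mask([10, 11, 12]))            # [1, 0, 0, 0]
```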

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.vocab_size property

Returns vocab size

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.__getstate__()

Return a dictionary representing the state of the BaiChuanTokenizer instance.

This method takes no additional parameters.

PARAMETER DESCRIPTION
self

The instance of the BaiChuanTokenizer class.

RETURNS DESCRIPTION
dict

A copy of the instance's `__dict__` with `sp_model` set to None so the tokenizer can be pickled.

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py, lines 123-139
def __getstate__(self):
    """Return a dictionary representing the state of the BaiChuanTokenizer instance.

    This method takes no additional parameters.

    Args:
        self: The instance of the BaiChuanTokenizer class.

    Returns:
        dict: A copy of the instance's `__dict__` with `sp_model` set to None so the tokenizer can be pickled.

    Raises:
        None.
    """
    state = self.__dict__.copy()
    state["sp_model"] = None
    return state

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.__init__(vocab_file, unk_token='<unk>', bos_token='<s>', eos_token='</s>', pad_token=None, sp_model_kwargs=None, add_bos_token=True, add_eos_token=False, clean_up_tokenization_spaces=False, **kwargs)

init

Initializes a new instance of the BaiChuanTokenizer class.

PARAMETER DESCRIPTION
self

The instance of the class.

vocab_file

The path to the vocabulary file.

TYPE: str

unk_token

The unknown token. Defaults to ''.

TYPE: str DEFAULT: '<unk>'

bos_token

The beginning of sentence token. Defaults to ''.

TYPE: str DEFAULT: '<s>'

eos_token

The end of sentence token. Defaults to ''.

TYPE: str DEFAULT: '</s>'

pad_token

The padding token. Defaults to None.

TYPE: str DEFAULT: None

sp_model_kwargs

Optional arguments for SentencePieceProcessor. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

add_bos_token

Whether to add the beginning of sentence token. Defaults to True.

TYPE: bool DEFAULT: True

add_eos_token

Whether to add the end of sentence token. Defaults to False.

TYPE: bool DEFAULT: False

clean_up_tokenization_spaces

Whether to clean up tokenization spaces. Defaults to False.

TYPE: bool DEFAULT: False

**kwargs

Additional keyword arguments.

DEFAULT: {}

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
TypeError

If vocab_file is not a string.

TypeError

If unk_token is not a string.

TypeError

If bos_token is not a string.

TypeError

If eos_token is not a string.

TypeError

If pad_token is not a string.

TypeError

If sp_model_kwargs is not a dictionary or None.

TypeError

If add_bos_token is not a boolean.

TypeError

If add_eos_token is not a boolean.

TypeError

If clean_up_tokenization_spaces is not a boolean.

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py, lines 55-121
def __init__(
    self,
    vocab_file,
    unk_token="<unk>",
    bos_token="<s>",
    eos_token="</s>",
    pad_token=None,
    sp_model_kwargs: Optional[Dict[str, Any]] = None,
    add_bos_token=True,
    add_eos_token=False,
    clean_up_tokenization_spaces=False,
    **kwargs,
):
    """
    __init__

    Initializes a new instance of the BaiChuanTokenizer class.

    Args:
        self: The instance of the class.
        vocab_file (str): The path to the vocabulary file.
        unk_token (str, optional): The unknown token. Defaults to '<unk>'.
        bos_token (str, optional): The beginning of sentence token. Defaults to '<s>'.
        eos_token (str, optional): The end of sentence token. Defaults to '</s>'.
        pad_token (str, optional): The padding token. Defaults to None.
        sp_model_kwargs (Optional[Dict[str, Any]], optional): Optional arguments for SentencePieceProcessor. Defaults to None.
        add_bos_token (bool, optional): Whether to add the beginning of sentence token. Defaults to True.
        add_eos_token (bool, optional): Whether to add the end of sentence token. Defaults to False.
        clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces. Defaults to False.
        **kwargs: Additional keyword arguments.

    Returns:
        None.

    Raises:
        TypeError: If vocab_file is not a string.
        TypeError: If unk_token is not a string.
        TypeError: If bos_token is not a string.
        TypeError: If eos_token is not a string.
        TypeError: If pad_token is not a string.
        TypeError: If sp_model_kwargs is not a dictionary or None.
        TypeError: If add_bos_token is not a boolean.
        TypeError: If add_eos_token is not a boolean.
        TypeError: If clean_up_tokenization_spaces is not a boolean.
    """
    self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
    self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
    self.sp_model.Load(vocab_file)

    bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
    eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
    unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
    pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
    super().__init__(
        bos_token=bos_token,
        eos_token=eos_token,
        unk_token=unk_token,
        pad_token=pad_token,
        add_bos_token=add_bos_token,
        add_eos_token=add_eos_token,
        sp_model_kwargs=self.sp_model_kwargs,
        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        **kwargs,
    )
    self.vocab_file = vocab_file
    self.add_bos_token = add_bos_token
    self.add_eos_token = add_eos_token
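
A minimal construction sketch, assuming a local SentencePiece model file at ./tokenizer.model (a hypothetical path, not part of this package); the later snippets in this section reuse this tokenizer instance:

from mindnlp.transformers.models.baichuan.tokenization_baichuan import BaiChuanTokenizer

# "./tokenizer.model" is a placeholder; substitute the path to a real SentencePiece model file.
tokenizer = BaiChuanTokenizer(
    vocab_file="./tokenizer.model",
    add_bos_token=True,      # prepend <s> when building model inputs (the default)
    add_eos_token=False,     # do not append </s> (the default)
)
ids = tokenizer.encode("hello world")   # encode() is provided by the PreTrainedTokenizer base class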

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.__setstate__(d)

__setstate__ method in the BaiChuanTokenizer class: restores the tokenizer's internal state from a dictionary and reloads the SentencePiece model from vocab_file.

PARAMETER DESCRIPTION
self

An instance of the BaiChuanTokenizer class. Represents the current object that the method is called on.

TYPE: BaiChuanTokenizer

d

A dictionary containing the state information to be set. This dictionary is used to update the internal state of the object.

TYPE: dict

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION
TypeError

If the input parameters are not of the expected types.

ValueError

If there is an issue with the values passed as parameters.

AttributeError

If there are issues related to attribute access or assignment.

RuntimeError

If there is a runtime issue during the method execution.

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py
def __setstate__(self, d):
    """
    __setstate__ method in the BaiChuanTokenizer class.

    Args:
        self (BaiChuanTokenizer): An instance of the BaiChuanTokenizer class.
            Represents the current object that the method is called on.
        d (dict): A dictionary containing the state information to be set.
            This dictionary is used to update the internal state of the object.

    Returns:
        None.

    Raises:
        TypeError: If the input parameters are not of the expected types.
        ValueError: If there is an issue with the values passed as parameters.
        AttributeError: If there are issues related to attribute access or assignment.
        RuntimeError: If there is a runtime issue during the method execution.
    """
    self.__dict__ = d
    self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
    self.sp_model.Load(self.vocab_file)
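
A sketch of what __setstate__ enables: rebuilding a tokenizer from a plain state dictionary (for example during unpickling). It continues from the construction sketch after __init__ above, and uses BaiChuanTokenizer.__new__ only to skip __init__ for illustration:

state = dict(tokenizer.__dict__)
state.pop("sp_model", None)                  # drop the live SentencePiece processor; __setstate__ recreates it
clone = BaiChuanTokenizer.__new__(BaiChuanTokenizer)
clone.__setstate__(state)                    # restores attributes and reloads sp_model from state["vocab_file"]
assert clone.vocab_file == tokenizer.vocab_file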

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.build_inputs_with_special_tokens(token_ids_0, token_ids_1=None)

Builds input sequences with special tokens for the BaiChuanTokenizer class.

PARAMETER DESCRIPTION
self

An instance of the BaiChuanTokenizer class.

TYPE: BaiChuanTokenizer

token_ids_0

A list of token IDs representing the first input sequence.

TYPE: List[int]

token_ids_1

A list of token IDs representing the second input sequence. Defaults to None.

TYPE: List[int] DEFAULT: None

RETURNS DESCRIPTION

List[int]: A list of token IDs representing the input sequences with special tokens added.

This method builds model inputs from token_ids_0 and, optionally, token_ids_1. If add_bos_token is True, the beginning-of-sequence token (bos_token) is prepended to each sequence; if add_eos_token is True, the end-of-sequence token (eos_token) is appended. The special tokens are concatenated with the input sequence and the result is returned.

If a second sequence (token_ids_1) is provided, it receives its own special tokens and is concatenated after the first sequence.

Note

The bos_token_id and eos_token_id are specific token IDs for the beginning and end of sequence tokens, respectively, as defined in the BaiChuanTokenizer class.

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
    """
    Builds input sequences with special tokens for the BaiChuanTokenizer class.

    Args:
        self (BaiChuanTokenizer): An instance of the BaiChuanTokenizer class.
        token_ids_0 (List[int]): A list of token IDs representing the first input sequence.
        token_ids_1 (List[int], optional): A list of token IDs representing the second input sequence. Defaults to None.

    Returns:
        List[int]: A list of token IDs representing the input sequences with special tokens added.

    Raises:
        None.

    This method takes in two input sequences token_ids_0 and token_ids_1 (optional) and builds input sequences with special tokens.
    If the add_bos_token parameter is True, the method adds the beginning of sequence token (bos_token) at the beginning of the sequences.
    If the add_eos_token parameter is True, the method adds the end of sequence token (eos_token) at the end of the sequences.
    The method then concatenates the special tokens with the input sequences and returns the result.

    If a second input sequence (token_ids_1) is provided, the method also adds special tokens to it and concatenates it
    with the first input sequence.

    Note:
        The bos_token_id and eos_token_id are specific token IDs for the beginning and end of sequence tokens, respectively,
        as defined in the BaiChuanTokenizer class.
    """
    bos_token_id = [self.bos_token_id] if self.add_bos_token else []
    eos_token_id = [self.eos_token_id] if self.add_eos_token else []

    output = bos_token_id + token_ids_0 + eos_token_id

    if token_ids_1 is not None:
        output = output + bos_token_id + token_ids_1 + eos_token_id

    return output
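
A hand-computed illustration of the logic above, assuming bos_token_id resolves to 1 (the BaiChuanConfig default) and the default settings add_bos_token=True, add_eos_token=False:

bos, eos = [1], []                        # one BOS id, no EOS under the defaults
token_ids_0, token_ids_1 = [10, 11, 12], [20, 21]
single = bos + token_ids_0 + eos          # [1, 10, 11, 12]
pair = single + bos + token_ids_1 + eos   # [1, 10, 11, 12, 1, 20, 21]
# equivalent to tokenizer.build_inputs_with_special_tokens([10, 11, 12], [20, 21])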

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.convert_tokens_to_string(tokens)

Converts a sequence of tokens (strings) into a single string.

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py
def convert_tokens_to_string(self, tokens):
    """Converts a sequence of tokens (string) in a single string."""
    current_sub_tokens = []
    out_string = ""
    prev_is_special = False
    for i, token in enumerate(tokens):
        # make sure that special tokens are not decoded using sentencepiece model
        if token in self.all_special_tokens:
            if not prev_is_special and i != 0:
                out_string += " "
            out_string += self.sp_model.decode(current_sub_tokens) + token
            prev_is_special = True
            current_sub_tokens = []
        else:
            current_sub_tokens.append(token)
            prev_is_special = False
    out_string += self.sp_model.decode(current_sub_tokens)
    return out_string
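
An illustrative call, continuing with the tokenizer from the construction sketch above and using SentencePiece-style pieces where '▁' marks a word boundary; the exact output depends on the loaded vocabulary:

text = tokenizer.convert_tokens_to_string(["▁Hello", "▁world", "</s>"])
# ordinary pieces are decoded by the SentencePiece model, while special tokens such as </s>
# are emitted verbatim (preceded by a space when they follow ordinary pieces)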

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.create_token_type_ids_from_sequences(token_ids_0, token_ids_1=None)

Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT sequence pair mask has the following format:

0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence    | second sequence |

If token_ids_1 is None, only the first portion of the mask (0s) is returned.

PARAMETER DESCRIPTION
token_ids_0

List of ids.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: List of token type IDs according to the given sequence(s).

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py
def create_token_type_ids_from_sequences(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
    sequence pair mask has the following format:

    ```
    0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
    | first sequence    | second sequence |
    ```

    if token_ids_1 is None, only returns the first portion of the mask (0s).

    Args:
        token_ids_0 (`List[int]`):
            List of ids.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
    """
    bos_token_id = [self.bos_token_id] if self.add_bos_token else []
    eos_token_id = [self.eos_token_id] if self.add_eos_token else []

    output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

    if token_ids_1 is not None:
        output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

    return output
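
A hand-computed illustration under the same defaults (add_bos_token=True, add_eos_token=False): zeros cover the BOS token plus the first sequence, ones cover the BOS token plus the second sequence, matching the lengths produced by build_inputs_with_special_tokens:

bos, eos = [1], []                                    # one BOS id, no EOS under the defaults
token_ids_0, token_ids_1 = [10, 11, 12], [20, 21]
type_ids = [0] * len(bos + token_ids_0 + eos)         # [0, 0, 0, 0]
type_ids += [1] * len(bos + token_ids_1 + eos)        # -> [0, 0, 0, 0, 1, 1, 1]
# equivalent to tokenizer.create_token_type_ids_from_sequences([10, 11, 12], [20, 21])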

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.get_special_tokens_mask(token_ids_0, token_ids_1=None, already_has_special_tokens=False)

Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding special tokens using the tokenizer prepare_for_model method.

PARAMETER DESCRIPTION
token_ids_0

List of IDs.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

already_has_special_tokens

Whether or not the token list is already formatted with special tokens for the model.

TYPE: `bool`, *optional*, defaults to `False` DEFAULT: False

RETURNS DESCRIPTION
List[int]

List[int]: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py
def get_special_tokens_mask(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
) -> List[int]:
    """
    Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
    special tokens using the tokenizer `prepare_for_model` method.

    Args:
        token_ids_0 (`List[int]`):
            List of IDs.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.
        already_has_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not the token list is already formatted with special tokens for the model.

    Returns:
        `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
    """
    if already_has_special_tokens:
        return super().get_special_tokens_mask(
            token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
        )

    bos_token_id = [1] if self.add_bos_token else []
    eos_token_id = [1] if self.add_eos_token else []

    if token_ids_1 is None:
        return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
    return (
        bos_token_id
        + ([0] * len(token_ids_0))
        + eos_token_id
        + bos_token_id
        + ([0] * len(token_ids_1))
        + eos_token_id
    )
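
A hand-computed illustration under the same defaults: the mask carries a 1 at each prepended BOS position and 0 for ordinary tokens:

token_ids_0, token_ids_1 = [10, 11, 12], [20, 21]
mask = [1] + [0] * len(token_ids_0) + [1] + [0] * len(token_ids_1)   # [1, 0, 0, 0, 1, 0, 0]
# equivalent to tokenizer.get_special_tokens_mask([10, 11, 12], [20, 21])
# with add_bos_token=True and add_eos_token=False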

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.get_vocab()

Returns vocab as a dict

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py
def get_vocab(self):
    """Returns vocab as a dict"""
    vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
    vocab.update(self.added_tokens_encoder)
    return vocab
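
A short usage sketch with the tokenizer from the construction sketch above; the reported size depends on the loaded SentencePiece model plus any added tokens:

vocab = tokenizer.get_vocab()             # dict mapping token string -> id
print(len(vocab))                         # base SentencePiece vocabulary plus added tokens
print(vocab.get(tokenizer.eos_token))     # id of </s>, if present in the vocabulary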

mindnlp.transformers.models.baichuan.tokenization_baichuan.BaiChuanTokenizer.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary and special tokens file to a directory.

PARAMETER DESCRIPTION
save_directory

The directory in which to save the vocabulary.

TYPE: `str`

filename_prefix

An optional prefix for the saved vocabulary filename. Defaults to None.

TYPE: `str`, *optional* DEFAULT: None

RETURNS DESCRIPTION
Tuple[str]

Tuple(str): Paths to the files saved.

Source code in mindnlp/transformers/models/baichuan/tokenization_baichuan.py
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary and special tokens file to a directory.

    Args:
        save_directory (`str`):
            The directory in which to save the vocabulary.

    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    if not os.path.isdir(save_directory):
        logger.error(f"Vocabulary path ({save_directory}) should be a directory")
        return
    out_vocab_file = os.path.join(
        save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
    )

    if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
        copyfile(self.vocab_file, out_vocab_file)
    elif not os.path.isfile(self.vocab_file):
        with open(out_vocab_file, "wb") as fi:
            content_spiece_model = self.sp_model.serialized_model_proto()
            fi.write(content_spiece_model)

    return (out_vocab_file,)
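
A usage sketch with the tokenizer from the construction sketch above: the SentencePiece model is copied into the target directory (or re-serialized if the original file is no longer available), and the saved path is returned. The directory must already exist:

import os

os.makedirs("./baichuan_tokenizer", exist_ok=True)      # hypothetical output directory
(saved_path,) = tokenizer.save_vocabulary("./baichuan_tokenizer", filename_prefix="baichuan")
# saved_path points to "baichuan-" + VOCAB_FILES_NAMES["vocab_file"] inside ./baichuan_tokenizer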