chatglm2

mindnlp.transformers.models.chatglm2.configuration_chatglm2.ChatGLM2Config

Bases: PretrainedConfig

ChatGLM2Config

Source code in mindnlp/transformers/models/chatglm2/configuration_chatglm2.py
class ChatGLM2Config(PretrainedConfig):
    """ChatGLM2Config"""
    model_type = "chatglm"
    def __init__(
        self,
        num_layers=28,
        padded_vocab_size=65024,
        hidden_size=4096,
        ffn_hidden_size=13696,
        kv_channels=128,
        num_attention_heads=32,
        seq_length=2048,
        hidden_dropout=0.0,
        classifier_dropout=None,
        attention_dropout=0.0,
        layernorm_epsilon=1e-5,
        rmsnorm=True,
        apply_residual_connection_post_layernorm=False,
        post_layer_norm=True,
        add_bias_linear=False,
        add_qkv_bias=False,
        bias_dropout_fusion=True,
        multi_query_attention=False,
        multi_query_group_num=1,
        apply_query_key_layer_scaling=True,
        attention_softmax_in_fp32=True,
        fp32_residual_connection=False,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs
    ):
        """Initialize a ChatGLM2Config object.

        Args:
            self (ChatGLM2Config): An instance of the ChatGLM2Config class.
            num_layers (int, optional): The number of layers in the model. Defaults to 28.
            padded_vocab_size (int, optional): The size of the padded vocabulary. Defaults to 65024.
            hidden_size (int, optional): The size of the hidden layers. Defaults to 4096.
            ffn_hidden_size (int, optional): The size of the feed-forward network hidden layers. Defaults to 13696.
            kv_channels (int, optional): The number of channels in the key-value attention. Defaults to 128.
            num_attention_heads (int, optional): The number of attention heads. Defaults to 32.
            seq_length (int, optional): The maximum sequence length. Defaults to 2048.
            hidden_dropout (float, optional): The dropout probability for the hidden layers. Defaults to 0.0.
            classifier_dropout (float, optional): The dropout probability for the classifier layer. Defaults to None.
            attention_dropout (float, optional): The dropout probability for the attention layers. Defaults to 0.0.
            layernorm_epsilon (float, optional): The epsilon value for layer normalization. Defaults to 1e-05.
            rmsnorm (bool, optional): Whether to use RMSNorm for normalization. Defaults to True.
            apply_residual_connection_post_layernorm (bool, optional): Whether to apply residual connection after layer normalization. Defaults to False.
            post_layer_norm (bool, optional): Whether to apply layer normalization after each sublayer. Defaults to True.
            add_bias_linear (bool, optional): Whether to add bias to the linear layer. Defaults to False.
            add_qkv_bias (bool, optional): Whether to add bias to the query, key, and value layers. Defaults to False.
            bias_dropout_fusion (bool, optional): Whether to fuse the bias-add and dropout operations. Defaults to True.
            multi_query_attention (bool, optional): Whether to use multi-query attention. Defaults to False.
            multi_query_group_num (int, optional): The number of groups for multi-query attention. Defaults to 1.
            apply_query_key_layer_scaling (bool, optional): Whether to apply scaling on query-key layer. Defaults to True.
            attention_softmax_in_fp32 (bool, optional): Whether to use FP32 for attention softmax. Defaults to True.
            fp32_residual_connection (bool, optional): Whether to use FP32 for residual connection. Defaults to False.
            quantization_bit (int, optional): The number of bits for quantization. Defaults to 0.
            pre_seq_len (int, optional): The length of the prefix sequence. Defaults to None.
            prefix_projection (bool, optional): Whether to use prefix projection. Defaults to False.

        Returns:
            None.

        Raises:
            None: This method does not raise any exceptions.
        """
        self.num_layers = num_layers
        self.vocab_size = padded_vocab_size
        self.padded_vocab_size = padded_vocab_size
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads
        self.seq_length = seq_length
        self.hidden_dropout = hidden_dropout
        self.classifier_dropout = classifier_dropout
        self.attention_dropout = attention_dropout
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
        self.post_layer_norm = post_layer_norm
        self.add_bias_linear = add_bias_linear
        self.add_qkv_bias = add_qkv_bias
        self.bias_dropout_fusion = bias_dropout_fusion
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection
        super().__init__(**kwargs)

mindnlp.transformers.models.chatglm2.configuration_chatglm2.ChatGLM2Config.__init__(num_layers=28, padded_vocab_size=65024, hidden_size=4096, ffn_hidden_size=13696, kv_channels=128, num_attention_heads=32, seq_length=2048, hidden_dropout=0.0, classifier_dropout=None, attention_dropout=0.0, layernorm_epsilon=1e-05, rmsnorm=True, apply_residual_connection_post_layernorm=False, post_layer_norm=True, add_bias_linear=False, add_qkv_bias=False, bias_dropout_fusion=True, multi_query_attention=False, multi_query_group_num=1, apply_query_key_layer_scaling=True, attention_softmax_in_fp32=True, fp32_residual_connection=False, quantization_bit=0, pre_seq_len=None, prefix_projection=False, **kwargs)

Initialize a ChatGLM2Config object.

PARAMETER DESCRIPTION
self (ChatGLM2Config): An instance of the ChatGLM2Config class.
num_layers (int, DEFAULT: 28): The number of layers in the model.
padded_vocab_size (int, DEFAULT: 65024): The size of the padded vocabulary.
hidden_size (int, DEFAULT: 4096): The size of the hidden layers.
ffn_hidden_size (int, DEFAULT: 13696): The size of the feed-forward network hidden layers.
kv_channels (int, DEFAULT: 128): The number of channels in the key-value attention.
num_attention_heads (int, DEFAULT: 32): The number of attention heads.
seq_length (int, DEFAULT: 2048): The maximum sequence length.
hidden_dropout (float, DEFAULT: 0.0): The dropout probability for the hidden layers.
classifier_dropout (float, DEFAULT: None): The dropout probability for the classifier layer.
attention_dropout (float, DEFAULT: 0.0): The dropout probability for the attention layers.
layernorm_epsilon (float, DEFAULT: 1e-05): The epsilon value for layer normalization.
rmsnorm (bool, DEFAULT: True): Whether to use RMSNorm for normalization.
apply_residual_connection_post_layernorm (bool, DEFAULT: False): Whether to apply the residual connection after layer normalization.
post_layer_norm (bool, DEFAULT: True): Whether to apply layer normalization after each sublayer.
add_bias_linear (bool, DEFAULT: False): Whether to add bias to the linear layers.
add_qkv_bias (bool, DEFAULT: False): Whether to add bias to the query, key, and value projections.
bias_dropout_fusion (bool, DEFAULT: True): Whether to fuse the bias-add and dropout operations.
multi_query_attention (bool, DEFAULT: False): Whether to use multi-query attention.
multi_query_group_num (int, DEFAULT: 1): The number of groups for multi-query attention.
apply_query_key_layer_scaling (bool, DEFAULT: True): Whether to apply scaling to the query-key layer.
attention_softmax_in_fp32 (bool, DEFAULT: True): Whether to compute the attention softmax in FP32.
fp32_residual_connection (bool, DEFAULT: False): Whether to keep the residual connection in FP32.
quantization_bit (int, DEFAULT: 0): The number of bits used for weight quantization (0 disables quantization).
pre_seq_len (int, DEFAULT: None): The length of the prefix sequence for prefix tuning.
prefix_projection (bool, DEFAULT: False): Whether to use prefix projection.

RETURNS DESCRIPTION
None.

RAISES DESCRIPTION
None: This method does not raise any exceptions.

Source code in mindnlp/transformers/models/chatglm2/configuration_chatglm2.py
def __init__(
    self,
    num_layers=28,
    padded_vocab_size=65024,
    hidden_size=4096,
    ffn_hidden_size=13696,
    kv_channels=128,
    num_attention_heads=32,
    seq_length=2048,
    hidden_dropout=0.0,
    classifier_dropout=None,
    attention_dropout=0.0,
    layernorm_epsilon=1e-5,
    rmsnorm=True,
    apply_residual_connection_post_layernorm=False,
    post_layer_norm=True,
    add_bias_linear=False,
    add_qkv_bias=False,
    bias_dropout_fusion=True,
    multi_query_attention=False,
    multi_query_group_num=1,
    apply_query_key_layer_scaling=True,
    attention_softmax_in_fp32=True,
    fp32_residual_connection=False,
    quantization_bit=0,
    pre_seq_len=None,
    prefix_projection=False,
    **kwargs
):
    """Initialize a ChatGLM2Config object.

    Args:
        self (ChatGLM2Config): An instance of the ChatGLM2Config class.
        num_layers (int, optional): The number of layers in the model. Defaults to 28.
        padded_vocab_size (int, optional): The size of the padded vocabulary. Defaults to 65024.
        hidden_size (int, optional): The size of the hidden layers. Defaults to 4096.
        ffn_hidden_size (int, optional): The size of the feed-forward network hidden layers. Defaults to 13696.
        kv_channels (int, optional): The number of channels in the key-value attention. Defaults to 128.
        num_attention_heads (int, optional): The number of attention heads. Defaults to 32.
        seq_length (int, optional): The maximum sequence length. Defaults to 2048.
        hidden_dropout (float, optional): The dropout probability for the hidden layers. Defaults to 0.0.
        classifier_dropout (float, optional): The dropout probability for the classifier layer. Defaults to None.
        attention_dropout (float, optional): The dropout probability for the attention layers. Defaults to 0.0.
        layernorm_epsilon (float, optional): The epsilon value for layer normalization. Defaults to 1e-05.
        rmsnorm (bool, optional): Whether to use RMSNorm for normalization. Defaults to True.
        apply_residual_connection_post_layernorm (bool, optional): Whether to apply residual connection after layer normalization. Defaults to False.
        post_layer_norm (bool, optional): Whether to apply layer normalization after each sublayer. Defaults to True.
        add_bias_linear (bool, optional): Whether to add bias to the linear layer. Defaults to False.
        add_qkv_bias (bool, optional): Whether to add bias to the query, key, and value layers. Defaults to False.
        bias_dropout_fusion (bool, optional): Whether to fuse the bias-add and dropout operations. Defaults to True.
        multi_query_attention (bool, optional): Whether to use multi-query attention. Defaults to False.
        multi_query_group_num (int, optional): The number of groups for multi-query attention. Defaults to 1.
        apply_query_key_layer_scaling (bool, optional): Whether to apply scaling on query-key layer. Defaults to True.
        attention_softmax_in_fp32 (bool, optional): Whether to use FP32 for attention softmax. Defaults to True.
        fp32_residual_connection (bool, optional): Whether to use FP32 for residual connection. Defaults to False.
        quantization_bit (int, optional): The number of bits for quantization. Defaults to 0.
        pre_seq_len (int, optional): The length of the prefix sequence. Defaults to None.
        prefix_projection (bool, optional): Whether to use prefix projection. Defaults to False.

    Returns:
        None.

    Raises:
        None: This method does not raise any exceptions.
    """
    self.num_layers = num_layers
    self.vocab_size = padded_vocab_size
    self.padded_vocab_size = padded_vocab_size
    self.hidden_size = hidden_size
    self.ffn_hidden_size = ffn_hidden_size
    self.kv_channels = kv_channels
    self.num_attention_heads = num_attention_heads
    self.seq_length = seq_length
    self.hidden_dropout = hidden_dropout
    self.classifier_dropout = classifier_dropout
    self.attention_dropout = attention_dropout
    self.layernorm_epsilon = layernorm_epsilon
    self.rmsnorm = rmsnorm
    self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
    self.post_layer_norm = post_layer_norm
    self.add_bias_linear = add_bias_linear
    self.add_qkv_bias = add_qkv_bias
    self.bias_dropout_fusion = bias_dropout_fusion
    self.multi_query_attention = multi_query_attention
    self.multi_query_group_num = multi_query_group_num
    self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
    self.attention_softmax_in_fp32 = attention_softmax_in_fp32
    self.fp32_residual_connection = fp32_residual_connection
    self.quantization_bit = quantization_bit
    self.pre_seq_len = pre_seq_len
    self.prefix_projection = prefix_projection
    super().__init__(**kwargs)
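
A minimal usage sketch (illustrative, not taken from the library's own docs): unspecified arguments fall back to the ChatGLM2-6B defaults listed above, and extra keyword arguments are passed through to PretrainedConfig.

from mindnlp.transformers.models.chatglm2.configuration_chatglm2 import ChatGLM2Config

# Build a small toy configuration; every omitted argument keeps its
# ChatGLM2-6B default (e.g. padded_vocab_size=65024).
config = ChatGLM2Config(
    num_layers=2,
    hidden_size=256,
    ffn_hidden_size=512,
    num_attention_heads=4,
    kv_channels=64,
    seq_length=128,
)
print(config.model_type)   # "chatglm"
print(config.vocab_size)   # 65024, mirrored from padded_vocab_size
print(config.num_layers)   # 2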

mindnlp.transformers.models.chatglm2.modeling_chatglm2.CHATGLM2_6B_PRETRAINED_MODEL_ARCHIVE_LIST = ['THUDM/chatglm2-6b'] module-attribute
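
A hedged loading sketch: from_pretrained is inherited from the PreTrainedModel base class, and "THUDM/chatglm2-6b" is the checkpoint listed above (loading it downloads several GB of weights on first use).

from mindnlp.transformers.models.chatglm2.modeling_chatglm2 import ChatGLM2ForConditionalGeneration

# Load the published ChatGLM2-6B checkpoint, including its config kwargs.
model = ChatGLM2ForConditionalGeneration.from_pretrained("THUDM/chatglm2-6b")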

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2Model

Bases: ChatGLM2PreTrainedModel

This class represents the ChatGLM2Model, which is used for natural language processing tasks. It inherits from ChatGLM2PreTrainedModel and contains methods for initializing the model, getting input embeddings, getting prompts, forwarding the model, and quantizing the model's weights. The class contains attributes for the embedding, number of layers, multi-query group number, key-value channels, sequence length, rotary position embedding, encoder, output layer, prefix sequence length, prefix projection, prefix tokens, prefix encoder, and dropout. The methods included are __init__, get_input_embeddings, get_prompt, forward, and quantize.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
class ChatGLM2Model(ChatGLM2PreTrainedModel):

    """
    This class represents the ChatGLM2Model, which is used for natural language processing tasks.
    It inherits from the ChatGLM2PreTrainedModel and contains methods for initializing the model, getting input
    embeddings, getting prompts, forwarding the model, and quantizing the model's weights.
    The class contains attributes for embedding, number of layers, multi-query group number, key-value channels,
    sequence length, rotary position embedding, encoder, output layer, prefix sequence length, prefix projection,
    prefix tokens, prefix encoder, and dropout.
    The methods included are __init__, get_input_embeddings, get_prompt, forward, and quantize.
    """
    def __init__(self, config: ChatGLM2Config, empty_init=True):
        """
        This method initializes an instance of the ChatGLM2Model class.

        Args:
            self: The instance of the ChatGLM2Model class.
            config (ChatGLM2Config): An instance of the ChatGLM2Config class containing configuration parameters for the model.
            empty_init (bool): A flag indicating whether to perform an empty initialization.
                If True, the initialization method is set to zero_init; otherwise, it is set to default_init.

        Returns:
            None.

        Raises:
            None
        """
        super().__init__(config)
        if empty_init:
            init_method = zero_init
        else:
            init_method = default_init
        init_kwargs = {}
        self.embedding = init_method(Embedding, config, **init_kwargs)
        self.num_layers = config.num_layers
        self.multi_query_group_num = config.multi_query_group_num
        self.kv_channels = config.kv_channels

        # Rotary positional embeddings
        self.seq_length = config.seq_length
        rotary_dim = (
            config.hidden_size // config.num_attention_heads if config.kv_channels is None else config.kv_channels
        )

        self.rotary_pos_emb = RotaryEmbedding(rotary_dim // 2, original_impl=config.original_rope,
                                              dtype=config.ms_dtype)
        self.encoder = init_method(GLMTransformer, config, **init_kwargs)
        self.output_layer = init_method(nn.Linear, config.hidden_size, config.padded_vocab_size, bias=False,
                                        dtype=config.ms_dtype, **init_kwargs)
        self.pre_seq_len = config.pre_seq_len
        self.prefix_projection = config.prefix_projection
        if self.pre_seq_len is not None:
            for param in self.parameters():
                param.requires_grad = False
            self.prefix_tokens = ops.arange(self.pre_seq_len).long()
            self.prefix_encoder = PrefixEncoder(config)
            self.dropout = nn.Dropout(p=0.1)

    def get_input_embeddings(self):
        """
        Retrieves the input embeddings for the ChatGLM2Model.

        Args:
            self (ChatGLM2Model): The instance of the ChatGLM2Model class.

        Returns:
            The word embedding layer (self.embedding.word_embeddings).

        Raises:
            None.
        """
        return self.embedding.word_embeddings

    def get_prompt(self, batch_size, dtype=mindspore.float16):
        """
        Retrieves the prompt for the ChatGLM2Model.

        Args:
            self (ChatGLM2Model): The instance of the ChatGLM2Model class.
            batch_size (int): The number of sequences in a batch.
            dtype (mindspore.dtype, optional): The data type of the returned prompt. Defaults to mindspore.float16.

        Returns:
            tuple: per-layer (key, value) prefix caches derived from the prefix encoder.

        Raises:
            None.
        """
        prefix_tokens = self.prefix_tokens.unsqueeze(0).expand(batch_size, -1)
        past_key_values = self.prefix_encoder(prefix_tokens).astype(dtype)
        past_key_values = past_key_values.view(
            batch_size,
            self.pre_seq_len,
            self.num_layers * 2,
            self.multi_query_group_num,
            self.kv_channels
        )
        # seq_len, b, nh, hidden_size
        past_key_values = self.dropout(past_key_values)
        past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2)
        return past_key_values

    def forward(
            self,
            input_ids,
            position_ids: Optional[mindspore.Tensor] = None,
            attention_mask: Optional[mindspore.Tensor] = None,
            full_attention_mask: Optional[mindspore.Tensor] = None,
            past_key_values: Optional[Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]] = None,
            inputs_embeds: Optional[mindspore.Tensor] = None,
            use_cache: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
    ):
        """
        Constructs the ChatGLM2Model.

        Args:
            self: The object instance.
            input_ids (mindspore.Tensor): The input token IDs of shape (batch_size, seq_length).
            position_ids (Optional[mindspore.Tensor]): The position IDs tensor. Default is None.
            attention_mask (Optional[mindspore.Tensor]): The attention mask tensor. Default is None.
            full_attention_mask (Optional[mindspore.Tensor]): The full attention mask tensor. Default is None.
            past_key_values (Optional[Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]]):
                The past key values. Default is None.
            inputs_embeds (Optional[mindspore.Tensor]): The embedded inputs tensor. Default is None.
            use_cache (Optional[bool]): Flag to use cache. Default is None.
            output_hidden_states (Optional[bool]): Flag to output hidden states. Default is None.
            return_dict (Optional[bool]): Flag to return a dictionary. Default is None.

        Returns:
            BaseModelOutputWithPast or tuple: the last hidden state, present key
            values, all hidden states, and attentions.

        Raises:
            None.
        """
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        batch_size, seq_length = input_ids.shape

        if inputs_embeds is None:
            inputs_embeds = self.embedding(input_ids)

        if self.pre_seq_len is not None:
            if past_key_values is None:
                past_key_values = self.get_prompt(batch_size=batch_size,
                                                  dtype=inputs_embeds.dtype)
            if attention_mask is not None:
                attention_mask = ops.cat([attention_mask.new_ones((batch_size, self.pre_seq_len), dtype=attention_mask.dtype),
                                            attention_mask], axis=-1)

        if full_attention_mask is None:
            if (attention_mask is not None and not attention_mask.all()) or (past_key_values and seq_length != 1):
                full_attention_mask = self.get_masks(input_ids, past_key_values, padding_mask=attention_mask)

        # Rotary positional embeddings
        rotary_pos_emb = self.rotary_pos_emb(self.seq_length)
        if position_ids is not None:
            rotary_pos_emb = rotary_pos_emb[position_ids]
        else:
            rotary_pos_emb = rotary_pos_emb[None, :seq_length]
        rotary_pos_emb = rotary_pos_emb.swapaxes(0, 1)

        # Run encoder.
        hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
            inputs_embeds, full_attention_mask, rotary_pos_emb=rotary_pos_emb,
            kv_caches=past_key_values, use_cache=use_cache, output_hidden_states=output_hidden_states
        )

        if not return_dict:
            return tuple(v for v in [hidden_states, presents, all_hidden_states, all_self_attentions] if v is not None)

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=presents,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
        )

    def quantize(self, weight_bit_width: int):
        """Quantize the weights of the ChatGLM2Model.

        This method quantizes the weights of the ChatGLM2Model object according to the specified weight bit width.

        Args:
            self (ChatGLM2Model): The ChatGLM2Model object to be quantized.
            weight_bit_width (int): The number of bits to be used for quantizing the weights.
                This value determines the precision of the quantization. Valid values are positive integers.

        Returns:
            None.

        Raises:
            None.
        """

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2Model.__init__(config, empty_init=True)

This method initializes an instance of the ChatGLM2Model class.

PARAMETER DESCRIPTION
self: The instance of the ChatGLM2Model class.
config (ChatGLM2Config): An instance of the ChatGLM2Config class containing configuration parameters for the model.
empty_init (bool, DEFAULT: True): A flag indicating whether to perform an empty initialization. If True, the initialization method is set to zero_init; otherwise, it is set to default_init.

RETURNS DESCRIPTION
None.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def __init__(self, config: ChatGLM2Config, empty_init=True):
    """
    This method initializes an instance of the ChatGLM2Model class.

    Args:
        self: The instance of the ChatGLM2Model class.
        config (ChatGLM2Config): An instance of the ChatGLM2Config class containing configuration parameters for the model.
        empty_init (bool): A flag indicating whether to perform an empty initialization.
            If True, the initialization method is set to zero_init; otherwise, it is set to default_init.

    Returns:
        None.

    Raises:
        None
    """
    super().__init__(config)
    if empty_init:
        init_method = zero_init
    else:
        init_method = default_init
    init_kwargs = {}
    self.embedding = init_method(Embedding, config, **init_kwargs)
    self.num_layers = config.num_layers
    self.multi_query_group_num = config.multi_query_group_num
    self.kv_channels = config.kv_channels

    # Rotary positional embeddings
    self.seq_length = config.seq_length
    rotary_dim = (
        config.hidden_size // config.num_attention_heads if config.kv_channels is None else config.kv_channels
    )

    self.rotary_pos_emb = RotaryEmbedding(rotary_dim // 2, original_impl=config.original_rope,
                                          dtype=config.ms_dtype)
    self.encoder = init_method(GLMTransformer, config, **init_kwargs)
    self.output_layer = init_method(nn.Linear, config.hidden_size, config.padded_vocab_size, bias=False,
                                    dtype=config.ms_dtype, **init_kwargs)
    self.pre_seq_len = config.pre_seq_len
    self.prefix_projection = config.prefix_projection
    if self.pre_seq_len is not None:
        for param in self.parameters():
            param.requires_grad = False
        self.prefix_tokens = ops.arange(self.pre_seq_len).long()
        self.prefix_encoder = PrefixEncoder(config)
        self.dropout = nn.Dropout(p=0.1)
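
Note that this initializer also reads config.original_rope and config.ms_dtype, which ChatGLM2Config.__init__ does not set explicitly; for hub checkpoints they arrive through **kwargs from the checkpoint's config file. A construction sketch under that assumption (toy sizes, illustrative only):

import mindspore
from mindnlp.transformers.models.chatglm2.configuration_chatglm2 import ChatGLM2Config
from mindnlp.transformers.models.chatglm2.modeling_chatglm2 import ChatGLM2Model

# original_rope and ms_dtype are consumed by __init__ but are not explicit
# ChatGLM2Config parameters, so they are supplied as extra kwargs here
# (assumed to match the published chatglm2-6b config values).
config = ChatGLM2Config(
    num_layers=2, hidden_size=64, ffn_hidden_size=128,
    num_attention_heads=4, kv_channels=16, seq_length=32,
    original_rope=True, ms_dtype=mindspore.float16,
)
model = ChatGLM2Model(config, empty_init=False)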

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2Model.forward(input_ids, position_ids=None, attention_mask=None, full_attention_mask=None, past_key_values=None, inputs_embeds=None, use_cache=None, output_hidden_states=None, return_dict=None)

Constructs the ChatGLM2Model.

PARAMETER DESCRIPTION
self: The object instance.
input_ids (Tensor): The input token IDs of shape (batch_size, seq_length).
position_ids (Optional[Tensor], DEFAULT: None): The position IDs tensor.
attention_mask (Optional[Tensor], DEFAULT: None): The attention mask tensor.
full_attention_mask (Optional[Tensor], DEFAULT: None): The full attention mask tensor.
past_key_values (Optional[Tuple[Tuple[Tensor, Tensor], ...]], DEFAULT: None): The past key values.
inputs_embeds (Optional[Tensor], DEFAULT: None): The embedded inputs tensor.
use_cache (Optional[bool], DEFAULT: None): Flag to use the key-value cache.
output_hidden_states (Optional[bool], DEFAULT: None): Flag to output hidden states.
return_dict (Optional[bool], DEFAULT: None): Flag to return a BaseModelOutputWithPast instead of a plain tuple.

RETURNS DESCRIPTION
BaseModelOutputWithPast (or a plain tuple when return_dict is False) holding the last hidden state, present key values, all hidden states, and attentions.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def forward(
        self,
        input_ids,
        position_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        full_attention_mask: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
):
    """
    Constructs the ChatGLM2Model.

    Args:
        self: The object instance.
        input_ids (mindspore.Tensor): The input token IDs of shape (batch_size, seq_length).
        position_ids (Optional[mindspore.Tensor]): The position IDs tensor. Default is None.
        attention_mask (Optional[mindspore.Tensor]): The attention mask tensor. Default is None.
        full_attention_mask (Optional[mindspore.Tensor]): The full attention mask tensor. Default is None.
        past_key_values (Optional[Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]]):
            The past key values. Default is None.
        inputs_embeds (Optional[mindspore.Tensor]): The embedded inputs tensor. Default is None.
        use_cache (Optional[bool]): Flag to use cache. Default is None.
        output_hidden_states (Optional[bool]): Flag to output hidden states. Default is None.
        return_dict (Optional[bool]): Flag to return a dictionary. Default is None.

    Returns:
        BaseModelOutputWithPast or tuple: the last hidden state, present key
        values, all hidden states, and attentions.

    Raises:
        None.
    """
    output_hidden_states = (
        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
    )
    use_cache = use_cache if use_cache is not None else self.config.use_cache
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    batch_size, seq_length = input_ids.shape

    if inputs_embeds is None:
        inputs_embeds = self.embedding(input_ids)

    if self.pre_seq_len is not None:
        if past_key_values is None:
            past_key_values = self.get_prompt(batch_size=batch_size,
                                              dtype=inputs_embeds.dtype)
        if attention_mask is not None:
            attention_mask = ops.cat([attention_mask.new_ones((batch_size, self.pre_seq_len), dtype=attention_mask.dtype),
                                        attention_mask], axis=-1)

    if full_attention_mask is None:
        if (attention_mask is not None and not attention_mask.all()) or (past_key_values and seq_length != 1):
            full_attention_mask = self.get_masks(input_ids, past_key_values, padding_mask=attention_mask)

    # Rotary positional embeddings
    rotary_pos_emb = self.rotary_pos_emb(self.seq_length)
    if position_ids is not None:
        rotary_pos_emb = rotary_pos_emb[position_ids]
    else:
        rotary_pos_emb = rotary_pos_emb[None, :seq_length]
    rotary_pos_emb = rotary_pos_emb.swapaxes(0, 1)

    # Run encoder.
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
        inputs_embeds, full_attention_mask, rotary_pos_emb=rotary_pos_emb,
        kv_caches=past_key_values, use_cache=use_cache, output_hidden_states=output_hidden_states
    )

    if not return_dict:
        return tuple(v for v in [hidden_states, presents, all_hidden_states, all_self_attentions] if v is not None)

    return BaseModelOutputWithPast(
        last_hidden_state=hidden_states,
        past_key_values=presents,
        hidden_states=all_hidden_states,
        attentions=all_self_attentions,
    )
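
A usage sketch continuing the toy model built in the construction example above (illustrative only). The encoder works in a sequence-first layout, so last_hidden_state comes back as (seq_length, batch_size, hidden_size):

import numpy as np
import mindspore

# Reuses `model` from the construction sketch; hypothetical token ids.
input_ids = mindspore.Tensor(np.random.randint(0, 1000, (1, 8)), mindspore.int64)
outputs = model(input_ids)
print(outputs.last_hidden_state.shape)  # (8, 1, 64): sequence-first layout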

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2Model.get_input_embeddings()

Retrieves the input embeddings for the ChatGLM2Model.

PARAMETER DESCRIPTION
self (ChatGLM2Model): The instance of the ChatGLM2Model class.

RETURNS DESCRIPTION
The word embedding layer (self.embedding.word_embeddings).

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def get_input_embeddings(self):
    """
    Retrieves the input embeddings for the ChatGLM2Model.

    Args:
        self (ChatGLM2Model): The instance of the ChatGLM2Model class.

    Returns:
        The word embedding layer (self.embedding.word_embeddings).

    Raises:
        None.
    """
    return self.embedding.word_embeddings

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2Model.get_prompt(batch_size, dtype=mindspore.float16)

Retrieves the prompt for the ChatGLM2Model.

PARAMETER DESCRIPTION
self (ChatGLM2Model): The instance of the ChatGLM2Model class.
batch_size (int): The number of sequences in a batch.
dtype (mindspore.dtype, DEFAULT: mindspore.float16): The data type of the returned prompt.

RETURNS DESCRIPTION
A tuple of per-layer (key, value) prefix caches produced by the prefix encoder.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def get_prompt(self, batch_size, dtype=mindspore.float16):
    """
    Retrieves the prompt for the ChatGLM2Model.

    Args:
        self (ChatGLM2Model): The instance of the ChatGLM2Model class.
        batch_size (int): The number of sequences in a batch.
        dtype (mindspore.dtype, optional): The data type of the returned prompt. Defaults to mindspore.float16.

    Returns:
        tuple: per-layer (key, value) prefix caches derived from the prefix encoder.

    Raises:
        None.
    """
    prefix_tokens = self.prefix_tokens.unsqueeze(0).expand(batch_size, -1)
    past_key_values = self.prefix_encoder(prefix_tokens).astype(dtype)
    past_key_values = past_key_values.view(
        batch_size,
        self.pre_seq_len,
        self.num_layers * 2,
        self.multi_query_group_num,
        self.kv_channels
    )
    # seq_len, b, nh, hidden_size
    past_key_values = self.dropout(past_key_values)
    past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2)
    return past_key_values
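
The reshape/permute/split bookkeeping above is easiest to follow with concrete shapes; here is a numpy sketch of the same steps (illustrative sizes only):

import numpy as np

batch, pre_seq_len, num_layers, groups, kv_channels = 2, 8, 28, 2, 128
# The prefix encoder emits one flat vector per prefix token.
flat = np.zeros((batch, pre_seq_len, num_layers * 2 * groups * kv_channels))
pkv = flat.reshape(batch, pre_seq_len, num_layers * 2, groups, kv_channels)
# Move layers to the front: (num_layers * 2, pre_seq_len, batch, groups, kv_channels).
pkv = pkv.transpose(2, 1, 0, 3, 4)
# Chunks of size 2 along axis 0 give one (key, value) stack per layer.
layers = np.split(pkv, num_layers, axis=0)
print(len(layers), layers[0].shape)  # 28 (2, 8, 2, 2, 128)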

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2Model.quantize(weight_bit_width)

Quantize the weights of the ChatGLM2Model.

This method quantizes the weights of the ChatGLM2Model object according to the specified weight bit width.

PARAMETER DESCRIPTION
self (ChatGLM2Model): The ChatGLM2Model object to be quantized.
weight_bit_width (int): The number of bits to be used for quantizing the weights. This value determines the precision of the quantization. Valid values are positive integers.

RETURNS DESCRIPTION
None.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def quantize(self, weight_bit_width: int):
    """Quantize the weights of the ChatGLM2Model.

    This method quantizes the weights of the ChatGLM2Model object according to the specified weight bit width.

    Args:
        self (ChatGLM2Model): The ChatGLM2Model object to be quantized.
        weight_bit_width (int): The number of bits to be used for quantizing the weights.
            This value determines the precision of the quantization. Valid values are positive integers.

    Returns:
        None.

    Raises:
        None.
    """

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2PreTrainedModel

Bases: PreTrainedModel

An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
class ChatGLM2PreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and
    a simple interface for downloading and loading pretrained models.
    """
    is_parallelizable = False
    config_class = ChatGLM2Config
    base_model_prefix = "transformer"
    _no_split_modules = ["GLMBlock"]

    def _init_weights(self, cell):
        """Initialize the weights."""
        return

    def get_masks(self, input_ids, past_key_values, padding_mask=None):
        '''
            This method calculates the attention masks for the input sequence in the context of the ChatGLM2PreTrainedModel class.

            Args:
                self (ChatGLM2PreTrainedModel): The instance of the ChatGLM2PreTrainedModel class.
                input_ids (mindspore.Tensor): The input sequence tensor of shape (batch_size, seq_length).
                past_key_values (tuple of mindspore.Tensor): The past key-value pairs for attention weights of shape
                    (past_length, batch_size, num_heads, past_seq_length, embed_dim).
                padding_mask (mindspore.Tensor, optional): The tensor indicating the positions of padding tokens in the input sequence.
                    It has the shape (batch_size, seq_length) and contains 1's for non-padding tokens and 0's for padding tokens.
                    Defaults to None.

            Returns:
                mindspore.Tensor: The attention mask tensor of shape (batch_size, 1, seq_length, seq_length).

            Raises:
                None.
        '''
        batch_size, seq_length = input_ids.shape
        full_attention_mask = ops.ones(batch_size, seq_length, seq_length)
        full_attention_mask = full_attention_mask.tril()
        past_length = 0
        if past_key_values:
            past_length = past_key_values[0][0].shape[0]
        if past_length:
            full_attention_mask = ops.cat((ops.ones(batch_size, seq_length, past_length), full_attention_mask), axis=-1)
        if padding_mask is not None:
            full_attention_mask = full_attention_mask * padding_mask.unsqueeze(1)
        if not past_length and padding_mask is not None:
            full_attention_mask -= padding_mask.unsqueeze(-1) - 1
        full_attention_mask = (full_attention_mask < 0.5).bool()
        full_attention_mask = full_attention_mask.unsqueeze(1)
        return full_attention_mask

    def get_position_ids(self, input_ids):
        """
        Returns the position IDs corresponding to input IDs.

        Args:
            self (ChatGLM2PreTrainedModel): The instance of the ChatGLM2PreTrainedModel class.
            input_ids (mindspore.Tensor): A 2-dimensional tensor of shape (batch_size, seq_length) containing input IDs.

        Returns:
            mindspore.Tensor: A 2-dimensional tensor of shape (batch_size, seq_length) containing position IDs corresponding to the input IDs.

        Raises:
            None.

        """
        batch_size, seq_length = input_ids.shape
        position_ids = ops.arange(seq_length, dtype=mindspore.int64).unsqueeze(0).repeat(batch_size, 1)
        return position_ids

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2PreTrainedModel.get_masks(input_ids, past_key_values, padding_mask=None)

This method calculates the attention masks for the input sequence in the context of the ChatGLM2PreTrainedModel class.

PARAMETER DESCRIPTION
self (ChatGLM2PreTrainedModel): The instance of the ChatGLM2PreTrainedModel class.
input_ids (mindspore.Tensor): The input sequence tensor of shape (batch_size, seq_length).
past_key_values (tuple of mindspore.Tensor): The past key-value pairs for attention weights of shape (past_length, batch_size, num_heads, past_seq_length, embed_dim).
padding_mask (mindspore.Tensor, DEFAULT: None): The tensor indicating the positions of padding tokens in the input sequence. It has the shape (batch_size, seq_length) and contains 1's for non-padding tokens and 0's for padding tokens.

RETURNS DESCRIPTION
mindspore.Tensor: The attention mask tensor of shape (batch_size, 1, seq_length, seq_length).

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def get_masks(self, input_ids, past_key_values, padding_mask=None):
    '''
        This method calculates the attention masks for the input sequence in the context of the ChatGLM2PreTrainedModel class.

        Args:
            self (ChatGLM2PreTrainedModel): The instance of the ChatGLM2PreTrainedModel class.
            input_ids (mindspore.Tensor): The input sequence tensor of shape (batch_size, seq_length).
            past_key_values (tuple of mindspore.Tensor): The past key-value pairs for attention weights of shape
                (past_length, batch_size, num_heads, past_seq_length, embed_dim).
            padding_mask (mindspore.Tensor, optional): The tensor indicating the positions of padding tokens in the input sequence.
                It has the shape (batch_size, seq_length) and contains 1's for non-padding tokens and 0's for padding tokens.
                Defaults to None.

        Returns:
            mindspore.Tensor: The attention mask tensor of shape (batch_size, 1, seq_length, seq_length).

        Raises:
            None.
    '''
    batch_size, seq_length = input_ids.shape
    full_attention_mask = ops.ones(batch_size, seq_length, seq_length)
    full_attention_mask = full_attention_mask.tril()
    past_length = 0
    if past_key_values:
        past_length = past_key_values[0][0].shape[0]
    if past_length:
        full_attention_mask = ops.cat((ops.ones(batch_size, seq_length, past_length), full_attention_mask), axis=-1)
    if padding_mask is not None:
        full_attention_mask = full_attention_mask * padding_mask.unsqueeze(1)
    if not past_length and padding_mask is not None:
        full_attention_mask -= padding_mask.unsqueeze(-1) - 1
    full_attention_mask = (full_attention_mask < 0.5).bool()
    full_attention_mask = full_attention_mask.unsqueeze(1)
    return full_attention_mask
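
For the no-padding case, the construction above reduces to a causal lower-triangular mask extended by always-visible past positions; a numpy rendering of that logic (True marks positions that must not be attended to):

import numpy as np

batch, seq_len, past_len = 1, 4, 2
# Causal mask over the current tokens: row i may attend to columns <= i.
full = np.tril(np.ones((batch, seq_len, seq_len)))
# Cached past positions are always visible, so prepend a block of ones.
full = np.concatenate([np.ones((batch, seq_len, past_len)), full], axis=-1)
# Invert to the boolean convention used above: True = do NOT attend.
mask = (full < 0.5)
print(mask[0].astype(int))
# [[0 0 0 1 1 1]
#  [0 0 0 0 1 1]
#  [0 0 0 0 0 1]
#  [0 0 0 0 0 0]]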

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2PreTrainedModel.get_position_ids(input_ids)

Returns the position IDs corresponding to input IDs.

PARAMETER DESCRIPTION
self (ChatGLM2PreTrainedModel): The instance of the ChatGLM2PreTrainedModel class.
input_ids (mindspore.Tensor): A 2-dimensional tensor of shape (batch_size, seq_length) containing input IDs.

RETURNS DESCRIPTION
mindspore.Tensor: A 2-dimensional tensor of shape (batch_size, seq_length) containing position IDs corresponding to the input IDs.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def get_position_ids(self, input_ids):
    """
    Returns the position IDs corresponding to input IDs.

    Args:
        self (ChatGLM2PreTrainedModel): The instance of the ChatGLM2PreTrainedModel class.
        input_ids (mindspore.Tensor): A 2-dimensional tensor of shape (batch_size, seq_length) containing input IDs.

    Returns:
        mindspore.Tensor: A 2-dimensional tensor of shape (batch_size, seq_length) containing position IDs corresponding to the input IDs.

    Raises:
        None.

    """
    batch_size, seq_length = input_ids.shape
    position_ids = ops.arange(seq_length, dtype=mindspore.int64).unsqueeze(0).repeat(batch_size, 1)
    return position_ids
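
An equivalent numpy one-liner, for reference (illustrative sizes):

import numpy as np

batch_size, seq_length = 2, 5
position_ids = np.arange(seq_length)[None, :].repeat(batch_size, axis=0)
print(position_ids)
# [[0 1 2 3 4]
#  [0 1 2 3 4]]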

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration

Bases: ChatGLM2PreTrainedModel

A Python class representing a conditional generation model for chat-based tasks using ChatGLM2.

This class inherits from ChatGLM2PreTrainedModel and includes methods to initialize the model, update model keyword arguments for generation, prepare inputs for generation, forward the model, reorder cache, process response, build inputs, build stream inputs, chat, stream chat, stream generate, and quantize the model.

The methods in this class enable the generation of responses for chat-based queries, handling of input data, and model quantization for improved efficiency.

For detailed information on the methods and their parameters, please refer to the method docstrings within the class implementation.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
class ChatGLM2ForConditionalGeneration(ChatGLM2PreTrainedModel):

    """A Python class representing a conditional generation model for chat-based tasks using ChatGLM2.

    This class inherits from ChatGLM2PreTrainedModel and includes methods to initialize the model, update model keyword
    arguments for generation, prepare inputs for generation, forward the model, reorder cache, process response,
    build inputs, build stream inputs, chat, stream chat, stream generate, and quantize the model.

    The methods in this class enable the generation of responses for chat-based queries, handling of input data, and
    model quantization for improved efficiency.

    For detailed information on the methods and their parameters, please refer to the method docstrings within the class implementation.
    """
    def __init__(self, config: ChatGLM2Config, empty_init=True):
        """
        Initializes an instance of the ChatGLM2ForConditionalGeneration class.

        Args:
            self: The instance of the class.
            config (ChatGLM2Config): An object of type ChatGLM2Config which provides configuration settings for the model.
            empty_init (bool, optional): Indicates whether to initialize the ChatGLM2Model with empty weights. Defaults to True.

        Returns:
            None

        Raises:
            None
        """
        super().__init__(config)

        self.max_sequence_length = config.max_length
        self.transformer = ChatGLM2Model(config, empty_init=empty_init)
        self.config = config
        self.quantized = False

        if self.config.quantization_bit:
            self.quantize(self.config.quantization_bit, empty_init=True)

    def _update_model_kwargs_for_generation(
            self,
            outputs: ModelOutput,
            model_kwargs: Dict[str, Any],
            is_encoder_decoder: bool = False,
            standardize_cache_format: bool = False,
    ) -> Dict[str, Any]:
        '''
        Updates the model keyword arguments for generation in the `ChatGLM2ForConditionalGeneration` class.

        Args:
            self (ChatGLM2ForConditionalGeneration): The instance of the ChatGLM2ForConditionalGeneration class.
            outputs (ModelOutput): The output of the model.
            model_kwargs (Dict[str, Any]): The dictionary containing the model keyword arguments.
            is_encoder_decoder (bool, optional): Indicates if the model is an encoder-decoder model. Defaults to False.
            standardize_cache_format (bool, optional): Indicates if the cache format should be standardized. Defaults to False.

        Returns:
            Dict[str, Any]: The updated model keyword arguments.

        Raises:
            None.
        '''
        # update past_key_values
        model_kwargs["past_key_values"] = self._extract_past_from_model_output(
            outputs, standardize_cache_format=standardize_cache_format
        )

        # update attention mask
        if "attention_mask" in model_kwargs:
            attention_mask = model_kwargs["attention_mask"]
            model_kwargs["attention_mask"] = ops.cat(
                [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1), dtype=attention_mask.dtype)], axis=-1
            )

        # update position ids
        if "position_ids" in model_kwargs:
            position_ids = model_kwargs["position_ids"]
            new_position_id = position_ids[..., -1:].copy()
            new_position_id += 1
            model_kwargs["position_ids"] = ops.cat(
                [position_ids, new_position_id], axis=-1
            )

        model_kwargs["is_first_forward"] = False
        return model_kwargs

    def prepare_inputs_for_generation(
            self,
            input_ids: mindspore.Tensor,
            past_key_values: Optional[mindspore.Tensor] = None,
            attention_mask: Optional[mindspore.Tensor] = None,
            position_ids: Optional[mindspore.Tensor] = None,
            use_cache: Optional[bool] = None,
            is_first_forward: bool = True,
            **kwargs
    ) -> dict:
        """
        Prepares input tensors for generation during ChatGLM2ForConditionalGeneration model training.

        Args:
            self (ChatGLM2ForConditionalGeneration): The instance of the ChatGLM2ForConditionalGeneration class.
            input_ids (mindspore.Tensor): The input tensor of shape (batch_size, seq_length) containing the input sequence indices.
            past_key_values (Optional[mindspore.Tensor]): Optional past key values tensor of shape
                (batch_size, num_heads, past_seq_length, hidden_size_per_head) used for generation in accordance with GPT-2.
            attention_mask (Optional[mindspore.Tensor]): Optional attention mask tensor of shape
                (batch_size, seq_length) used for masking out padded tokens.
            position_ids (Optional[mindspore.Tensor]): Optional position ids tensor of shape
                (batch_size, seq_length) used for generation in accordance with GPT-2.
            use_cache (Optional[bool]): Optional flag indicating whether to use cache during generation.
            is_first_forward (bool): Flag indicating whether it is the first forward pass.

        Returns:
            dict:
                A dictionary containing input tensors for generation:

                - input_ids (mindspore.Tensor): The input tensor of shape (batch_size, seq_length) containing the input sequence indices.
                - past_key_values (Optional[mindspore.Tensor]): Optional past key values tensor of shape
                (batch_size, num_heads, past_seq_length, hidden_size_per_head) used for generation in accordance with  GPT-2.
                - position_ids (mindspore.Tensor): The position ids tensor of shape (batch_size, seq_length) used for generation in accordance with GPT-2.
                - attention_mask (Optional[mindspore.Tensor]): Optional attention mask tensor of shape (batch_size, seq_length) used for masking out padded tokens.
                - return_last_logit (bool): Flag indicating whether to return the last logit during generation.
                - use_cache (Optional[bool]): Optional flag indicating whether to use cache during generation.

        Raises:
            None.
        """
        # only last token for input_ids if past is not None
        if position_ids is None:
            position_ids = self.get_position_ids(input_ids)
        if not is_first_forward:
            if past_key_values is not None:
                position_ids = position_ids[..., -1:]
                input_ids = input_ids[:, -1:]
        return {
            "input_ids": input_ids,
            "past_key_values": past_key_values,
            "position_ids": position_ids,
            "attention_mask": attention_mask,
            "return_last_logit": True,
            "use_cache": use_cache
        }

    def forward(
            self,
            input_ids: Optional[mindspore.Tensor] = None,
            position_ids: Optional[mindspore.Tensor] = None,
            attention_mask: Optional[mindspore.Tensor] = None,
            past_key_values: Optional[Tuple[mindspore.Tensor]] = None,
            inputs_embeds: Optional[mindspore.Tensor] = None,
            labels: Optional[mindspore.Tensor] = None,
            use_cache: Optional[bool] = None,
            output_attentions: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
            return_last_logit: Optional[bool] = False,
    ):
        '''
        Constructs a ChatGLM2ForConditionalGeneration object.

        Args:
            self (ChatGLM2ForConditionalGeneration): The instance of the class.
            input_ids (Optional[mindspore.Tensor]):
                The input tensor of shape [batch_size, sequence_length] representing the tokenized input sequences.
                Default is None.
            position_ids (Optional[mindspore.Tensor]):
                The input tensor of shape [batch_size, sequence_length] representing the position indices of the input tokens.
                Default is None.
            attention_mask (Optional[mindspore.Tensor]):
                The input tensor of shape [batch_size, sequence_length] representing the attention mask to avoid
                performing attention on padding tokens. Default is None.
            past_key_values (Optional[Tuple[mindspore.Tensor]]):
                The optional tuple of tensors that contains pre-computed key and value tensors for fast decoding.
                Default is None.
            inputs_embeds (Optional[mindspore.Tensor]):
                The input tensor of shape [batch_size, sequence_length, hidden_size] representing the embedded inputs.
                Default is None.
            labels (Optional[mindspore.Tensor]):
                The input tensor of shape [batch_size, sequence_length] representing the labels. Default is None.
            use_cache (Optional[bool]): Whether to use caching mechanism for faster decoding.
                If not provided, it takes the value from self.config.use_cache. Default is None.
            output_attentions (Optional[bool]): Whether to output attention weights. Default is None.
            output_hidden_states (Optional[bool]): Whether to output hidden states. Default is None.
            return_dict (Optional[bool]): Whether to return outputs as a dictionary instead of a tuple.
                If not provided, it takes the value from self.config.use_return_dict. Default is None.
            return_last_logit (Optional[bool]): Whether to return the last logit. Default is False.

        Returns:
            Union[Tuple, CausalLMOutputWithPast]: The LM logits together with the loss (when labels are
                provided), past key values, hidden states, and attentions, returned as a tuple or as a
                `CausalLMOutputWithPast` depending on `return_dict`.

        Raises:
            None
        '''
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_outputs = self.transformer(
            input_ids=input_ids,
            position_ids=position_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        hidden_states = transformer_outputs[0]
        if return_last_logit:
            hidden_states = hidden_states[-1:]
        lm_logits = self.transformer.output_layer(hidden_states)
        lm_logits = lm_logits.swapaxes(0, 1)

        loss = None
        if labels is not None:
            lm_logits = lm_logits.to(mindspore.float32)

            # Shift so that tokens < n predict n
            shift_logits = lm_logits[..., :-1, :]
            shift_labels = labels[..., 1:]
            # Flatten the tokens
            loss = ops.cross_entropy(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.view(-1),
                                     ignore_index=-100)

            lm_logits = lm_logits.to(hidden_states.dtype)
            loss = loss.to(hidden_states.dtype)

        if not return_dict:
            output = (lm_logits,) + transformer_outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=lm_logits,
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )

    @staticmethod
    def _reorder_cache(
            past: Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...], beam_idx: mindspore.Tensor
    ) -> Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]:
        """
        This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
        [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
        beam_idx at every generation step.

        Output shares the same memory storage as `past`.
        """
        return tuple(
            (
                layer_past[0].index_select(1, beam_idx),
                layer_past[1].index_select(1, beam_idx),
            )
            for layer_past in past
        )

    def process_response(self, response):
        """
        Process the response received from the chat model.

        Args:
            self: An instance of the ChatGLM2ForConditionalGeneration class.
            response (str): The response received from the chat model.

        Returns:
            str: The cleaned response, stripped of surrounding whitespace and with the
                `[[训练时间]]` placeholder replaced by `2023年`.

        Raises:
            None.
        """
        response = response.strip()
        response = response.replace("[[训练时间]]", "2023年")
        return response

    def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = None):
        """
        Builds the input tensors for the ChatGLM2ForConditionalGeneration model.

        Args:
            self (ChatGLM2ForConditionalGeneration): An instance of the ChatGLM2ForConditionalGeneration class.
            tokenizer (PreTrainedTokenizer): An instance of PreTrainedTokenizer used for tokenizing the input.
            query (str): A string containing the user query.
            history (List[Tuple[str, str]], optional): A list of tuples containing previous queries and their respective responses.
                Defaults to None.

        Returns:
            The tokenized inputs as a dict-like object whose 'input_ids' entry holds the encoded prompt.

        Raises:
            None.

        The method takes in a tokenizer instance, a user query, and optionally a list of previous queries and their
        respective responses. It then builds the input tensors using the provided tokenizer by calling the build_prompt
        method on the tokenizer instance. The input tensors are then returned as a dictionary with a single key and value
        pair. The key is 'input_ids' and the value is a tensor containing the tokenized input.
        """
        prompt = tokenizer.build_prompt(query, history=history)
        inputs = tokenizer([prompt], return_tensors="ms")
        return inputs

    def build_stream_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = None):
        """
        This method builds stream inputs for the ChatGLM2ForConditionalGeneration class.

        Args:
            self: The instance of the class.
            tokenizer: An object of the tokenizer used to encode the input prompt. It should be compatible with the model being used.
            query (str): The query string for which the stream inputs are being generated.
            history (List[Tuple[str, str]], optional): A list of historical tuples containing the previous queries and responses.
                Defaults to None.

        Returns:
            The encoded inputs for streaming generation, as a dict-like object produced by the tokenizer.

        Raises:
            None.
        """
        if history:
            prompt = "\n\n[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
            input_ids = tokenizer.encode(prompt, add_special_tokens=False)
            input_ids = input_ids[1:]
            inputs = tokenizer.batch_encode_plus([(input_ids, None)], return_tensors="ms", add_special_tokens=False)
        else:
            # history is empty (or None) here, so this is always round 1; avoid len(None)
            prompt = "[Round 1]\n\n问:{}\n\n答:".format(query)
            inputs = tokenizer([prompt], return_tensors="ms")
        return inputs

    @_no_grad()
    def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, max_length: int = 8192, num_beams=1,
             do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, **kwargs):
        """
        This method 'chat' is defined in the class 'ChatGLM2ForConditionalGeneration' and is used for generating a response to a given query in a chat scenario.

        Args:
            self: Represents the instance of the class.
            tokenizer: An object used for tokenizing the input query and decoding the generated response.
            query (str): The input query for which a response needs to be generated.
            history (List[Tuple[str, str]]): A list of previous query-response pairs. Defaults to None, which is treated as an empty list.
            max_length (int): The maximum length of the generated response. Defaults to 8192.
            num_beams (int): The number of beams to be used in beam search. Defaults to 1.
            do_sample (bool): A flag indicating whether sampling should be used during generation. Defaults to True.
            top_p (float): The nucleus sampling parameter. Defaults to 0.8.
            temperature (float): The temperature parameter for sampling. Defaults to 0.8.
            logits_processor: An object for processing the logits during generation. Defaults to None.

        Returns:
            response (str): The generated response to the input query.
            history (List[Tuple[str, str]]): The updated history including the input query and generated response.

        Raises:
            None

        Note:
            The method appends the input query and generated response to the history and returns the generated response along with the updated history.
        """
        if history is None:
            history = []
        if logits_processor is None:
            logits_processor = LogitsProcessorList()
        logits_processor.append(InvalidScoreLogitsProcessor())
        gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
                      "temperature": temperature, "logits_processor": logits_processor, **kwargs}
        inputs = self.build_inputs(tokenizer, query, history=history)
        outputs = self.generate(**inputs, **gen_kwargs)
        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
        response = tokenizer.decode(outputs)
        response = self.process_response(response)
        history = history + [(query, response)]
        return response, history

    @_no_grad()
    def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, past_key_values=None,
                    max_length: int = 8192, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None,
                    return_past_key_values=False, **kwargs):
        """
        Method to perform streaming chat using the ChatGLM2ForConditionalGeneration model.

        Args:
            self: The instance of the ChatGLM2ForConditionalGeneration class.
            tokenizer: An instance of the tokenizer to encode/decode the input/output sequences.
            query (str): The input query for the chat conversation.
            history (List[Tuple[str, str]], optional): List of previous chat history tuples,
                where each tuple contains the input query and the corresponding response. Defaults to None.
            past_key_values: The past key values for the model's autoregressive generation. Defaults to None.
            max_length (int): The maximum length of the output sequence. Defaults to 8192.
            do_sample (bool): Flag to enable sampling of the output sequence. Defaults to True.
            top_p (float): The nucleus sampling parameter for the output sequence generation. Defaults to 0.8.
            temperature (float): The temperature parameter for the output sequence generation. Defaults to 0.8.
            logits_processor: The logits processor to modify model's output distribution. Defaults to None.
            return_past_key_values (bool): Flag to return the past key values along with the response. Defaults to False.
            **kwargs: Additional keyword arguments for generating the output sequence.

        Yields:
            Tuple of (response, updated history), plus the past key values when
                return_past_key_values is True.

        Raises:
            None.
        """
        if history is None:
            history = []
        if logits_processor is None:
            logits_processor = LogitsProcessorList()
        logits_processor.append(InvalidScoreLogitsProcessor())
        gen_kwargs = {"max_length": max_length, "do_sample": do_sample, "top_p": top_p,
                      "temperature": temperature, "logits_processor": logits_processor, **kwargs}
        if past_key_values is None and not return_past_key_values:
            inputs = self.build_inputs(tokenizer, query, history=history)
        else:
            inputs = self.build_stream_inputs(tokenizer, query, history=history)
        if past_key_values is not None:
            past_length = past_key_values[0][0].shape[0]
            if self.transformer.pre_seq_len is not None:
                past_length -= self.transformer.pre_seq_len
            inputs['position_ids'] = inputs.position_ids + past_length  # MindSpore tensors do not support in-place `+=` here
            attention_mask = inputs.attention_mask
            attention_mask = ops.cat((attention_mask.new_ones((1, past_length), dtype=attention_mask.dtype), attention_mask), axis=1)
            inputs['attention_mask'] = attention_mask
        for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
                                            return_past_key_values=return_past_key_values, **gen_kwargs):
            if return_past_key_values:
                outputs, past_key_values = outputs
            outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
            response = tokenizer.decode(outputs)
            if response and response[-1] != "�":
                response = self.process_response(response)
                new_history = history + [(query, response)]
                if return_past_key_values:
                    yield response, new_history, past_key_values
                else:
                    yield response, new_history

    @_no_grad()
    def stream_generate(
            self,
            input_ids,
            generation_config: Optional[GenerationConfig] = None,
            logits_processor: Optional[LogitsProcessorList] = None,
            stopping_criteria: Optional[StoppingCriteriaList] = None,
            prefix_allowed_tokens_fn: Optional[Callable[[int, mindspore.Tensor], List[int]]] = None,
            return_past_key_values=False,
            **kwargs,
    ):
        """
        Generates a stream of conditional text based on the given input_ids using the ChatGLM2 model.

        Args:
            self (ChatGLM2ForConditionalGeneration): The instance of the ChatGLM2ForConditionalGeneration class.
            input_ids (mindspore.Tensor): The input token ids for text generation.
            generation_config (Optional[GenerationConfig]): The configuration for text generation. Default is None.
            logits_processor (Optional[LogitsProcessorList]):
                The list of logits processors to be applied on the generated logits. Default is None.
            stopping_criteria (Optional[StoppingCriteriaList]):
                The list of stopping criteria to determine when to stop text generation. Default is None.
            prefix_allowed_tokens_fn (Optional[Callable[[int, mindspore.Tensor], List[int]]]):
                The function that returns a list of allowed tokens for each prefix. Default is None.
            return_past_key_values (bool): Whether to return the past key values during generation. Default is False.

        Yields:
            mindspore.Tensor: The generated ids so far (together with the past key values
                when return_past_key_values is True).

        Raises:
            UserWarning: If using `max_length`'s default value to control generation length.
                This behavior is deprecated and will be removed in v5 of Transformers.
                It is recommended to use `max_new_tokens` instead.
            UserWarning: If both `max_new_tokens` and `max_length` are set. `max_new_tokens` takes precedence.
            UserWarning: If the input length exceeds `max_length` and may lead to unexpected behavior.

        Note:
            This method yields generated text in a streaming fashion.
        """
        input_ids_seq_length = input_ids.shape[-1]

        if generation_config is None:
            generation_config = self.generation_config
        generation_config = copy.deepcopy(generation_config)
        model_kwargs = generation_config.update(**kwargs)
        model_kwargs["use_cache"] = generation_config.use_cache
        eos_token_id = generation_config.eos_token_id

        if isinstance(eos_token_id, int):
            eos_token_id = [eos_token_id]

        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
        if has_default_max_length and generation_config.max_new_tokens is None:
            warnings.warn(
                f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
                "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
                " recommend using `max_new_tokens` to control the maximum length of the generation.",
                UserWarning,
            )
        elif generation_config.max_new_tokens is not None:
            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
            if not has_default_max_length:
                logger.warning(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
                    "(https://hf-mirror.com/docs/transformers/main/en/main_classes/text_generation)"
                )

        if input_ids_seq_length >= generation_config.max_length:
            input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
            logger.warning(
                f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                " increasing `max_new_tokens`."
            )

        # 2. Set generation parameters if not already defined
        logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
        stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

        logits_processor = self._get_logits_processor(
            generation_config=generation_config,
            input_ids_seq_length=input_ids_seq_length,
            encoder_input_ids=input_ids,
            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
            logits_processor=logits_processor,
        )

        stopping_criteria = self._get_stopping_criteria(
            generation_config=generation_config, stopping_criteria=stopping_criteria
        )
        logits_warper = self._get_logits_warper(generation_config)

        unfinished_sequences = ops.ones(input_ids.shape[0], dtype=input_ids.dtype)
        scores = None
        while True:
            model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
            # forward pass to get next token
            outputs = self(
                **model_inputs,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
            )

            next_token_logits = outputs.logits[:, -1, :]

            # pre-process distribution
            next_token_scores = logits_processor(input_ids, next_token_logits)
            next_token_scores = logits_warper(input_ids, next_token_scores)

            # sample
            probs = ops.softmax(next_token_scores, axis=-1)
            if generation_config.do_sample:
                next_tokens = ops.multinomial(probs, num_samples=1).squeeze(1)
            else:
                next_tokens = ops.argmax(probs, dim=-1)

            # update generated ids, model inputs, and length for next step
            input_ids = ops.cat([input_ids, next_tokens[:, None]], axis=-1)
            model_kwargs = self._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
            )
            # a sequence is finished once its newest token matches *any* eos id
            unfinished_sequences = unfinished_sequences.mul(
                (sum(next_tokens == i for i in eos_token_id) == 0).long())
            if return_past_key_values:
                yield input_ids, outputs.past_key_values
            else:
                yield input_ids
            # stop when each sentence is finished, or if we exceed the maximum length
            if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                break

    def quantize(self, bits: int, empty_init=False, **kwargs):
        """
        This method quantizes the model weights to a specified number of bits.

        Args:
            self: The instance of the ChatGLM2ForConditionalGeneration class.
            bits (int): The number of bits to quantize the model weights to.
                Must be a positive integer.
            empty_init (bool): Optional. If True, the initialization process is skipped.
                Defaults to False.

        Returns:
            None.

        Raises:
            ValueError: If the bits parameter is not a positive integer.
            TypeError: If the bits parameter is not an integer.
        """

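The key/value caches in this model are laid out sequence-first: dim 0 is the past sequence length (stream_chat reads past_length from shape[0]) and dim 1 holds batch × beams, which is why _reorder_cache gathers along dim 1. A toy sketch of that gather, with hypothetical shapes and values:

import numpy as np
import mindspore

# (past_seq_len=2, batch*num_beams=4, head_dim=3) -- illustrative only
key = mindspore.Tensor(np.arange(24, dtype=np.float32).reshape(2, 4, 3))
beam_idx = mindspore.Tensor(np.array([2, 2, 0, 1]), mindspore.int32)  # surviving beam per slot
reordered = key.index_select(1, beam_idx)  # same gather as _reorder_cache per layer
print(reordered.shape)  # (2, 4, 3): shape unchanged, beams re-ordered
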
mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.__init__(config, empty_init=True)

Initializes an instance of the ChatGLM2ForConditionalGeneration class.

PARAMETER DESCRIPTION
    self: The instance of the class.
    config (ChatGLM2Config): An object of type ChatGLM2Config which provides configuration settings for the model.
    empty_init (bool, DEFAULT: True): Indicates whether to initialize the ChatGLM2Model with empty weights.

RETURNS DESCRIPTION
    None

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def __init__(self, config: ChatGLM2Config, empty_init=True):
    """
    Initializes an instance of the ChatGLM2ForConditionalGeneration class.

    Args:
        self: The instance of the class.
        config (ChatGLM2Config): An object of type ChatGLM2Config which provides configuration settings for the model.
        empty_init (bool, optional): Indicates whether to initialize the ChatGLM2Model with empty weights. Defaults to True.

    Returns:
        None

    Raises:
        None
    """
    super().__init__(config)

    self.max_sequence_length = config.max_length
    self.transformer = ChatGLM2Model(config, empty_init=empty_init)
    self.config = config
    self.quantized = False

    if self.config.quantization_bit:
        self.quantize(self.config.quantization_bit, empty_init=True)
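
A hedged construction sketch. The hyperparameter values below are illustrative toys, not the released 6B settings; max_length is the generation-side field inherited from PretrainedConfig that __init__ reads as max_sequence_length:

from mindnlp.transformers.models.chatglm2.configuration_chatglm2 import ChatGLM2Config
from mindnlp.transformers.models.chatglm2.modeling_chatglm2 import ChatGLM2ForConditionalGeneration

config = ChatGLM2Config(
    num_layers=2,              # toy depth
    hidden_size=64,            # kept consistent with num_attention_heads * kv_channels
    ffn_hidden_size=224,
    kv_channels=16,
    num_attention_heads=4,
    padded_vocab_size=1024,
    max_length=128,            # consumed by __init__ as max_sequence_length
)
model = ChatGLM2ForConditionalGeneration(config, empty_init=False)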

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.build_inputs(tokenizer, query, history=None)

Builds the input tensors for the ChatGLM2ForConditionalGeneration model.

PARAMETER DESCRIPTION
    self (ChatGLM2ForConditionalGeneration): An instance of the ChatGLM2ForConditionalGeneration class.
    tokenizer (PreTrainedTokenizer): An instance of PreTrainedTokenizer used for tokenizing the input.
    query (str): A string containing the user query.
    history (List[Tuple[str, str]], DEFAULT: None): A list of tuples containing previous queries and their respective responses.

RETURNS DESCRIPTION
    The tokenized inputs as a dict-like object whose 'input_ids' entry holds the encoded prompt.

The method builds the prompt from the query and history via the tokenizer's build_prompt method, tokenizes it, and returns the result as a dict-like object keyed by 'input_ids'.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = None):
    """
    Builds the input tensors for the ChatGLM2ForConditionalGeneration model.

    Args:
        self (ChatGLM2ForConditionalGeneration): An instance of the ChatGLM2ForConditionalGeneration class.
        tokenizer (PreTrainedTokenizer): An instance of PreTrainedTokenizer used for tokenizing the input.
        query (str): A string containing the user query.
        history (List[Tuple[str, str]], optional): A list of tuples containing previous queries and their respective responses.
            Defaults to None.

    Returns:
        The tokenized inputs as a dict-like object whose 'input_ids' entry holds the encoded prompt.

    Raises:
        None.

    The method takes in a tokenizer instance, a user query, and optionally a list of previous queries and their
    respective responses. It then builds the input tensors using the provided tokenizer by calling the build_prompt
    method on the tokenizer instance. The input tensors are then returned as a dictionary with a single key and value
    pair. The key is 'input_ids' and the value is a tensor containing the tokenized input.
    """
    prompt = tokenizer.build_prompt(query, history=history)
    inputs = tokenizer([prompt], return_tensors="ms")
    return inputs
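
A hedged usage sketch, reusing the toy model from the construction sketch above; the ChatGLM2Tokenizer export path and the checkpoint id are assumptions (any ChatGLM2-style tokenizer exposing build_prompt should behave the same way):

from mindnlp.transformers import ChatGLM2Tokenizer  # assumed export path

tokenizer = ChatGLM2Tokenizer.from_pretrained("THUDM/chatglm2-6b")  # assumed checkpoint id
history = [("你好", "你好!有什么可以帮你?")]
inputs = model.build_inputs(tokenizer, "今天天气怎么样?", history=history)
print(inputs["input_ids"].shape)  # (1, prompt_length)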

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.build_stream_inputs(tokenizer, query, history=None)

This method builds stream inputs for the ChatGLM2ForConditionalGeneration class.

PARAMETER DESCRIPTION
    self: The instance of the class.
    tokenizer: An object of the tokenizer used to encode the input prompt. It should be compatible with the model being used.
    query (str): The query string for which the stream inputs are being generated.
    history (List[Tuple[str, str]], DEFAULT: None): A list of historical tuples containing the previous queries and responses.

RETURNS DESCRIPTION
    The encoded inputs for streaming generation, as a dict-like object produced by the tokenizer.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def build_stream_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = None):
    """
    This method builds stream inputs for the ChatGLM2ForConditionalGeneration class.

    Args:
        self: The instance of the class.
        tokenizer: An object of the tokenizer used to encode the input prompt. It should be compatible with the model being used.
        query (str): The query string for which the stream inputs are being generated.
        history (List[Tuple[str, str]], optional): A list of historical tuples containing the previous queries and responses.
            Defaults to None.

    Returns:
        The encoded inputs for streaming generation, as a dict-like object produced by the tokenizer.

    Raises:
        None.
    """
    if history:
        prompt = "\n\n[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
        input_ids = tokenizer.encode(prompt, add_special_tokens=False)
        input_ids = input_ids[1:]
        inputs = tokenizer.batch_encode_plus([(input_ids, None)], return_tensors="ms", add_special_tokens=False)
    else:
        # history is empty (or None) here, so this is always round 1; avoid len(None)
        prompt = "[Round 1]\n\n问:{}\n\n答:".format(query)
        inputs = tokenizer([prompt], return_tensors="ms")
    return inputs
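
The round-numbered prompt built above can be previewed with plain string formatting, no model or tokenizer required:

history = [("你好", "你好!有什么可以帮你?")]
query = "今天天气怎么样?"
prompt = "\n\n[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
print(repr(prompt))  # '\n\n[Round 2]\n\n问:今天天气怎么样?\n\n答:'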

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.chat(tokenizer, query, history=None, max_length=8192, num_beams=1, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, **kwargs)

This method 'chat' is defined in the class 'ChatGLM2ForConditionalGeneration' and is used for generating a response to a given query in a chat scenario.

PARAMETER DESCRIPTION
    self: Represents the instance of the class.
    tokenizer: An object used for tokenizing the input query and decoding the generated response.
    query (str): The input query for which a response needs to be generated.
    history (List[Tuple[str, str]], DEFAULT: None): A list of previous query-response pairs. Defaults to None, which is treated as an empty list.
    max_length (int, DEFAULT: 8192): The maximum length of the generated response.
    num_beams (int, DEFAULT: 1): The number of beams to be used in beam search.
    do_sample (bool, DEFAULT: True): A flag indicating whether sampling should be used during generation.
    top_p (float, DEFAULT: 0.8): The nucleus sampling parameter.
    temperature (float, DEFAULT: 0.8): The temperature parameter for sampling.
    logits_processor (DEFAULT: None): An object for processing the logits during generation.

RETURNS DESCRIPTION
    response (str): The generated response to the input query.
    history (List[Tuple[str, str]]): The updated history including the input query and generated response.

Note

The method appends the input query and generated response to the history and returns the generated response along with the updated history.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
@_no_grad()
def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, max_length: int = 8192, num_beams=1,
         do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, **kwargs):
    """
    This method 'chat' is defined in the class 'ChatGLM2ForConditionalGeneration' and is used for generating a response to a given query in a chat scenario.

    Args:
        self: Represents the instance of the class.
        tokenizer: An object used for tokenizing the input query and decoding the generated response.
        query (str): The input query for which a response needs to be generated.
        history (List[Tuple[str, str]]): A list of previous query-response pairs. Defaults to None, which is treated as an empty list.
        max_length (int): The maximum length of the generated response. Defaults to 8192.
        num_beams (int): The number of beams to be used in beam search. Defaults to 1.
        do_sample (bool): A flag indicating whether sampling should be used during generation. Defaults to True.
        top_p (float): The nucleus sampling parameter. Defaults to 0.8.
        temperature (float): The temperature parameter for sampling. Defaults to 0.8.
        logits_processor: An object for processing the logits during generation. Defaults to None.

    Returns:
        response (str): The generated response to the input query.
        history (List[Tuple[str, str]]): The updated history including the input query and generated response.

    Raises:
        None

    Note:
        The method appends the input query and generated response to the history and returns the generated response along with the updated history.
    """
    if history is None:
        history = []
    if logits_processor is None:
        logits_processor = LogitsProcessorList()
    logits_processor.append(InvalidScoreLogitsProcessor())
    gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
                  "temperature": temperature, "logits_processor": logits_processor, **kwargs}
    inputs = self.build_inputs(tokenizer, query, history=history)
    outputs = self.generate(**inputs, **gen_kwargs)
    outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
    response = tokenizer.decode(outputs)
    response = self.process_response(response)
    history = history + [(query, response)]
    return response, history
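
A hedged end-to-end sketch of multi-turn chat. The checkpoint id and the tokenizer import are assumptions; the weights must be available locally or through the configured hub mirror:

from mindnlp.transformers import ChatGLM2Tokenizer  # assumed export path
from mindnlp.transformers.models.chatglm2.modeling_chatglm2 import ChatGLM2ForConditionalGeneration

tokenizer = ChatGLM2Tokenizer.from_pretrained("THUDM/chatglm2-6b")  # assumed checkpoint id
model = ChatGLM2ForConditionalGeneration.from_pretrained("THUDM/chatglm2-6b")
model.set_train(False)  # inference mode

history = []
response, history = model.chat(tokenizer, "你好", history=history)
print(response)
response, history = model.chat(tokenizer, "请用一句话介绍你自己", history=history)
print(response)  # history now holds both (query, response) pairs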

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.forward(input_ids=None, position_ids=None, attention_mask=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_attentions=None, output_hidden_states=None, return_dict=None, return_last_logit=False)

Runs the forward pass of the ChatGLM2ForConditionalGeneration model, producing language-model logits and, when labels are given, the shifted autoregressive cross-entropy loss.

PARAMETER DESCRIPTION
    self (ChatGLM2ForConditionalGeneration): The instance of the class.
    input_ids (Optional[Tensor], DEFAULT: None): The input tensor of shape [batch_size, sequence_length] representing the tokenized input sequences.
    position_ids (Optional[Tensor], DEFAULT: None): The input tensor of shape [batch_size, sequence_length] representing the position indices of the input tokens.
    attention_mask (Optional[Tensor], DEFAULT: None): The input tensor of shape [batch_size, sequence_length] representing the attention mask to avoid performing attention on padding tokens.
    past_key_values (Optional[Tuple[Tensor]], DEFAULT: None): The optional tuple of tensors that contains pre-computed key and value tensors for fast decoding.
    inputs_embeds (Optional[Tensor], DEFAULT: None): The input tensor of shape [batch_size, sequence_length, hidden_size] representing the embedded inputs.
    labels (Optional[Tensor], DEFAULT: None): The input tensor of shape [batch_size, sequence_length] representing the labels.
    use_cache (Optional[bool], DEFAULT: None): Whether to use caching for faster decoding. If not provided, it takes the value from self.config.use_cache.
    output_attentions (Optional[bool], DEFAULT: None): Whether to output attention weights.
    output_hidden_states (Optional[bool], DEFAULT: None): Whether to output hidden states.
    return_dict (Optional[bool], DEFAULT: None): Whether to return outputs as a dictionary instead of a tuple. If not provided, it takes the value from self.config.use_return_dict.
    return_last_logit (Optional[bool], DEFAULT: False): Whether to return only the logits for the last position.

RETURNS DESCRIPTION
    The LM logits together with the loss (when labels are provided), past key values, hidden states, and attentions, returned as a tuple or as a CausalLMOutputWithPast depending on return_dict.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Tuple[mindspore.Tensor]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        return_last_logit: Optional[bool] = False,
):
    '''
    Runs the forward pass of the ChatGLM2ForConditionalGeneration model, producing language-model logits
    and, when labels are given, the shifted autoregressive cross-entropy loss.

    Args:
        self (ChatGLM2ForConditionalGeneration): The instance of the class.
        input_ids (Optional[mindspore.Tensor]):
            The input tensor of shape [batch_size, sequence_length] representing the tokenized input sequences.
            Default is None.
        position_ids (Optional[mindspore.Tensor]):
            The input tensor of shape [batch_size, sequence_length] representing the position indices of the input tokens.
            Default is None.
        attention_mask (Optional[mindspore.Tensor]):
            The input tensor of shape [batch_size, sequence_length] representing the attention mask to avoid
            performing attention on padding tokens. Default is None.
        past_key_values (Optional[Tuple[mindspore.Tensor]]):
            The optional tuple of tensors that contains pre-computed key and value tensors for fast decoding.
            Default is None.
        inputs_embeds (Optional[mindspore.Tensor]):
            The input tensor of shape [batch_size, sequence_length, hidden_size] representing the embedded inputs.
            Default is None.
        labels (Optional[mindspore.Tensor]):
            The input tensor of shape [batch_size, sequence_length] representing the labels. Default is None.
        use_cache (Optional[bool]): Whether to use caching mechanism for faster decoding.
            If not provided, it takes the value from self.config.use_cache. Default is None.
        output_attentions (Optional[bool]): Whether to output attention weights. Default is None.
        output_hidden_states (Optional[bool]): Whether to output hidden states. Default is None.
        return_dict (Optional[bool]): Whether to return outputs as a dictionary instead of a tuple.
            If not provided, it takes the value from self.config.use_return_dict. Default is None.
        return_last_logit (Optional[bool]): Whether to return the last logit. Default is False.

    Returns:
        Union[Tuple, CausalLMOutputWithPast]: The LM logits together with the loss (when labels are
            provided), past key values, hidden states, and attentions, returned as a tuple or as a
            `CausalLMOutputWithPast` depending on `return_dict`.

    Raises:
        None
    '''
    use_cache = use_cache if use_cache is not None else self.config.use_cache
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    transformer_outputs = self.transformer(
        input_ids=input_ids,
        position_ids=position_ids,
        attention_mask=attention_mask,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )

    hidden_states = transformer_outputs[0]
    if return_last_logit:
        hidden_states = hidden_states[-1:]
    lm_logits = self.transformer.output_layer(hidden_states)
    lm_logits = lm_logits.swapaxes(0, 1)

    loss = None
    if labels is not None:
        lm_logits = lm_logits.to(mindspore.float32)

        # Shift so that tokens < n predict n
        shift_logits = lm_logits[..., :-1, :]
        shift_labels = labels[..., 1:]
        # Flatten the tokens
        loss = ops.cross_entropy(shift_logits.view(-1, shift_logits.shape[-1]), shift_labels.view(-1),
                                 ignore_index=-100)

        lm_logits = lm_logits.to(hidden_states.dtype)
        loss = loss.to(hidden_states.dtype)

    if not return_dict:
        output = (lm_logits,) + transformer_outputs[1:]
        return ((loss,) + output) if loss is not None else output

    return CausalLMOutputWithPast(
        loss=loss,
        logits=lm_logits,
        past_key_values=transformer_outputs.past_key_values,
        hidden_states=transformer_outputs.hidden_states,
        attentions=transformer_outputs.attentions,
    )
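
The label handling above can be reproduced in isolation. A minimal sketch of the "shift so that tokens < n predict n" cross-entropy, assuming toy shapes and -100 as the ignore index for prompt/padding positions:

import numpy as np
import mindspore
from mindspore import ops

batch, seq_len, vocab = 2, 5, 11
lm_logits = mindspore.Tensor(np.random.randn(batch, seq_len, vocab).astype(np.float32))
labels = mindspore.Tensor(np.array([[7, 3, 9, -100, -100],
                                    [4, 5, 6, 7, -100]]), mindspore.int32)

shift_logits = lm_logits[..., :-1, :]  # predictions for positions 0..n-2
shift_labels = labels[..., 1:]         # targets are the *next* tokens
loss = ops.cross_entropy(shift_logits.view(-1, vocab), shift_labels.view(-1),
                         ignore_index=-100)
print(loss)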

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.prepare_inputs_for_generation(input_ids, past_key_values=None, attention_mask=None, position_ids=None, use_cache=None, is_first_forward=True, **kwargs)

Prepares input tensors for a generation step with the ChatGLM2ForConditionalGeneration model.

PARAMETER DESCRIPTION
    self (ChatGLM2ForConditionalGeneration): The instance of the ChatGLM2ForConditionalGeneration class.
    input_ids (Tensor): The input tensor of shape (batch_size, seq_length) containing the input sequence indices.
    past_key_values (Optional[Tensor], DEFAULT: None): Optional past key values tensor of shape (batch_size, num_heads, past_seq_length, hidden_size_per_head) used for generation in accordance with GPT-2.
    attention_mask (Optional[Tensor], DEFAULT: None): Optional attention mask tensor of shape (batch_size, seq_length) used for masking out padded tokens.
    position_ids (Optional[Tensor], DEFAULT: None): Optional position ids tensor of shape (batch_size, seq_length) used for generation in accordance with GPT-2.
    use_cache (Optional[bool], DEFAULT: None): Optional flag indicating whether to use cache during generation.
    is_first_forward (bool, DEFAULT: True): Flag indicating whether it is the first forward pass.

RETURNS DESCRIPTION
    dict: A dictionary containing input tensors for generation:

    • input_ids (mindspore.Tensor): The input tensor of shape (batch_size, seq_length) containing the input sequence indices.
    • past_key_values (Optional[mindspore.Tensor]): Optional past key values used for generation in accordance with GPT-2.
    • position_ids (mindspore.Tensor): The position ids tensor of shape (batch_size, seq_length).
    • attention_mask (Optional[mindspore.Tensor]): Optional attention mask tensor of shape (batch_size, seq_length).
    • return_last_logit (bool): Flag indicating whether to return the last logit during generation.
    • use_cache (Optional[bool]): Optional flag indicating whether to use cache during generation.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def prepare_inputs_for_generation(
        self,
        input_ids: mindspore.Tensor,
        past_key_values: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        is_first_forward: bool = True,
        **kwargs
) -> dict:
    """
    Prepares input tensors for a generation step with the ChatGLM2ForConditionalGeneration model.

    Args:
        self (ChatGLM2ForConditionalGeneration): The instance of the ChatGLM2ForConditionalGeneration class.
        input_ids (mindspore.Tensor): The input tensor of shape (batch_size, seq_length) containing the input sequence indices.
        past_key_values (Optional[mindspore.Tensor]): Optional past key values tensor of shape
            (batch_size, num_heads, past_seq_length, hidden_size_per_head) used for generation in accordance with GPT-2.
        attention_mask (Optional[mindspore.Tensor]): Optional attention mask tensor of shape
            (batch_size, seq_length) used for masking out padded tokens.
        position_ids (Optional[mindspore.Tensor]): Optional position ids tensor of shape
            (batch_size, seq_length) used for generation in accordance with GPT-2.
        use_cache (Optional[bool]): Optional flag indicating whether to use cache during generation.
        is_first_forward (bool): Flag indicating whether it is the first forward pass.

    Returns:
        dict:
            A dictionary containing input tensors for generation:

            - input_ids (mindspore.Tensor): The input tensor of shape (batch_size, seq_length) containing the input sequence indices.
            - past_key_values (Optional[mindspore.Tensor]): Optional past key values tensor of shape
            (batch_size, num_heads, past_seq_length, hidden_size_per_head) used for generation in accordance with GPT-2.
            - position_ids (mindspore.Tensor): The position ids tensor of shape (batch_size, seq_length) used for generation in accordance with GPT-2.
            - attention_mask (Optional[mindspore.Tensor]): Optional attention mask tensor of shape (batch_size, seq_length) used for masking out padded tokens.
            - return_last_logit (bool): Flag indicating whether to return the last logit during generation.
            - use_cache (Optional[bool]): Optional flag indicating whether to use cache during generation.

    Raises:
        None.
    """
    # only last token for input_ids if past is not None
    if position_ids is None:
        position_ids = self.get_position_ids(input_ids)
    if not is_first_forward:
        if past_key_values is not None:
            position_ids = position_ids[..., -1:]
            input_ids = input_ids[:, -1:]
    return {
        "input_ids": input_ids,
        "past_key_values": past_key_values,
        "position_ids": position_ids,
        "attention_mask": attention_mask,
        "return_last_logit": True,
        "use_cache": use_cache
    }
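
A toy illustration (hypothetical token ids) of the is_first_forward=False branch: once a cache exists, only the last column of input_ids and position_ids is fed to the next step, while the attention mask still covers the full sequence:

import numpy as np
import mindspore

input_ids = mindspore.Tensor(np.array([[101, 102, 103]]), mindspore.int64)
position_ids = mindspore.Tensor(np.array([[0, 1, 2]]), mindspore.int64)

step_ids = input_ids[:, -1:]       # (1, 1): the newest token
step_pos = position_ids[..., -1:]  # (1, 1): its position id
print(step_ids.shape, step_pos.shape)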

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.process_response(response)

Process the response received from the chat model.

PARAMETER DESCRIPTION
    self: An instance of the ChatGLM2ForConditionalGeneration class.
    response (str): The response received from the chat model.

RETURNS DESCRIPTION
    str: The cleaned response, stripped of surrounding whitespace and with the `[[训练时间]]` placeholder replaced by `2023年`.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def process_response(self, response):
    """
    Process the response received from the chat model.

    Args:
        self: An instance of the ChatGLM2ForConditionalGeneration class.
        response (str): The response received from the chat model.

    Returns:
        str: The cleaned response, stripped of surrounding whitespace and with the
            `[[训练时间]]` placeholder replaced by `2023年`.

    Raises:
        None.
    """
    response = response.strip()
    response = response.replace("[[训练时间]]", "2023年")
    return response
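
A one-line demonstration of the cleanup performed above (the input string is made up):

raw = "  我是 ChatGLM2,训练数据截止于[[训练时间]]。 "
print(raw.strip().replace("[[训练时间]]", "2023年"))
# 我是 ChatGLM2,训练数据截止于2023年。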

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.quantize(bits, empty_init=False, **kwargs)

This method quantizes the model weights to a specified number of bits.

PARAMETER DESCRIPTION
    self: The instance of the ChatGLM2ForConditionalGeneration class.
    bits (int): The number of bits to quantize the model weights to. Must be a positive integer.
    empty_init (bool, DEFAULT: False): Optional. If True, the initialization process is skipped.

RETURNS DESCRIPTION
    None.

RAISES DESCRIPTION
    ValueError: If the bits parameter is not a positive integer.
    TypeError: If the bits parameter is not an integer.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def quantize(self, bits: int, empty_init=False, **kwargs):
    """
    This method quantizes the model weights to a specified number of bits.

    Args:
        self: The instance of the ChatGLM2ForConditionalGeneration class.
        bits (int): The number of bits to quantize the model weights to.
            Must be a positive integer.
        empty_init (bool): Optional. If True, the initialization process is skipped.
            Defaults to False.

    Returns:
        None.

    Raises:
        ValueError: If the bits parameter is not a positive integer.
        TypeError: If the bits parameter is not an integer.
    """

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.stream_chat(tokenizer, query, history=None, past_key_values=None, max_length=8192, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, return_past_key_values=False, **kwargs)

Method to perform streaming chat using the ChatGLM2ForConditionalGeneration model.

PARAMETER DESCRIPTION
    self: The instance of the ChatGLM2ForConditionalGeneration class.
    tokenizer: An instance of the tokenizer to encode/decode the input/output sequences.
    query (str): The input query for the chat conversation.
    history (List[Tuple[str, str]], DEFAULT: None): List of previous chat history tuples, where each tuple contains the input query and the corresponding response.
    past_key_values (DEFAULT: None): The past key values for the model's autoregressive generation.
    max_length (int, DEFAULT: 8192): The maximum length of the output sequence.
    do_sample (bool, DEFAULT: True): Flag to enable sampling of the output sequence.
    top_p (float, DEFAULT: 0.8): The nucleus sampling parameter for the output sequence generation.
    temperature (float, DEFAULT: 0.8): The temperature parameter for the output sequence generation.
    logits_processor (DEFAULT: None): The logits processor to modify the model's output distribution.
    return_past_key_values (bool, DEFAULT: False): Flag to return the past key values along with the response.
    **kwargs: Additional keyword arguments for generating the output sequence.

RETURNS DESCRIPTION
    Yields tuples of (response, updated chat history), plus the past key values when return_past_key_values is True.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
@_no_grad()
def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, past_key_values=None,
                max_length: int = 8192, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None,
                return_past_key_values=False, **kwargs):
    """
    Method to perform streaming chat using the ChatGLM2ForConditionalGeneration model.

    Args:
        self: The instance of the ChatGLM2ForConditionalGeneration class.
        tokenizer: An instance of the tokenizer to encode/decode the input/output sequences.
        query (str): The input query for the chat conversation.
        history (List[Tuple[str, str]], optional): List of previous chat history tuples,
            where each tuple contains the input query and the corresponding response. Defaults to None.
        past_key_values: The past key values for the model's autoregressive generation. Defaults to None.
        max_length (int): The maximum length of the output sequence. Defaults to 8192.
        do_sample (bool): Flag to enable sampling of the output sequence. Defaults to True.
        top_p (float): The nucleus sampling parameter for the output sequence generation. Defaults to 0.8.
        temperature (float): The temperature parameter for the output sequence generation. Defaults to 0.8.
        logits_processor: The logits processor to modify model's output distribution. Defaults to None.
        return_past_key_values (bool): Flag to return the past key values along with the response. Defaults to False.
        **kwargs: Additional keyword arguments for generating the output sequence.

    Yields:
        Tuple of (response, updated history), plus the past key values when
            return_past_key_values is True.

    Raises:
        None.
    """
    if history is None:
        history = []
    if logits_processor is None:
        logits_processor = LogitsProcessorList()
    logits_processor.append(InvalidScoreLogitsProcessor())
    gen_kwargs = {"max_length": max_length, "do_sample": do_sample, "top_p": top_p,
                  "temperature": temperature, "logits_processor": logits_processor, **kwargs}
    if past_key_values is None and not return_past_key_values:
        inputs = self.build_inputs(tokenizer, query, history=history)
    else:
        inputs = self.build_stream_inputs(tokenizer, query, history=history)
    if past_key_values is not None:
        past_length = past_key_values[0][0].shape[0]
        if self.transformer.pre_seq_len is not None:
            past_length -= self.transformer.pre_seq_len
        inputs['position_ids'] = inputs.position_ids + past_length  # MindSpore tensors do not support in-place `+=` here
        attention_mask = inputs.attention_mask
        attention_mask = ops.cat((attention_mask.new_ones((1, past_length), dtype=attention_mask.dtype), attention_mask), axis=1)
        inputs['attention_mask'] = attention_mask
    for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
                                        return_past_key_values=return_past_key_values, **gen_kwargs):
        if return_past_key_values:
            outputs, past_key_values = outputs
        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
        response = tokenizer.decode(outputs)
        if response and response[-1] != "�":
            response = self.process_response(response)
            new_history = history + [(query, response)]
            if return_past_key_values:
                yield response, new_history, past_key_values
            else:
                yield response, new_history
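
A hedged streaming sketch, reusing the model and tokenizer assumptions from the chat example above. Each yield carries the full response decoded so far, so printing only the new suffix gives a typewriter effect:

printed = ""
for response, history in model.stream_chat(tokenizer, "给我讲一个简短的故事", history=[]):
    print(response[len(printed):], end="", flush=True)
    printed = response
print()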

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForConditionalGeneration.stream_generate(input_ids, generation_config=None, logits_processor=None, stopping_criteria=None, prefix_allowed_tokens_fn=None, return_past_key_values=False, **kwargs)

Generates a stream of conditional text based on the given input_ids using the ChatGLM2 model.

PARAMETER DESCRIPTION
    self (ChatGLM2ForConditionalGeneration): The instance of the ChatGLM2ForConditionalGeneration class.
    input_ids (Tensor): The input token ids for text generation.
    generation_config (Optional[GenerationConfig], DEFAULT: None): The configuration for text generation.
    logits_processor (Optional[LogitsProcessorList], DEFAULT: None): The list of logits processors to be applied on the generated logits.
    stopping_criteria (Optional[StoppingCriteriaList], DEFAULT: None): The list of stopping criteria to determine when to stop text generation.
    prefix_allowed_tokens_fn (Optional[Callable[[int, Tensor], List[int]]], DEFAULT: None): The function that returns a list of allowed tokens for each prefix.
    return_past_key_values (bool, DEFAULT: False): Whether to return the past key values during generation.

RETURNS DESCRIPTION
    Yields the generated ids so far (and the past key values when return_past_key_values is True).

RAISES DESCRIPTION
    UserWarning: If using max_length's default value to control generation length. This behavior is deprecated and will be removed in v5 of Transformers. It is recommended to use max_new_tokens instead.
    UserWarning: If both max_new_tokens and max_length are set. max_new_tokens takes precedence.
    UserWarning: If the input length exceeds max_length and may lead to unexpected behavior.

Note

This method yields generated text in a streaming fashion.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
@_no_grad()
def stream_generate(
        self,
        input_ids,
        generation_config: Optional[GenerationConfig] = None,
        logits_processor: Optional[LogitsProcessorList] = None,
        stopping_criteria: Optional[StoppingCriteriaList] = None,
        prefix_allowed_tokens_fn: Optional[Callable[[int, mindspore.Tensor], List[int]]] = None,
        return_past_key_values=False,
        **kwargs,
):
    """
    Generates a stream of conditional text based on the given input_ids using the ChatGLM2 model.

    Args:
        self (ChatGLM2ForConditionalGeneration): The instance of the ChatGLM2ForConditionalGeneration class.
        input_ids (mindspore.Tensor): The input token ids for text generation.
        generation_config (Optional[GenerationConfig]): The configuration for text generation. Default is None.
        logits_processor (Optional[LogitsProcessorList]):
            The list of logits processors to be applied on the generated logits. Default is None.
        stopping_criteria (Optional[StoppingCriteriaList]):
            The list of stopping criteria to determine when to stop text generation. Default is None.
        prefix_allowed_tokens_fn (Optional[Callable[[int, mindspore.Tensor], List[int]]]):
            The function that returns a list of allowed tokens for each prefix. Default is None.
        return_past_key_values (bool): Whether to return the past key values during generation. Default is False.

    Yields:
        `input_ids` extended with each newly generated token; when `return_past_key_values` is True,
        tuples of `(input_ids, past_key_values)`.

    Raises:
        UserWarning: If using `max_length`'s default value to control generation length.
            This behavior is deprecated and will be removed in v5 of Transformers.
            It is recommended to use `max_new_tokens` instead.
        UserWarning: If both `max_new_tokens` and `max_length` are set. `max_new_tokens` takes precedence.
        UserWarning: If the input length exceeds `max_length` and may lead to unexpected behavior.

    Note:
        This method yields generated text in a streaming fashion.
    """
    _, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]

    if generation_config is None:
        generation_config = self.generation_config
    generation_config = copy.deepcopy(generation_config)
    model_kwargs = generation_config.update(**kwargs)
    model_kwargs["use_cache"] = generation_config.use_cache
    _, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id

    if isinstance(eos_token_id, int):
        eos_token_id = [eos_token_id]

    has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
    if has_default_max_length and generation_config.max_new_tokens is None:
        warnings.warn(
            f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
            "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
            " recommend using `max_new_tokens` to control the maximum length of the generation.",
            UserWarning,
        )
    elif generation_config.max_new_tokens is not None:
        generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
        if not has_default_max_length:
            logger.warning(
                f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                "Please refer to the documentation for more information. "
                "(https://hf-mirror.com/docs/transformers/main/en/main_classes/text_generation)"
            )

    if input_ids_seq_length >= generation_config.max_length:
        input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
        logger.warning(
            f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
            " increasing `max_new_tokens`."
        )

    # 2. Set generation parameters if not already defined
    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

    logits_processor = self._get_logits_processor(
        generation_config=generation_config,
        input_ids_seq_length=input_ids_seq_length,
        encoder_input_ids=input_ids,
        prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
        logits_processor=logits_processor,
    )

    stopping_criteria = self._get_stopping_criteria(
        generation_config=generation_config, stopping_criteria=stopping_criteria
    )
    logits_warper = self._get_logits_warper(generation_config)

    unfinished_sequences = ops.ones(input_ids.shape[0], dtype=input_ids.dtype)
    scores = None
    while True:
        model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
        # forward pass to get next token
        outputs = self(
            **model_inputs,
            return_dict=True,
            output_attentions=False,
            output_hidden_states=False,
        )

        next_token_logits = outputs.logits[:, -1, :]

        # pre-process distribution
        next_token_scores = logits_processor(input_ids, next_token_logits)
        next_token_scores = logits_warper(input_ids, next_token_scores)

        # sample
        probs = ops.softmax(next_token_scores, axis=-1)
        if generation_config.do_sample:
            next_tokens = ops.multinomial(probs, num_samples=1).squeeze(1)
        else:
            next_tokens = ops.argmax(probs, dim=-1)

        # update generated ids, model inputs, and length for next step
        input_ids = ops.cat([input_ids, next_tokens[:, None]], axis=-1)
        model_kwargs = self._update_model_kwargs_for_generation(
            outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
        )
        unfinished_sequences = unfinished_sequences.mul((sum(next_tokens != i for i in eos_token_id)).long())
        if return_past_key_values:
            yield input_ids, outputs.past_key_values
        else:
            yield input_ids
        # stop when each sentence is finished, or if we exceed the maximum length
        if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
            break
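
A minimal usage sketch of the streaming loop above (the checkpoint name, the top-level imports, and `return_tensors="ms"` support are assumptions; adapt them to your setup):

```python
# Sketch: token-by-token streaming with stream_generate.
from mindnlp.transformers import ChatGLM2Tokenizer, ChatGLM2ForConditionalGeneration

tokenizer = ChatGLM2Tokenizer.from_pretrained("THUDM/chatglm2-6b")            # assumed checkpoint
model = ChatGLM2ForConditionalGeneration.from_pretrained("THUDM/chatglm2-6b")

prompt = tokenizer.build_prompt("你好")
inputs = tokenizer(prompt, return_tensors="ms")
prompt_len = inputs["input_ids"].shape[-1]

# Each iteration yields the full input_ids, grown by one sampled token.
for output_ids in model.stream_generate(inputs["input_ids"], max_new_tokens=64):
    new_tokens = output_ids[0][prompt_len:]                                   # generated part only
    print(tokenizer.decode(new_tokens.asnumpy().tolist()))
```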

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForSequenceClassification

Bases: ChatGLM2PreTrainedModel

ChatGLM2ForSequenceClassification is a class representing a pre-trained model for sequence classification based on the ChatGLM2 architecture. It inherits from the ChatGLM2PreTrainedModel and provides methods for initializing the model and generating classification outputs.

The class contains an initializer method that takes in a ChatGLM2Config object and an optional boolean parameter for empty initialization. It initializes the model with the provided configuration and sets up the transformer and classifier head layers.

The forward method takes various input tensors and parameters for generating the sequence classification output. It returns a sequence classifier output with past states if the return_dict parameter is set, or a tuple of tensors including logits and transformer outputs. The method also handles the calculation of loss based on the provided labels and problem type.

This class provides a comprehensive interface for utilizing the ChatGLM2 model for sequence classification tasks, including handling transformer outputs, dropout, and classification head operations.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
class ChatGLM2ForSequenceClassification(ChatGLM2PreTrainedModel):
    """
    ChatGLM2ForSequenceClassification is a class representing a pre-trained model for sequence classification based on
    the ChatGLM2 architecture. It inherits from the ChatGLM2PreTrainedModel and provides methods for initializing the model
    and generating classification outputs.

    The class contains an initializer method that takes in a ChatGLM2Config object and an optional boolean parameter for
    empty initialization. It initializes the model with the provided configuration and sets up the transformer and
    classifier head layers.

    The forward method takes various input tensors and parameters for generating the sequence classification output.
    It returns a sequence classifier output with past states if the return_dict parameter is set, or a tuple of tensors
    including logits and transformer outputs. The method also handles the calculation of loss based on the provided labels and problem type.

    This class provides a comprehensive interface for utilizing the ChatGLM2 model for sequence classification tasks,
    including handling transformer outputs, dropout, and classification head operations.

    """
    def __init__(self, config: ChatGLM2Config, empty_init=True):
        """
        Initializes an instance of the ChatGLM2ForSequenceClassification class.

        Args:
            self: The object itself.
            config (ChatGLM2Config): An instance of the ChatGLM2Config class containing the configuration settings for the model.
            empty_init (bool): A flag indicating whether to initialize the transformer with empty values. Defaults to True.

        Returns:
            None

        Raises:
            None
        """
        super().__init__(config)

        self.num_labels = config.num_labels
        self.transformer = ChatGLM2Model(config, empty_init=empty_init)

        self.classifier_head = nn.Linear(config.hidden_size, config.num_labels, bias=True, dtype=mindspore.float16)
        if config.classifier_dropout is not None:
            self.dropout = nn.Dropout(p=config.classifier_dropout)
        else:
            self.dropout = None
        self.config = config

        if self.config.quantization_bit:
            self.quantize(self.config.quantization_bit, empty_init=True)

    def forward(
            self,
            input_ids: Optional[mindspore.Tensor] = None,
            position_ids: Optional[mindspore.Tensor] = None,
            attention_mask: Optional[mindspore.Tensor] = None,
            full_attention_mask: Optional[mindspore.Tensor] = None,
            past_key_values: Optional[Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]] = None,
            inputs_embeds: Optional[mindspore.Tensor] = None,
            labels: Optional[mindspore.Tensor] = None,
            use_cache: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
    ) -> Union[Tuple[mindspore.Tensor, ...], SequenceClassifierOutputWithPast]:
        '''
        Constructs the ChatGLM2ForSequenceClassification model.

        Args:
            self: The object instance.
            input_ids (Optional[mindspore.Tensor]): The input token IDs. Default: None.
            position_ids (Optional[mindspore.Tensor]): The position IDs. Default: None.
            attention_mask (Optional[mindspore.Tensor]): The attention mask. Default: None.
            full_attention_mask (Optional[mindspore.Tensor]): The full attention mask. Default: None.
            past_key_values (Optional[Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]]): The past key values. Default: None.
            inputs_embeds (Optional[mindspore.Tensor]): The input embeddings. Default: None.
            labels (Optional[mindspore.Tensor]): The labels. Default: None.
            use_cache (Optional[bool]): Whether to use cache. Default: None.
            output_hidden_states (Optional[bool]): Whether to output hidden states. Default: None.
            return_dict (Optional[bool]): Whether to return a dictionary. Default: None.

        Returns:
            Union[Tuple[mindspore.Tensor, ...], SequenceClassifierOutputWithPast]: The model outputs.

        Raises:
            None.
        '''
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_outputs = self.transformer(
            input_ids=input_ids,
            position_ids=position_ids,
            attention_mask=attention_mask,
            full_attention_mask=full_attention_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        hidden_states = transformer_outputs[0]
        # ChatGLM2 hidden states are sequence-first, so index -1 takes the last token's representation.
        pooled_hidden_states = hidden_states[-1]
        if self.dropout is not None:
            pooled_hidden_states = self.dropout(pooled_hidden_states)
        logits = self.classifier_head(pooled_hidden_states)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and labels.dtype in (mindspore.int64, mindspore.int32):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                if self.num_labels == 1:
                    loss = ops.mse_loss(logits.squeeze().float(), labels.squeeze())
                else:
                    loss = ops.mse_loss(logits.float(), labels)
            elif self.config.problem_type == "single_label_classification":
                loss = ops.cross_entropy(logits.view(-1, self.num_labels).float(), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss = ops.binary_cross_entropy_with_logits(logits.float(), labels.view(-1, self.num_labels))

        if not return_dict:
            output = (logits,) + transformer_outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )
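
A minimal sketch of using this class with a tiny, randomly initialized configuration (every hyperparameter below is illustrative, not a real checkpoint; they only need to stay mutually consistent, e.g. `kv_channels == hidden_size // num_attention_heads`):

```python
# Sketch: single-label classification with a toy ChatGLM2 configuration.
import mindspore
from mindnlp.transformers.models.chatglm2.configuration_chatglm2 import ChatGLM2Config
from mindnlp.transformers.models.chatglm2.modeling_chatglm2 import ChatGLM2ForSequenceClassification

config = ChatGLM2Config(num_layers=2, hidden_size=64, ffn_hidden_size=128,
                        num_attention_heads=8, kv_channels=8, num_labels=3)
model = ChatGLM2ForSequenceClassification(config, empty_init=False)

input_ids = mindspore.Tensor([[10, 11, 12, 13]], mindspore.int64)
labels = mindspore.Tensor([2], mindspore.int64)   # integer labels -> single_label_classification
outputs = model(input_ids=input_ids, labels=labels, return_dict=True)
print(outputs.loss, outputs.logits.shape)         # logits: (batch_size, num_labels)
```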

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForSequenceClassification.__init__(config, empty_init=True)

Initializes an instance of the ChatGLM2ForSequenceClassification class.

PARAMETER DESCRIPTION
self

The object itself.

config

An instance of the ChatGLM2Config class containing the configuration settings for the model.

TYPE: ChatGLM2Config

empty_init

A flag indicating whether to initialize the transformer with empty values. Defaults to True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION

None

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def __init__(self, config: ChatGLM2Config, empty_init=True):
    """
    Initializes an instance of the ChatGLM2ForSequenceClassification class.

    Args:
        self: The object itself.
        config (ChatGLM2Config): An instance of the ChatGLM2Config class containing the configuration settings for the model.
        empty_init (bool): A flag indicating whether to initialize the transformer with empty values. Defaults to True.

    Returns:
        None

    Raises:
        None
    """
    super().__init__(config)

    self.num_labels = config.num_labels
    self.transformer = ChatGLM2Model(config, empty_init=empty_init)

    self.classifier_head = nn.Linear(config.hidden_size, config.num_labels, bias=True, dtype=mindspore.float16)
    if config.classifier_dropout is not None:
        self.dropout = nn.Dropout(p=config.classifier_dropout)
    else:
        self.dropout = None
    self.config = config

    if self.config.quantization_bit:
        self.quantize(self.config.quantization_bit, empty_init=True)

mindnlp.transformers.models.chatglm2.modeling_chatglm2.ChatGLM2ForSequenceClassification.forward(input_ids=None, position_ids=None, attention_mask=None, full_attention_mask=None, past_key_values=None, inputs_embeds=None, labels=None, use_cache=None, output_hidden_states=None, return_dict=None)

Constructs the ChatGLM2ForSequenceClassification model.

PARAMETER DESCRIPTION
self

The object instance.

input_ids

The input token IDs. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

position_ids

The position IDs. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

attention_mask

The attention mask. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

full_attention_mask

The full attention mask. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

past_key_values

The past key values. Default: None.

TYPE: Optional[Tuple[Tuple[Tensor, Tensor], ...]] DEFAULT: None

inputs_embeds

The input embeddings. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

labels

The labels. Default: None.

TYPE: Optional[Tensor] DEFAULT: None

use_cache

Whether to use cache. Default: None.

TYPE: Optional[bool] DEFAULT: None

output_hidden_states

Whether to output hidden states. Default: None.

TYPE: Optional[bool] DEFAULT: None

return_dict

Whether to return a dictionary. Default: None.

TYPE: Optional[bool] DEFAULT: None

RETURNS DESCRIPTION
Union[Tuple[Tensor, ...], SequenceClassifierOutputWithPast]

Union[Tuple[mindspore.Tensor, ...], SequenceClassifierOutputWithPast]: The model outputs.

Source code in mindnlp/transformers/models/chatglm2/modeling_chatglm2.py
def forward(
        self,
        input_ids: Optional[mindspore.Tensor] = None,
        position_ids: Optional[mindspore.Tensor] = None,
        attention_mask: Optional[mindspore.Tensor] = None,
        full_attention_mask: Optional[mindspore.Tensor] = None,
        past_key_values: Optional[Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]] = None,
        inputs_embeds: Optional[mindspore.Tensor] = None,
        labels: Optional[mindspore.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
) -> Union[Tuple[mindspore.Tensor, ...], SequenceClassifierOutputWithPast]:
    '''
    Constructs the ChatGLM2ForSequenceClassification model.

    Args:
        self: The object instance.
        input_ids (Optional[mindspore.Tensor]): The input token IDs. Default: None.
        position_ids (Optional[mindspore.Tensor]): The position IDs. Default: None.
        attention_mask (Optional[mindspore.Tensor]): The attention mask. Default: None.
        full_attention_mask (Optional[mindspore.Tensor]): The full attention mask. Default: None.
        past_key_values (Optional[Tuple[Tuple[mindspore.Tensor, mindspore.Tensor], ...]]): The past key values. Default: None.
        inputs_embeds (Optional[mindspore.Tensor]): The input embeddings. Default: None.
        labels (Optional[mindspore.Tensor]): The labels. Default: None.
        use_cache (Optional[bool]): Whether to use cache. Default: None.
        output_hidden_states (Optional[bool]): Whether to output hidden states. Default: None.
        return_dict (Optional[bool]): Whether to return a dictionary. Default: None.

    Returns:
        Union[Tuple[mindspore.Tensor, ...], SequenceClassifierOutputWithPast]: The model outputs.

    Raises:
        None.
    '''
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    transformer_outputs = self.transformer(
        input_ids=input_ids,
        position_ids=position_ids,
        attention_mask=attention_mask,
        full_attention_mask=full_attention_mask,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )

    hidden_states = transformer_outputs[0]
    # ChatGLM2 hidden states are sequence-first, so index -1 takes the last token's representation.
    pooled_hidden_states = hidden_states[-1]
    if self.dropout is not None:
        pooled_hidden_states = self.dropout(pooled_hidden_states)
    logits = self.classifier_head(pooled_hidden_states)

    loss = None
    if labels is not None:
        if self.config.problem_type is None:
            if self.num_labels == 1:
                self.config.problem_type = "regression"
            elif self.num_labels > 1 and labels.dtype in (mindspore.int64, mindspore.int32):
                self.config.problem_type = "single_label_classification"
            else:
                self.config.problem_type = "multi_label_classification"

        if self.config.problem_type == "regression":
            if self.num_labels == 1:
                loss = ops.mse_loss(logits.squeeze().float(), labels.squeeze())
            else:
                loss = ops.mse_loss(logits.float(), labels)
        elif self.config.problem_type == "single_label_classification":
            loss = ops.cross_entropy(logits.view(-1, self.num_labels).float(), labels.view(-1))
        elif self.config.problem_type == "multi_label_classification":
            loss = ops.binary_cross_entropy_with_logits(logits.float(), labels.view(-1, self.num_labels))

    if not return_dict:
        output = (logits,) + transformer_outputs[1:]
        return ((loss,) + output) if loss is not None else output

    return SequenceClassifierOutputWithPast(
        loss=loss,
        logits=logits,
        past_key_values=transformer_outputs.past_key_values,
        hidden_states=transformer_outputs.hidden_states,
        attentions=transformer_outputs.attentions,
    )
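
The loss branch above is selected from `num_labels` and the label dtype. A small sketch of that inference logic (mirroring the code, with illustrative inputs):

```python
# Sketch: how problem_type is inferred when it is not set in the config.
import mindspore

def infer_problem_type(num_labels, labels):
    if num_labels == 1:
        return "regression"                        # MSE loss
    if labels.dtype in (mindspore.int64, mindspore.int32):
        return "single_label_classification"       # cross-entropy loss
    return "multi_label_classification"            # BCE-with-logits loss

print(infer_problem_type(3, mindspore.Tensor([1], mindspore.int64)))   # single_label_classification
print(infer_problem_type(3, mindspore.Tensor([[1.0, 0.0, 1.0]])))      # multi_label_classification
```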

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer

Bases: PreTrainedTokenizer

ChatGLM2Tokenizer

Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
class ChatGLM2Tokenizer(PreTrainedTokenizer):
    """ChatGLM2Tokenizer"""
    vocab_files_names = {"vocab_file": "tokenizer.model"}

    model_input_names = ["input_ids", "attention_mask", "position_ids"]

    def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, **kwargs):
        """
        Initializes a ChatGLM2Tokenizer object.

        Args:
            vocab_file (str): The path to the vocabulary file used by the tokenizer.
            padding_side (str, optional): The side to pad sequences. Default is 'left'.
            clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces. Default is False.
            **kwargs: Additional keyword arguments to pass to the parent class.

        Returns:
            None.

        Raises:
            None.
        """
        self.name = "GLMTokenizer"

        self.vocab_file = vocab_file
        self.tokenizer = SPTokenizer(vocab_file)
        self.special_tokens = {
            "<bos>": self.tokenizer.bos_id,
            "<eos>": self.tokenizer.eos_id,
            "<pad>": self.tokenizer.pad_id
        }
        super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)

    def get_command(self, token):
        """
        This method `get_command` in the class `ChatGLM2Tokenizer` retrieves a command associated with a given token.

        Args:
            self (ChatGLM2Tokenizer): An instance of the ChatGLM2Tokenizer class.
                This parameter is used to access the special tokens and tokenizer associated with the instance.
            token (str): The token for which the associated command needs to be retrieved.
                This parameter specifies the token for which the command is to be fetched from the special tokens.

        Returns:
            int: The token ID associated with the given token, looked up first in the instance's
                special tokens and then in the underlying tokenizer's special tokens.

        Raises:
            AssertionError: If the provided token is not present in the special tokens of the
                ChatGLM2Tokenizer instance or the underlying tokenizer, with a message indicating
                that the token is not a special token for the instance.
        """
        if token in self.special_tokens:
            return self.special_tokens[token]
        assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
        return self.tokenizer.special_tokens[token]

    @property
    def unk_token(self) -> str:
        """
        Returns the unknown token.

        Args:
            self: An instance of the ChatGLM2Tokenizer class.

        Returns:
            str: The unknown token '<unk>'.

        Raises:
            None.
        """
        return "<unk>"

    @property
    def pad_token(self) -> str:
        """
        Method that returns the padding token for the ChatGLM2Tokenizer.

        Args:
            self: The instance of the ChatGLM2Tokenizer class.

        Returns:
            str: The padding token '<unk>' used for padding sequences during tokenization.

        Raises:
            None.
        """
        return "<unk>"

    @property
    def pad_token_id(self):
        """
        This method retrieves the token ID for the '<pad>' token in the ChatGLM2Tokenizer class.

        Args:
            self (ChatGLM2Tokenizer): The instance of the ChatGLM2Tokenizer class.
                This parameter represents the current instance of the ChatGLM2Tokenizer class.

        Returns:
           The token ID for the '<pad>' token in the ChatGLM2Tokenizer class.

        Raises:
            None.
        """
        return self.get_command("<pad>")

    @property
    def eos_token(self) -> str:
        """
        Returns the end-of-sentence token.

        This method is a property decorator that returns the end-of-sentence token as a string.

        Args:
            self: An instance of the ChatGLM2Tokenizer class.

        Returns:
            A string representing the end-of-sentence token.

        Raises:
            None.
        """
        return "</s>"

    @property
    def eos_token_id(self):
        """
        Returns the token ID for the end-of-sentence (EOS) token in the ChatGLM2Tokenizer class.

        Args:
            self (ChatGLM2Tokenizer): An instance of the ChatGLM2Tokenizer class.

        Returns:
            Token ID for the end-of-sentence (EOS) token.

        Raises:
            None: This method does not raise any exceptions.
        """
        return self.get_command("<eos>")

    @property
    def vocab_size(self):
        """
        Returns the vocabulary size of the ChatGLM2Tokenizer.

        Args:
            self (ChatGLM2Tokenizer): The instance of the ChatGLM2Tokenizer class.

        Returns:
            int: The number of words in the tokenizer's vocabulary.

        Raises:
            None: This method does not raise any exceptions.
        """
        return self.tokenizer.n_words

    def get_vocab(self):
        """ Returns vocab as a dict """
        vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text, **kwargs):
        """
        Method to tokenize text using the tokenizer associated with the ChatGLM2Tokenizer class.

        Args:
            self (ChatGLM2Tokenizer): The instance of the ChatGLM2Tokenizer class.
            text (str): The input text to be tokenized.

        Returns:
            The list of tokens produced by the underlying SentencePiece tokenizer.

        Raises:
            This method does not raise any exceptions.
        """
        return self.tokenizer.tokenize(text)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        return self.tokenizer.convert_token_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.tokenizer.convert_id_to_token(index)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        """
        Converts a list of tokens into a single string representation using the ChatGLM2Tokenizer.

        Args:
            self (ChatGLM2Tokenizer): An instance of the ChatGLM2Tokenizer class.
            tokens (List[str]): A list of tokens to be converted into a string representation.

        Returns:
            str: The string representation of the given list of tokens.

        Raises:
            None.

        Note:
            The 'tokens' parameter should only contain valid tokens that are supported by the ChatGLM2Tokenizer.
            Any invalid tokens may result in unexpected behavior.

        Example:
            ```python
            >>> tokenizer = ChatGLM2Tokenizer()
            >>> tokens = ['Hello', ',', 'how', 'are', 'you', '?']
            >>> string_representation = tokenizer.convert_tokens_to_string(tokens)
            >>> # string_representation will be 'Hello, how are you?'
            ```
        """
        return self.tokenizer.decode_tokens(tokens)

    def save_vocabulary(self, save_directory, filename_prefix=None):
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                An optional prefix to add to the names of the saved files.
        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(
                save_directory, self.vocab_files_names["vocab_file"]
            )
        else:
            vocab_file = save_directory

        with open(self.vocab_file, 'rb') as fin:
            proto_str = fin.read()

        with open(vocab_file, "wb") as writer:
            writer.write(proto_str)

        return (vocab_file,)

    def get_prefix_tokens(self):
        """
        Returns a list of prefix tokens used in the ChatGLM2Tokenizer class.

        Args:
            self: The instance of the ChatGLM2Tokenizer class.

        Returns:
            list: A list of prefix tokens used in the ChatGLM2Tokenizer class.
                The list contains two elements:

                1. The result of the self.get_command('[gMASK]') method.
                2. The result of the self.get_command('sop') method.

        Raises:
            None.
        """
        prefix_tokens = [self.get_command("[gMASK]"), self.get_command("sop")]
        return prefix_tokens

    def build_prompt(self, query, history=None):
        """
        This method builds a prompt for a chat history in the ChatGLM2Tokenizer class.

        Args:
            self: The instance of the class.
            query (str): The input query for the prompt.
            history (list): A list of tuples representing the chat history. Each tuple contains an old query and its response.

        Returns:
            str: A formatted prompt containing the chat history and the input query.

        Raises:
            None
        """
        if history is None:
            history = []
        prompt = ""
        for i, (old_query, response) in enumerate(history):
            prompt += "[Round {}]\n\n问:{}\n\n答:{}\n\n".format(i + 1, old_query, response)
        prompt += "[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
        return prompt

    def build_inputs_with_special_tokens(
            self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by
        concatenating and adding special tokens. A ChatGLM2 sequence has the following format:

        - single sequence: `[gMASK] sop X`
        - pair of sequences: `[gMASK] sop A B </s>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        prefix_tokens = self.get_prefix_tokens()
        token_ids_0 = prefix_tokens + token_ids_0
        if token_ids_1 is not None:
            token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
        return token_ids_0

    def _pad(
            self,
            encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
            max_length: Optional[int] = None,
            padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
            pad_to_multiple_of: Optional[int] = None,
            return_attention_mask: Optional[bool] = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)

        Args:
            encoded_inputs:
                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
            max_length: maximum length of the returned list and optionally padding length (see below).
                Will truncate by taking into account the special tokens.
            padding_strategy:
                PaddingStrategy to use for padding.

                - PaddingStrategy.LONGEST: Pad to the longest sequence in the batch
                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
                - PaddingStrategy.DO_NOT_PAD: Do not pad
                - The tokenizer padding sides are defined in self.padding_side:

                    - 'left': pads on the left of the sequences
                    - 'right': pads on the right of the sequences
            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
                `>= 7.5` (Volta).
            return_attention_mask:
                (optional) Set to False to avoid returning attention mask (default: set to model specifics)
        """
        # Load from model defaults
        assert self.padding_side == "left"

        required_input = encoded_inputs[self.model_input_names[0]]
        seq_length = len(required_input)

        if padding_strategy == PaddingStrategy.LONGEST:
            max_length = len(required_input)

        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of

        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length

        # Initialize attention mask if not present.
        if "attention_mask" not in encoded_inputs:
            encoded_inputs["attention_mask"] = [1] * seq_length

        if "position_ids" not in encoded_inputs:
            encoded_inputs["position_ids"] = list(range(seq_length))

        if needs_to_be_padded:
            difference = max_length - len(required_input)

            if "attention_mask" in encoded_inputs:
                encoded_inputs["attention_mask"] = [0] * difference + encoded_inputs["attention_mask"]
            if "position_ids" in encoded_inputs:
                encoded_inputs["position_ids"] = [0] * difference + encoded_inputs["position_ids"]
            encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input

        return encoded_inputs
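
A short sketch of the left padding `_pad` enforces when encoding a batch (the checkpoint name is an assumption):

```python
# Sketch: batch encoding pads input_ids, attention_mask and position_ids on the LEFT.
from mindnlp.transformers import ChatGLM2Tokenizer

tokenizer = ChatGLM2Tokenizer.from_pretrained("THUDM/chatglm2-6b")   # assumed checkpoint
batch = tokenizer(["hi", "a much longer example sentence"], padding=True)

# Shorter sequences gain pad_token_id at the front; the padded positions get
# 0s in both attention_mask and position_ids.
for ids, mask in zip(batch["input_ids"], batch["attention_mask"]):
    print(mask, ids)
```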

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.eos_token: str property

Returns the end-of-sentence token.

This method is a property decorator that returns the end-of-sentence token as a string.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM2Tokenizer class.

RETURNS DESCRIPTION
str

A string representing the end-of-sentence token.

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.eos_token_id property

Returns the token ID for the end-of-sentence (EOS) token in the ChatGLM2Tokenizer class.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM2Tokenizer class.

TYPE: ChatGLM2Tokenizer

RETURNS DESCRIPTION

Token ID for the end-of-sentence (EOS) token.

RAISES DESCRIPTION
None

This method does not raise any exceptions.

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.pad_token: str property

Method that returns the padding token for the ChatGLM2Tokenizer.

PARAMETER DESCRIPTION
self

The instance of the ChatGLM2Tokenizer class.

RETURNS DESCRIPTION
str

The padding token '<unk>' used for padding sequences during tokenization.

TYPE: str

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.pad_token_id property

This method retrieves the token ID for the '<pad>' token in the ChatGLM2Tokenizer class.

PARAMETER DESCRIPTION
self

The instance of the ChatGLM2Tokenizer class. This parameter represents the current instance of the ChatGLM2Tokenizer class.

TYPE: ChatGLM2Tokenizer

RETURNS DESCRIPTION

The token ID for the '<pad>' token in the ChatGLM2Tokenizer class.

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.unk_token: str property

Returns the unknown token.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM2Tokenizer class.

RETURNS DESCRIPTION
str

The unknown token '<unk>'.

TYPE: str

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.vocab_size property

Returns the vocabulary size of the ChatGLM2Tokenizer.

PARAMETER DESCRIPTION
self

The instance of the ChatGLM2Tokenizer class.

TYPE: ChatGLM2Tokenizer

RETURNS DESCRIPTION

The number of words in the tokenizer's vocabulary.

RAISES DESCRIPTION
None

This method does not raise any exceptions.

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.__init__(vocab_file, padding_side='left', clean_up_tokenization_spaces=False, **kwargs)

Initializes a ChatGLM2Tokenizer object.

PARAMETER DESCRIPTION
vocab_file

The path to the vocabulary file used by the tokenizer.

TYPE: str

padding_side

The side to pad sequences. Default is 'left'.

TYPE: str DEFAULT: 'left'

clean_up_tokenization_spaces

Whether to clean up tokenization spaces. Default is False.

TYPE: bool DEFAULT: False

**kwargs

Additional keyword arguments to pass to the parent class.

DEFAULT: {}

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, **kwargs):
    """
    Initializes a ChatGLM2Tokenizer object.

    Args:
        vocab_file (str): The path to the vocabulary file used by the tokenizer.
        padding_side (str, optional): The side to pad sequences. Default is 'left'.
        clean_up_tokenization_spaces (bool, optional): Whether to clean up tokenization spaces. Default is False.
        **kwargs: Additional keyword arguments to pass to the parent class.

    Returns:
        None.

    Raises:
        None.
    """
    self.name = "GLMTokenizer"

    self.vocab_file = vocab_file
    self.tokenizer = SPTokenizer(vocab_file)
    self.special_tokens = {
        "<bos>": self.tokenizer.bos_id,
        "<eos>": self.tokenizer.eos_id,
        "<pad>": self.tokenizer.pad_id
    }
    super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.build_inputs_with_special_tokens(token_ids_0, token_ids_1=None)

Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and adding special tokens. A ChatGLM2 sequence has the following format:

  • single sequence: [gMASK] sop X
  • pair of sequences: [gMASK] sop A B </s>
PARAMETER DESCRIPTION
token_ids_0

List of IDs to which the special tokens will be added.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: List of input IDs with the appropriate special tokens.

Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Build model inputs from a sequence or a pair of sequences for sequence classification tasks by
    concatenating and adding special tokens. A ChatGLM2 sequence has the following format:

    - single sequence: `[gMASK] sop X`
    - pair of sequences: `[gMASK] sop A B </s>`

    Args:
        token_ids_0 (`List[int]`):
            List of IDs to which the special tokens will be added.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
    """
    prefix_tokens = self.get_prefix_tokens()
    token_ids_0 = prefix_tokens + token_ids_0
    if token_ids_1 is not None:
        token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
    return token_ids_0
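
A small sketch of the resulting layouts, assuming `tokenizer` is an already loaded ChatGLM2Tokenizer and the token IDs below are placeholders:

```python
# Single sequence: prefix tokens are prepended, nothing is appended.
ids = tokenizer.build_inputs_with_special_tokens([100, 101])
assert ids == tokenizer.get_prefix_tokens() + [100, 101]

# Sequence pair: prefix + A + B + <eos>.
pair = tokenizer.build_inputs_with_special_tokens([100], [200, 201])
assert pair == tokenizer.get_prefix_tokens() + [100, 200, 201, tokenizer.get_command("<eos>")]
```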

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.build_prompt(query, history=None)

This method builds a prompt for a chat history in the ChatGLM2Tokenizer class.

PARAMETER DESCRIPTION
self

The instance of the class.

query

The input query for the prompt.

TYPE: str

history

A list of tuples representing the chat history. Each tuple contains an old query and its response.

TYPE: list DEFAULT: None

RETURNS DESCRIPTION
str

A formatted prompt containing the chat history and the input query.

Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
def build_prompt(self, query, history=None):
    """
    This method builds a prompt for a chat history in the ChatGLM2Tokenizer class.

    Args:
        self: The instance of the class.
        query (str): The input query for the prompt.
        history (list): A list of tuples representing the chat history. Each tuple contains an old query and its response.

    Returns:
        str: A formatted prompt containing the chat history and the input query.

    Raises:
        None
    """
    if history is None:
        history = []
    prompt = ""
    for i, (old_query, response) in enumerate(history):
        prompt += "[Round {}]\n\n问:{}\n\n答:{}\n\n".format(i + 1, old_query, response)
    prompt += "[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
    return prompt
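
For example, with one round of history the prompt renders as follows (assuming a loaded tokenizer):

```python
history = [("你好", "你好!有什么可以帮您?")]
print(tokenizer.build_prompt("今天天气怎么样?", history))
# [Round 1]
#
# 问:你好
#
# 答:你好!有什么可以帮您?
#
# [Round 2]
#
# 问:今天天气怎么样?
#
# 答:
```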

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.convert_tokens_to_string(tokens)

Converts a list of tokens into a single string representation using the ChatGLM2Tokenizer.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM2Tokenizer class.

TYPE: ChatGLM2Tokenizer

tokens

A list of tokens to be converted into a string representation.

TYPE: List[str]

RETURNS DESCRIPTION
str

The string representation of the given list of tokens.

TYPE: str

Note

The 'tokens' parameter should only contain valid tokens that are supported by the ChatGLM2Tokenizer. Any invalid tokens may result in unexpected behavior.

Example
>>> tokenizer = ChatGLM2Tokenizer()
>>> tokens = ['Hello', ',', 'how', 'are', 'you', '?']
>>> string_representation = tokenizer.convert_tokens_to_string(tokens)
>>> # string_representation will be 'Hello, how are you?'
Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
def convert_tokens_to_string(self, tokens: List[str]) -> str:
    """
    Converts a list of tokens into a single string representation using the ChatGLM2Tokenizer.

    Args:
        self (ChatGLM2Tokenizer): An instance of the ChatGLM2Tokenizer class.
        tokens (List[str]): A list of tokens to be converted into a string representation.

    Returns:
        str: The string representation of the given list of tokens.

    Raises:
        None.

    Note:
        The 'tokens' parameter should only contain valid tokens that are supported by the ChatGLM2Tokenizer.
        Any invalid tokens may result in unexpected behavior.

    Example:
        ```python
        >>> tokenizer = ChatGLM2Tokenizer()
        >>> tokens = ['Hello', ',', 'how', 'are', 'you', '?']
        >>> string_representation = tokenizer.convert_tokens_to_string(tokens)
        >>> # string_representation will be 'Hello, how are you?'
        ```
    """
    return self.tokenizer.decode_tokens(tokens)

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.get_command(token)

This method get_command in the class ChatGLM2Tokenizer retrieves a command associated with a given token.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM2Tokenizer class. This parameter is used to access the special tokens and tokenizer associated with the instance.

TYPE: ChatGLM2Tokenizer

token

The token for which the associated command needs to be retrieved. This parameter specifies the token for which the command is to be fetched from the special tokens.

TYPE: str

RETURNS DESCRIPTION
int

The token ID associated with the given token, looked up first in the instance's special tokens and then in the underlying tokenizer's special tokens.

RAISES DESCRIPTION
AssertionError

If the provided token is not present in the special tokens of the ChatGLM2Tokenizer instance, an AssertionError is raised with a message indicating that the token is not a special token for the instance.

Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
def get_command(self, token):
    """
    This method `get_command` in the class `ChatGLM2Tokenizer` retrieves a command associated with a given token.

    Args:
        self (ChatGLM2Tokenizer): An instance of the ChatGLM2Tokenizer class.
            This parameter is used to access the special tokens and tokenizer associated with the instance.
        token (str): The token for which the associated command needs to be retrieved.
            This parameter specifies the token for which the command is to be fetched from the special tokens.

    Returns:
        int: The token ID associated with the given token, looked up first in the instance's
            special tokens and then in the underlying tokenizer's special tokens.

    Raises:
        AssertionError: If the provided token is not present in the special tokens of the
            ChatGLM2Tokenizer instance or the underlying tokenizer, with a message indicating
            that the token is not a special token for the instance.
    """
    if token in self.special_tokens:
        return self.special_tokens[token]
    assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
    return self.tokenizer.special_tokens[token]

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.get_prefix_tokens()

Returns a list of prefix tokens used in the ChatGLM2Tokenizer class.

PARAMETER DESCRIPTION
self

The instance of the ChatGLM2Tokenizer class.

RETURNS DESCRIPTION
list

A list of prefix tokens used in the ChatGLM2Tokenizer class. The list contains two elements:

  1. The result of the self.get_command('[gMASK]') method.
  2. The result of the self.get_command('sop') method.
Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
def get_prefix_tokens(self):
    """
    Returns a list of prefix tokens used in the ChatGLM2Tokenizer class.

    Args:
        self: The instance of the ChatGLM2Tokenizer class.

    Returns:
        list: A list of prefix tokens used in the ChatGLM2Tokenizer class.
            The list contains two elements:

            1. The result of the self.get_command('[gMASK]') method.
            2. The result of the self.get_command('sop') method.

    Raises:
        None.
    """
    prefix_tokens = [self.get_command("[gMASK]"), self.get_command("sop")]
    return prefix_tokens

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.get_vocab()

Returns vocab as a dict

Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
def get_vocab(self):
    """ Returns vocab as a dict """
    vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
    vocab.update(self.added_tokens_encoder)
    return vocab

mindnlp.transformers.models.chatglm2.tokenization_chatglm2.ChatGLM2Tokenizer.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary and special tokens file to a directory.

PARAMETER DESCRIPTION
save_directory

The directory in which to save the vocabulary.

TYPE: str

filename_prefix

An optional prefix to add to the names of the saved files.

TYPE: str, optional DEFAULT: None

RETURNS DESCRIPTION

Tuple(str): Paths to the files saved.

Source code in mindnlp/transformers/models/chatglm2/tokenization_chatglm2.py
def save_vocabulary(self, save_directory, filename_prefix=None):
    """
    Save the vocabulary and special tokens file to a directory.

    Args:
        save_directory (`str`):
            The directory in which to save the vocabulary.
        filename_prefix (`str`, *optional*):
            An optional prefix to add to the names of the saved files.
    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    if os.path.isdir(save_directory):
        vocab_file = os.path.join(
            save_directory, self.vocab_files_names["vocab_file"]
        )
    else:
        vocab_file = save_directory

    with open(self.vocab_file, 'rb') as fin:
        proto_str = fin.read()

    with open(vocab_file, "wb") as writer:
        writer.write(proto_str)

    return (vocab_file,)
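
A short usage sketch (the directory name is illustrative; note that `filename_prefix` is accepted for API compatibility but the file is always written as tokenizer.model):

```python
import os

os.makedirs("chatglm2_tok", exist_ok=True)
paths = tokenizer.save_vocabulary("chatglm2_tok")   # assumes a loaded tokenizer
print(paths)                                        # ('chatglm2_tok/tokenizer.model',)
```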