
chatglm3

mindnlp.transformers.models.chatglm3.ChatGLM3Config

Bases: PretrainedConfig

ChatGLM2Config

Source code in mindnlp/transformers/models/chatglm2/configuration_chatglm2.py
class ChatGLM2Config(PretrainedConfig):
    """ChatGLM2Config"""
    model_type = "chatglm"
    def __init__(
        self,
        num_layers=28,
        padded_vocab_size=65024,
        hidden_size=4096,
        ffn_hidden_size=13696,
        kv_channels=128,
        num_attention_heads=32,
        seq_length=2048,
        hidden_dropout=0.0,
        classifier_dropout=None,
        attention_dropout=0.0,
        layernorm_epsilon=1e-5,
        rmsnorm=True,
        apply_residual_connection_post_layernorm=False,
        post_layer_norm=True,
        add_bias_linear=False,
        add_qkv_bias=False,
        bias_dropout_fusion=True,
        multi_query_attention=False,
        multi_query_group_num=1,
        apply_query_key_layer_scaling=True,
        attention_softmax_in_fp32=True,
        fp32_residual_connection=False,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs
    ):
        """Initialize a ChatGLM2Config object.

        Args:
            self (ChatGLM2Config): An instance of the ChatGLM2Config class.
            num_layers (int, optional): The number of layers in the model. Defaults to 28.
            padded_vocab_size (int, optional): The size of the padded vocabulary. Defaults to 65024.
            hidden_size (int, optional): The size of the hidden layers. Defaults to 4096.
            ffn_hidden_size (int, optional): The size of the feed-forward network hidden layers. Defaults to 13696.
            kv_channels (int, optional): The number of channels in the key-value attention. Defaults to 128.
            num_attention_heads (int, optional): The number of attention heads. Defaults to 32.
            seq_length (int, optional): The maximum sequence length. Defaults to 2048.
            hidden_dropout (float, optional): The dropout probability for the hidden layers. Defaults to 0.0.
            classifier_dropout (float, optional): The dropout probability for the classifier layer. Defaults to None.
            attention_dropout (float, optional): The dropout probability for the attention layers. Defaults to 0.0.
            layernorm_epsilon (float, optional): The epsilon value for layer normalization. Defaults to 1e-05.
            rmsnorm (bool, optional): Whether to use RMSNorm for normalization. Defaults to True.
            apply_residual_connection_post_layernorm (bool, optional): Whether to apply residual connection after layer normalization. Defaults to False.
            post_layer_norm (bool, optional): Whether to apply layer normalization after each sublayer. Defaults to True.
            add_bias_linear (bool, optional): Whether to add bias to the linear layer. Defaults to False.
            add_qkv_bias (bool, optional): Whether to add bias to the query, key, and value layers. Defaults to False.
            bias_dropout_fusion (bool, optional): Whether to fuse bias dropout with linear layer. Defaults to True.
            multi_query_attention (bool, optional): Whether to use multi-query attention. Defaults to False.
            multi_query_group_num (int, optional): The number of groups for multi-query attention. Defaults to 1.
            apply_query_key_layer_scaling (bool, optional): Whether to apply scaling on query-key layer. Defaults to True.
            attention_softmax_in_fp32 (bool, optional): Whether to use FP32 for attention softmax. Defaults to True.
            fp32_residual_connection (bool, optional): Whether to use FP32 for residual connection. Defaults to False.
            quantization_bit (int, optional): The number of bits for quantization. Defaults to 0.
            pre_seq_len (int, optional): The length of the prefix sequence. Defaults to None.
            prefix_projection (bool, optional): Whether to use prefix projection. Defaults to False.

        Returns:
            None.

        Raises:
            None: This method does not raise any exceptions.
        """
        self.num_layers = num_layers
        self.vocab_size = padded_vocab_size
        self.padded_vocab_size = padded_vocab_size
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads
        self.seq_length = seq_length
        self.hidden_dropout = hidden_dropout
        self.classifier_dropout = classifier_dropout
        self.attention_dropout = attention_dropout
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
        self.post_layer_norm = post_layer_norm
        self.add_bias_linear = add_bias_linear
        self.add_qkv_bias = add_qkv_bias
        self.bias_dropout_fusion = bias_dropout_fusion
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection
        super().__init__(**kwargs)

mindnlp.transformers.models.chatglm3.ChatGLM3Config.__init__(num_layers=28, padded_vocab_size=65024, hidden_size=4096, ffn_hidden_size=13696, kv_channels=128, num_attention_heads=32, seq_length=2048, hidden_dropout=0.0, classifier_dropout=None, attention_dropout=0.0, layernorm_epsilon=1e-05, rmsnorm=True, apply_residual_connection_post_layernorm=False, post_layer_norm=True, add_bias_linear=False, add_qkv_bias=False, bias_dropout_fusion=True, multi_query_attention=False, multi_query_group_num=1, apply_query_key_layer_scaling=True, attention_softmax_in_fp32=True, fp32_residual_connection=False, quantization_bit=0, pre_seq_len=None, prefix_projection=False, **kwargs)

Initialize a ChatGLM2Config object.

PARAMETER DESCRIPTION

self (ChatGLM2Config): An instance of the ChatGLM2Config class.
num_layers (int, DEFAULT: 28): The number of layers in the model.
padded_vocab_size (int, DEFAULT: 65024): The size of the padded vocabulary.
hidden_size (int, DEFAULT: 4096): The size of the hidden layers.
ffn_hidden_size (int, DEFAULT: 13696): The size of the feed-forward network hidden layers.
kv_channels (int, DEFAULT: 128): The number of channels in the key-value attention.
num_attention_heads (int, DEFAULT: 32): The number of attention heads.
seq_length (int, DEFAULT: 2048): The maximum sequence length.
hidden_dropout (float, DEFAULT: 0.0): The dropout probability for the hidden layers.
classifier_dropout (float, DEFAULT: None): The dropout probability for the classifier layer.
attention_dropout (float, DEFAULT: 0.0): The dropout probability for the attention layers.
layernorm_epsilon (float, DEFAULT: 1e-05): The epsilon value for layer normalization.
rmsnorm (bool, DEFAULT: True): Whether to use RMSNorm for normalization.
apply_residual_connection_post_layernorm (bool, DEFAULT: False): Whether to apply the residual connection after layer normalization.
post_layer_norm (bool, DEFAULT: True): Whether to apply layer normalization after each sublayer.
add_bias_linear (bool, DEFAULT: False): Whether to add bias to the linear layers.
add_qkv_bias (bool, DEFAULT: False): Whether to add bias to the query, key, and value layers.
bias_dropout_fusion (bool, DEFAULT: True): Whether to fuse the bias and dropout operations.
multi_query_attention (bool, DEFAULT: False): Whether to use multi-query attention.
multi_query_group_num (int, DEFAULT: 1): The number of groups for multi-query attention.
apply_query_key_layer_scaling (bool, DEFAULT: True): Whether to apply scaling on the query-key layer.
attention_softmax_in_fp32 (bool, DEFAULT: True): Whether to use FP32 for the attention softmax.
fp32_residual_connection (bool, DEFAULT: False): Whether to use FP32 for the residual connection.
quantization_bit (int, DEFAULT: 0): The number of bits for quantization.
pre_seq_len (int, DEFAULT: None): The length of the prefix sequence.
prefix_projection (bool, DEFAULT: False): Whether to use prefix projection.

RETURNS DESCRIPTION

None.

RAISES DESCRIPTION

None: This method does not raise any exceptions.

Source code in mindnlp/transformers/models/chatglm2/configuration_chatglm2.py
def __init__(
    self,
    num_layers=28,
    padded_vocab_size=65024,
    hidden_size=4096,
    ffn_hidden_size=13696,
    kv_channels=128,
    num_attention_heads=32,
    seq_length=2048,
    hidden_dropout=0.0,
    classifier_dropout=None,
    attention_dropout=0.0,
    layernorm_epsilon=1e-5,
    rmsnorm=True,
    apply_residual_connection_post_layernorm=False,
    post_layer_norm=True,
    add_bias_linear=False,
    add_qkv_bias=False,
    bias_dropout_fusion=True,
    multi_query_attention=False,
    multi_query_group_num=1,
    apply_query_key_layer_scaling=True,
    attention_softmax_in_fp32=True,
    fp32_residual_connection=False,
    quantization_bit=0,
    pre_seq_len=None,
    prefix_projection=False,
    **kwargs
):
    """Initialize a ChatGLM2Config object.

    Args:
        self (ChatGLM2Config): An instance of the ChatGLM2Config class.
        num_layers (int, optional): The number of layers in the model. Defaults to 28.
        padded_vocab_size (int, optional): The size of the padded vocabulary. Defaults to 65024.
        hidden_size (int, optional): The size of the hidden layers. Defaults to 4096.
        ffn_hidden_size (int, optional): The size of the feed-forward network hidden layers. Defaults to 13696.
        kv_channels (int, optional): The number of channels in the key-value attention. Defaults to 128.
        num_attention_heads (int, optional): The number of attention heads. Defaults to 32.
        seq_length (int, optional): The maximum sequence length. Defaults to 2048.
        hidden_dropout (float, optional): The dropout probability for the hidden layers. Defaults to 0.0.
        classifier_dropout (float, optional): The dropout probability for the classifier layer. Defaults to None.
        attention_dropout (float, optional): The dropout probability for the attention layers. Defaults to 0.0.
        layernorm_epsilon (float, optional): The epsilon value for layer normalization. Defaults to 1e-05.
        rmsnorm (bool, optional): Whether to use RMSNorm for normalization. Defaults to True.
        apply_residual_connection_post_layernorm (bool, optional): Whether to apply residual connection after layer normalization. Defaults to False.
        post_layer_norm (bool, optional): Whether to apply layer normalization after each sublayer. Defaults to True.
        add_bias_linear (bool, optional): Whether to add bias to the linear layer. Defaults to False.
        add_qkv_bias (bool, optional): Whether to add bias to the query, key, and value layers. Defaults to False.
        bias_dropout_fusion (bool, optional): Whether to fuse bias dropout with linear layer. Defaults to True.
        multi_query_attention (bool, optional): Whether to use multi-query attention. Defaults to False.
        multi_query_group_num (int, optional): The number of groups for multi-query attention. Defaults to 1.
        apply_query_key_layer_scaling (bool, optional): Whether to apply scaling on query-key layer. Defaults to True.
        attention_softmax_in_fp32 (bool, optional): Whether to use FP32 for attention softmax. Defaults to True.
        fp32_residual_connection (bool, optional): Whether to use FP32 for residual connection. Defaults to False.
        quantization_bit (int, optional): The number of bits for quantization. Defaults to 0.
        pre_seq_len (int, optional): The length of the prefix sequence. Defaults to None.
        prefix_projection (bool, optional): Whether to use prefix projection. Defaults to False.

    Returns:
        None.

    Raises:
        None: This method does not raise any exceptions.
    """
    self.num_layers = num_layers
    self.vocab_size = padded_vocab_size
    self.padded_vocab_size = padded_vocab_size
    self.hidden_size = hidden_size
    self.ffn_hidden_size = ffn_hidden_size
    self.kv_channels = kv_channels
    self.num_attention_heads = num_attention_heads
    self.seq_length = seq_length
    self.hidden_dropout = hidden_dropout
    self.classifier_dropout = classifier_dropout
    self.attention_dropout = attention_dropout
    self.layernorm_epsilon = layernorm_epsilon
    self.rmsnorm = rmsnorm
    self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
    self.post_layer_norm = post_layer_norm
    self.add_bias_linear = add_bias_linear
    self.add_qkv_bias = add_qkv_bias
    self.bias_dropout_fusion = bias_dropout_fusion
    self.multi_query_attention = multi_query_attention
    self.multi_query_group_num = multi_query_group_num
    self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
    self.attention_softmax_in_fp32 = attention_softmax_in_fp32
    self.fp32_residual_connection = fp32_residual_connection
    self.quantization_bit = quantization_bit
    self.pre_seq_len = pre_seq_len
    self.prefix_projection = prefix_projection
    super().__init__(**kwargs)
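
As a quick orientation, here is a minimal usage sketch. It assumes only that `ChatGLM3Config` is importable from the module path documented above; the overridden values are arbitrary.

```python
from mindnlp.transformers.models.chatglm3 import ChatGLM3Config

# Small config for experimentation; unspecified fields keep the documented
# defaults (hidden_size=4096, num_attention_heads=32, ...).
config = ChatGLM3Config(num_layers=2, seq_length=1024,
                        multi_query_attention=True, multi_query_group_num=2)

# __init__ mirrors padded_vocab_size into vocab_size.
assert config.vocab_size == config.padded_vocab_size == 65024
print(config.num_layers, config.seq_length)
```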

mindnlp.transformers.models.chatglm3.modeling_chatglm3.CHATGLM3_6B_PRETRAINED_MODEL_ARCHIVE_LIST = ['THUDM/chatglm3-6b'] module-attribute

mindnlp.transformers.models.chatglm3.modeling_chatglm3.ChatGLM3Model

Bases: ChatGLM2Model

ChatGLM3Model

Source code in mindnlp/transformers/models/chatglm3/modeling_chatglm3.py
class ChatGLM3Model(ChatGLM2Model):
    """ChatGLM3Model"""

mindnlp.transformers.models.chatglm3.modeling_chatglm3.ChatGLM3ForSequenceClassification

Bases: ChatGLM2ForSequenceClassification

ChatGLM3ForSequenceClassification

Source code in mindnlp/transformers/models/chatglm3/modeling_chatglm3.py
class ChatGLM3ForSequenceClassification(ChatGLM2ForSequenceClassification):
    """ChatGLM3ForSequenceClassification"""

mindnlp.transformers.models.chatglm3.modeling_chatglm3.ChatGLM3ForConditionalGeneration

Bases: ChatGLM2ForConditionalGeneration

ChatGLM3ForConditionalGeneration

Source code in mindnlp/transformers/models/chatglm3/modeling_chatglm3.py
class ChatGLM3ForConditionalGeneration(ChatGLM2ForConditionalGeneration):
    """ChatGLM3ForConditionalGeneration"""
    def process_response(self, output, history):
        """
        Process the response by splitting it into metadata and content, updating the history, and replacing placeholders.

        Args:
            self (ChatGLM3ForConditionalGeneration): An instance of the ChatGLM3ForConditionalGeneration class.
            output (str): The response string received from the model.
            history (list): The list of previous conversation history.

        Returns:
            Tuple[content, history]: The processed content (a string, or a dict
                describing a tool call) and the updated history.

        Raises:
            None
        """
        content = ""
        history = copy.deepcopy(history)
        for response in output.split("<|assistant|>"):
            if "\n" in response:
                metadata, content = response.split("\n", maxsplit=1)
            else:
                metadata, content = "", response
            if not metadata.strip():
                content = content.strip()
                history.append({"role": "assistant", "metadata": metadata, "content": content})
                content = content.replace("[[训练时间]]", "2023年")
            else:
                history.append({"role": "assistant", "metadata": metadata, "content": content})
                if history[0]["role"] == "system" and "tools" in history[0]:
                    content = "\n".join(content.split("\n")[1:-1])
                    parameters = eval(content)
                    content = {"name": metadata.strip(), "parameters": parameters}
                else:
                    content = {"name": metadata.strip(), "content": content}
        return content, history

    def chat(self, tokenizer, query: str, history: List[Dict] = None, role: str = "user",
             max_length: int = 8192, num_beams=1, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None,
             **kwargs):
        """
        Generate a response to the given query in a chat scenario.

        Args:
            self: Reference to the current instance of the class.
            tokenizer: The tokenizer object used to tokenize the input text.
            query (str): The input query for which a response needs to be generated.
            history (List[Dict]): A list of dictionaries representing the chat history. Defaults to an empty list.
            role (str): The role of the current user in the conversation. Defaults to 'user'.
            max_length (int): The maximum length of the generated response. Defaults to 8192.
            num_beams (int): The number of beams to be used for beam search. Defaults to 1.
            do_sample (bool): Flag indicating whether to sample outputs. Defaults to True.
            top_p (float): The nucleus sampling probability. Defaults to 0.8.
            temperature (float): The temperature for sampling. Defaults to 0.8.
            logits_processor: An optional logits processor to post-process the model outputs.
            **kwargs: Additional keyword arguments to be passed to the generation process.

        Returns:
            Tuple[response, history]: The processed response (a string, or a dict
                for tool calls) and the updated conversation history.

        Raises:
            None.
        """
        if history is None:
            history = []
        if logits_processor is None:
            logits_processor = LogitsProcessorList()
        logits_processor.append(InvalidScoreLogitsProcessor())
        gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
                      "temperature": temperature, "logits_processor": logits_processor, **kwargs}
        inputs = tokenizer.build_chat_input(query, history=history, role=role)
        inputs = inputs.to(self.device)
        eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"),
                        tokenizer.get_command("<|observation|>")]
        outputs = self.generate(**inputs, **gen_kwargs, eos_token_id=eos_token_id)
        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
        response = tokenizer.decode(outputs)
        history.append({"role": role, "content": query})
        response, history = self.process_response(response, history)
        return response, history

    def stream_chat(self, tokenizer, query: str, history: List[Dict] = None, role: str = "user",
                    past_key_values=None, max_length: int = 8192, do_sample=True, top_p=0.8, temperature=0.8,
                    logits_processor=None, return_past_key_values=False, **kwargs):
        """
        This method streams a chat response based on the given input query and history using the ChatGLM3 model for conditional generation.

        Args:
            self: The instance of the class.
            tokenizer: The tokenizer object used to tokenize the input and decode the outputs.
            query (str): The input text query for generating the chat response.
            history (List[Dict], optional): A list of dictionaries representing the chat history. Defaults to None.
            role (str): The role of the user in the conversation. Defaults to 'user'.
            past_key_values: The past key values used for generating the response. Defaults to None.
            max_length (int): The maximum length of the generated response. Defaults to 8192.
            do_sample (bool): Whether to sample from the logits during generation. Defaults to True.
            top_p (float): The nucleus sampling parameter. Defaults to 0.8.
            temperature (float): The temperature parameter for sampling. Defaults to 0.8.
            logits_processor: The logits processor used to process the model logits. Defaults to None.
            return_past_key_values (bool): Whether to return the past key values along with the response. Defaults to False.

        Yields:
            Tuple[response, history] for each partial decode, or
            Tuple[response, history, past_key_values] if return_past_key_values is True.

        Raises:
            None.
        """
        if history is None:
            history = []
        if logits_processor is None:
            logits_processor = LogitsProcessorList()
        logits_processor.append(InvalidScoreLogitsProcessor())
        eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"),
                        tokenizer.get_command("<|observation|>")]
        gen_kwargs = {"max_length": max_length, "do_sample": do_sample, "top_p": top_p,
                      "temperature": temperature, "logits_processor": logits_processor, **kwargs}
        if past_key_values is None:
            inputs = tokenizer.build_chat_input(query, history=history, role=role)
        else:
            inputs = tokenizer.build_chat_input(query, role=role)

        if past_key_values is not None:
            past_length = past_key_values[0][0].shape[0]
            if self.transformer.pre_seq_len is not None:
                past_length -= self.transformer.pre_seq_len
            inputs['position_ids'] = inputs.position_ids + past_length
            attention_mask = inputs.attention_mask
            attention_mask = ops.cat((attention_mask.new_ones((1, past_length), dtype=attention_mask.dtype), attention_mask), axis=1)
            inputs['attention_mask'] = attention_mask
        history.append({"role": role, "content": query})
        for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
                                            eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
                                            **gen_kwargs):
            if return_past_key_values:
                outputs, past_key_values = outputs
            outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
            response = tokenizer.decode(outputs)
            if response and response[-1] != "�":
                response, new_history = self.process_response(response, history)
                if return_past_key_values:
                    yield response, new_history, past_key_values
                else:
                    yield response, new_history

    def stream_generate(
            self,
            input_ids,
            generation_config: Optional[GenerationConfig] = None,
            logits_processor: Optional[LogitsProcessorList] = None,
            stopping_criteria: Optional[StoppingCriteriaList] = None,
            prefix_allowed_tokens_fn: Optional[Callable[[int, mindspore.Tensor], List[int]]] = None,
            return_past_key_values=False,
            **kwargs,
    ):
        """
        Generate sequences of tokens based on the provided input_ids using the ChatGLM3 model for conditional generation.

        Args:
            self (ChatGLM3ForConditionalGeneration): The instance of the ChatGLM3ForConditionalGeneration class.
            input_ids (mindspore.Tensor): The input sequence of tokens.
            generation_config (Optional[GenerationConfig]):
                The configuration for the generation process. Defaults to None.
            logits_processor (Optional[LogitsProcessorList]):
                The list of logits processors for modifying the logits. Defaults to None.
            stopping_criteria (Optional[StoppingCriteriaList]):
                The list of stopping criteria for terminating the generation. Defaults to None.
            prefix_allowed_tokens_fn (Optional[Callable[[int, mindspore.Tensor], List[int]]]):
                The function to determine which tokens are allowed as prefixes during generation. Defaults to None.
            return_past_key_values (bool): Whether to return the past key values during generation. Defaults to False.
            **kwargs: Additional keyword arguments.

        Yields:
            input_ids extended with the newly sampled token at each step, or
            Tuple[input_ids, past_key_values] if return_past_key_values is True.

        Raises:
            UserWarning: If the `max_length` parameter is used to control the generation length, a warning is raised because this behavior is deprecated.
            UserWarning: If both `max_new_tokens` and `max_length` parameters are set, a warning is raised to indicate that `max_new_tokens` takes precedence.
            UserWarning: If the input length exceeds the `max_length` parameter, a warning is raised to consider increasing `max_new_tokens`.
        """
        _, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]

        if generation_config is None:
            generation_config = self.generation_config
        generation_config = copy.deepcopy(generation_config)
        model_kwargs = generation_config.update(**kwargs)
        model_kwargs["use_cache"] = generation_config.use_cache
        _, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id

        if isinstance(eos_token_id, int):
            eos_token_id = [eos_token_id]

        eos_token_id_tensor = mindspore.tensor(eos_token_id) if eos_token_id is not None else None

        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
        if has_default_max_length and generation_config.max_new_tokens is None:
            warnings.warn(
                f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
                "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
                " recommend using `max_new_tokens` to control the maximum length of the generation.",
                UserWarning,
            )
        elif generation_config.max_new_tokens is not None:
            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
            if not has_default_max_length:
                logger.warning(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
                    "(https://hf-mirror.com/docs/transformers/main/en/main_classes/text_generation)"
                )

        if input_ids_seq_length >= generation_config.max_length:
            input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
            logger.warning(
                f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                " increasing `max_new_tokens`."
            )

        # 2. Set generation parameters if not already defined
        logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
        stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

        logits_processor = self._get_logits_processor(
            generation_config=generation_config,
            input_ids_seq_length=input_ids_seq_length,
            encoder_input_ids=input_ids,
            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
            logits_processor=logits_processor,
        )

        stopping_criteria = self._get_stopping_criteria(
            generation_config=generation_config, stopping_criteria=stopping_criteria
        )
        logits_warper = self._get_logits_warper(generation_config)

        unfinished_sequences = ops.ones(input_ids.shape[0], dtype=input_ids.dtype)
        scores = None
        while True:
            model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
            # forward pass to get next token
            outputs = self(
                **model_inputs,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
            )

            next_token_logits = outputs.logits[:, -1, :]

            # pre-process distribution
            next_token_scores = logits_processor(input_ids, next_token_logits)
            next_token_scores = logits_warper(input_ids, next_token_scores)

            # sample
            probs = ops.softmax(next_token_scores, axis=-1)
            if generation_config.do_sample:
                next_tokens = ops.multinomial(probs, num_samples=1).squeeze(1)
            else:
                next_tokens = ops.argmax(probs, dim=-1)

            # update generated ids, model inputs, and length for next step
            input_ids = ops.cat([input_ids, next_tokens[:, None]], axis=-1)
            model_kwargs = self._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
            )

            unfinished_sequences = unfinished_sequences.mul(
                next_tokens.tile((eos_token_id_tensor.shape[0], 1)).ne(eos_token_id_tensor.unsqueeze(1)).prod(axis=0)
            )

            if return_past_key_values:
                yield input_ids, outputs.past_key_values
            else:
                yield input_ids

            # stop when each sentence is finished, or if we exceed the maximum length
            if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                break
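
The methods above are normally driven from a loaded checkpoint. A minimal sketch, assuming the standard `from_pretrained` loaders accept the `THUDM/chatglm3-6b` checkpoint named in `CHATGLM3_6B_PRETRAINED_MODEL_ARCHIVE_LIST`:

```python
# Minimal loading sketch; imports use the module paths documented on this page.
from mindnlp.transformers.models.chatglm3.modeling_chatglm3 import (
    ChatGLM3ForConditionalGeneration,
)
from mindnlp.transformers.models.chatglm3.tokenization_chatglm3 import (
    ChatGLM3Tokenizer,
)

tokenizer = ChatGLM3Tokenizer.from_pretrained("THUDM/chatglm3-6b")
model = ChatGLM3ForConditionalGeneration.from_pretrained("THUDM/chatglm3-6b")

response, history = model.chat(tokenizer, "Hello!", history=[])
print(response)
```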

mindnlp.transformers.models.chatglm3.modeling_chatglm3.ChatGLM3ForConditionalGeneration.chat(tokenizer, query, history=None, role='user', max_length=8192, num_beams=1, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, **kwargs)

Generate a response to the given query in a chat scenario.

PARAMETER DESCRIPTION

self: Reference to the current instance of the class.
tokenizer: The tokenizer object used to tokenize the input text.
query (str): The input query for which a response needs to be generated.
history (List[Dict], DEFAULT: None): A list of dictionaries representing the chat history. Defaults to an empty list.
role (str, DEFAULT: 'user'): The role of the current user in the conversation.
max_length (int, DEFAULT: 8192): The maximum length of the generated response.
num_beams (int, DEFAULT: 1): The number of beams to be used for beam search.
do_sample (bool, DEFAULT: True): Flag indicating whether to sample outputs.
top_p (float, DEFAULT: 0.8): The nucleus sampling probability.
temperature (float, DEFAULT: 0.8): The temperature for sampling.
logits_processor (DEFAULT: None): An optional logits processor to post-process the model outputs.
**kwargs (DEFAULT: {}): Additional keyword arguments to be passed to the generation process.

RETURNS DESCRIPTION

Tuple[response, history]: The processed response (a string, or a dict for tool calls) and the updated conversation history.

Source code in mindnlp/transformers/models/chatglm3/modeling_chatglm3.py
def chat(self, tokenizer, query: str, history: List[Dict] = None, role: str = "user",
         max_length: int = 8192, num_beams=1, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None,
         **kwargs):
    """
    Generate a response to the given query in a chat scenario.

    Args:
        self: Reference to the current instance of the class.
        tokenizer: The tokenizer object used to tokenize the input text.
        query (str): The input query for which a response needs to be generated.
        history (List[Dict]): A list of dictionaries representing the chat history. Defaults to an empty list.
        role (str): The role of the current user in the conversation. Defaults to 'user'.
        max_length (int): The maximum length of the generated response. Defaults to 8192.
        num_beams (int): The number of beams to be used for beam search. Defaults to 1.
        do_sample (bool): Flag indicating whether to sample outputs. Defaults to True.
        top_p (float): The nucleus sampling probability. Defaults to 0.8.
        temperature (float): The temperature for sampling. Defaults to 0.8.
        logits_processor: An optional logits processor to post-process the model outputs.
        **kwargs: Additional keyword arguments to be passed to the generation process.

    Returns:
        Tuple[response, history]: The processed response (a string, or a dict
            for tool calls) and the updated conversation history.

    Raises:
        None.
    """
    if history is None:
        history = []
    if logits_processor is None:
        logits_processor = LogitsProcessorList()
    logits_processor.append(InvalidScoreLogitsProcessor())
    gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
                  "temperature": temperature, "logits_processor": logits_processor, **kwargs}
    inputs = tokenizer.build_chat_input(query, history=history, role=role)
    inputs = inputs.to(self.device)
    eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"),
                    tokenizer.get_command("<|observation|>")]
    outputs = self.generate(**inputs, **gen_kwargs, eos_token_id=eos_token_id)
    outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
    response = tokenizer.decode(outputs)
    history.append({"role": role, "content": query})
    response, history = self.process_response(response, history)
    return response, history
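
For multi-turn use, feed the returned history back into the next call. A sketch, with `model` and `tokenizer` as in the loading example above and hypothetical queries:

```python
history = []
response, history = model.chat(tokenizer, "What is MindSpore?", history=history)
response, history = model.chat(tokenizer, "Summarize that in one sentence.",
                               history=history, temperature=0.3, top_p=0.7)
print(response)      # a str for plain replies, a dict for tool calls
print(len(history))  # typically grows by two entries per turn (user + assistant)
```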

mindnlp.transformers.models.chatglm3.modeling_chatglm3.ChatGLM3ForConditionalGeneration.process_response(output, history)

Process the response by splitting it into metadata and content, updating the history, and replacing placeholders.

PARAMETER DESCRIPTION

self (ChatGLM3ForConditionalGeneration): An instance of the ChatGLM3ForConditionalGeneration class.
output (str): The response string received from the model.
history (list): The list of previous conversation history.

RETURNS DESCRIPTION

Tuple[content, history]: The processed content (a string, or a dict describing a tool call) and the updated history.

Source code in mindnlp/transformers/models/chatglm3/modeling_chatglm3.py
def process_response(self, output, history):
    """
    Process the response by splitting it into metadata and content, updating the history, and replacing placeholders.

    Args:
        self (ChatGLM3ForConditionalGeneration): An instance of the ChatGLM3ForConditionalGeneration class.
        output (str): The response string received from the model.
        history (list): The list of previous conversation history.

    Returns:
        Tuple[content, history]: The processed content (a string, or a dict
            describing a tool call) and the updated history.

    Raises:
        None
    """
    content = ""
    history = copy.deepcopy(history)
    for response in output.split("<|assistant|>"):
        if "\n" in response:
            metadata, content = response.split("\n", maxsplit=1)
        else:
            metadata, content = "", response
        if not metadata.strip():
            content = content.strip()
            history.append({"role": "assistant", "metadata": metadata, "content": content})
            content = content.replace("[[训练时间]]", "2023年")
        else:
            history.append({"role": "assistant", "metadata": metadata, "content": content})
            if history[0]["role"] == "system" and "tools" in history[0]:
                content = "\n".join(content.split("\n")[1:-1])
                parameters = eval(content)
                content = {"name": metadata.strip(), "parameters": parameters}
            else:
                content = {"name": metadata.strip(), "content": content}
    return content, history
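
To make the splitting concrete, here is an illustrative call with a fabricated raw output string. A plain reply decodes to an empty metadata line followed by the content; the tool-call branch is only taken when the history starts with a system message carrying a 'tools' key.

```python
# Illustrative only: decoded output for a plain reply starts with an empty
# metadata line, so the assistant entry is stored with metadata="".
raw = "\nHello! How can I help you today?"
content, history = model.process_response(raw, history=[])
print(content)  # 'Hello! How can I help you today?'
print(history)  # [{'role': 'assistant', 'metadata': '', 'content': '...'}]
```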

mindnlp.transformers.models.chatglm3.modeling_chatglm3.ChatGLM3ForConditionalGeneration.stream_chat(tokenizer, query, history=None, role='user', past_key_values=None, max_length=8192, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, return_past_key_values=False, **kwargs)

This method streams a chat response based on the given input query and history using the ChatGLM3 model for conditional generation.

PARAMETER DESCRIPTION

self: The instance of the class.
tokenizer: The tokenizer object used to tokenize the input and decode the outputs.
query (str): The input text query for generating the chat response.
history (List[Dict], DEFAULT: None): A list of dictionaries representing the chat history.
role (str, DEFAULT: 'user'): The role of the user in the conversation.
past_key_values (DEFAULT: None): The past key values used for generating the response.
max_length (int, DEFAULT: 8192): The maximum length of the generated response.
do_sample (bool, DEFAULT: True): Whether to sample from the logits during generation.
top_p (float, DEFAULT: 0.8): The nucleus sampling parameter.
temperature (float, DEFAULT: 0.8): The temperature parameter for sampling.
logits_processor (DEFAULT: None): The logits processor used to process the model logits.
return_past_key_values (bool, DEFAULT: False): Whether to yield the past key values along with the response.

YIELDS DESCRIPTION

Tuple[response, history] for each partial decode, or Tuple[response, history, past_key_values] if return_past_key_values is True.

Source code in mindnlp/transformers/models/chatglm3/modeling_chatglm3.py
def stream_chat(self, tokenizer, query: str, history: List[Dict] = None, role: str = "user",
                past_key_values=None, max_length: int = 8192, do_sample=True, top_p=0.8, temperature=0.8,
                logits_processor=None, return_past_key_values=False, **kwargs):
    """
    This method streams a chat response based on the given input query and history using the ChatGLM3 model for conditional generation.

    Args:
        self: The instance of the class.
        tokenizer: The tokenizer object used to tokenize the input and decode the outputs.
        query (str): The input text query for generating the chat response.
        history (List[Dict], optional): A list of dictionaries representing the chat history. Defaults to None.
        role (str): The role of the user in the conversation. Defaults to 'user'.
        past_key_values: The past key values used for generating the response. Defaults to None.
        max_length (int): The maximum length of the generated response. Defaults to 8192.
        do_sample (bool): Whether to sample from the logits during generation. Defaults to True.
        top_p (float): The nucleus sampling parameter. Defaults to 0.8.
        temperature (float): The temperature parameter for sampling. Defaults to 0.8.
        logits_processor: The logits processor used to process the model logits. Defaults to None.
        return_past_key_values (bool): Whether to return the past key values along with the response. Defaults to False.

    Yields:
        Tuple[response, history] for each partial decode, or
        Tuple[response, history, past_key_values] if return_past_key_values is True.

    Raises:
        None.
    """
    if history is None:
        history = []
    if logits_processor is None:
        logits_processor = LogitsProcessorList()
    logits_processor.append(InvalidScoreLogitsProcessor())
    eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"),
                    tokenizer.get_command("<|observation|>")]
    gen_kwargs = {"max_length": max_length, "do_sample": do_sample, "top_p": top_p,
                  "temperature": temperature, "logits_processor": logits_processor, **kwargs}
    if past_key_values is None:
        inputs = tokenizer.build_chat_input(query, history=history, role=role)
    else:
        inputs = tokenizer.build_chat_input(query, role=role)

    if past_key_values is not None:
        past_length = past_key_values[0][0].shape[0]
        if self.transformer.pre_seq_len is not None:
            past_length -= self.transformer.pre_seq_len
        inputs['position_ids'] = inputs.position_ids + past_length
        attention_mask = inputs.attention_mask
        attention_mask = ops.cat((attention_mask.new_ones((1, past_length), dtype=attention_mask.dtype), attention_mask), axis=1)
        inputs['attention_mask'] = attention_mask
    history.append({"role": role, "content": query})
    for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
                                        eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
                                        **gen_kwargs):
        if return_past_key_values:
            outputs, past_key_values = outputs
        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
        response = tokenizer.decode(outputs)
        if response and response[-1] != "�":
            response, new_history = self.process_response(response, history)
            if return_past_key_values:
                yield response, new_history, past_key_values
            else:
                yield response, new_history
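
Since `stream_chat` is a generator, callers iterate over progressively longer decodes. A sketch, with `model` and `tokenizer` as above:

```python
# Each iteration yields the response decoded so far plus the updated history.
history = []
for response, history in model.stream_chat(tokenizer, "Tell me a joke",
                                           history=history):
    print(response, end="\r")  # overwrite the line with the growing text
print()
```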

mindnlp.transformers.models.chatglm3.modeling_chatglm3.ChatGLM3ForConditionalGeneration.stream_generate(input_ids, generation_config=None, logits_processor=None, stopping_criteria=None, prefix_allowed_tokens_fn=None, return_past_key_values=False, **kwargs)

Generate sequences of tokens based on the provided input_ids using the ChatGLM3 model for conditional generation.

PARAMETER DESCRIPTION

self (ChatGLM3ForConditionalGeneration): The instance of the ChatGLM3ForConditionalGeneration class.
input_ids (Tensor): The input sequence of tokens.
generation_config (Optional[GenerationConfig], DEFAULT: None): The configuration for the generation process.
logits_processor (Optional[LogitsProcessorList], DEFAULT: None): The list of logits processors for modifying the logits.
stopping_criteria (Optional[StoppingCriteriaList], DEFAULT: None): The list of stopping criteria for terminating the generation.
prefix_allowed_tokens_fn (Optional[Callable[[int, Tensor], List[int]]], DEFAULT: None): The function to determine which tokens are allowed as prefixes during generation.
return_past_key_values (bool, DEFAULT: False): Whether to yield the past key values during generation.
**kwargs (DEFAULT: {}): Additional keyword arguments.

YIELDS DESCRIPTION

input_ids extended with the newly sampled token at each step, or Tuple[input_ids, past_key_values] if return_past_key_values is True.

RAISES DESCRIPTION

UserWarning: If the default max_length is used to control the generation length (this behaviour is deprecated).
UserWarning: If both max_new_tokens and max_length are set; max_new_tokens takes precedence.
UserWarning: If the input length exceeds max_length; consider increasing max_new_tokens.

Source code in mindnlp/transformers/models/chatglm3/modeling_chatglm3.py
def stream_generate(
        self,
        input_ids,
        generation_config: Optional[GenerationConfig] = None,
        logits_processor: Optional[LogitsProcessorList] = None,
        stopping_criteria: Optional[StoppingCriteriaList] = None,
        prefix_allowed_tokens_fn: Optional[Callable[[int, mindspore.Tensor], List[int]]] = None,
        return_past_key_values=False,
        **kwargs,
):
    """
    Generate sequences of tokens based on the provided input_ids using the ChatGLM3 model for conditional generation.

    Args:
        self (ChatGLM3ForConditionalGeneration): The instance of the ChatGLM3ForConditionalGeneration class.
        input_ids (mindspore.Tensor): The input sequence of tokens.
        generation_config (Optional[GenerationConfig]):
            The configuration for the generation process. Defaults to None.
        logits_processor (Optional[LogitsProcessorList]):
            The list of logits processors for modifying the logits. Defaults to None.
        stopping_criteria (Optional[StoppingCriteriaList]):
            The list of stopping criteria for terminating the generation. Defaults to None.
        prefix_allowed_tokens_fn (Optional[Callable[[int, mindspore.Tensor], List[int]]]):
            The function to determine which tokens are allowed as prefixes during generation. Defaults to None.
        return_past_key_values (bool): Whether to return the past key values during generation. Defaults to False.
        **kwargs: Additional keyword arguments.

    Yields:
        input_ids extended with the newly sampled token at each step, or
        Tuple[input_ids, past_key_values] if return_past_key_values is True.

    Raises:
        UserWarning: If the `max_length` parameter is used to control the generation length, a warning is raised because this behavior is deprecated.
        UserWarning: If both `max_new_tokens` and `max_length` parameters are set, a warning is raised to indicate that `max_new_tokens` takes precedence.
        UserWarning: If the input length exceeds the `max_length` parameter, a warning is raised to consider increasing `max_new_tokens`.
    """
    _, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]

    if generation_config is None:
        generation_config = self.generation_config
    generation_config = copy.deepcopy(generation_config)
    model_kwargs = generation_config.update(**kwargs)
    model_kwargs["use_cache"] = generation_config.use_cache
    _, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id

    if isinstance(eos_token_id, int):
        eos_token_id = [eos_token_id]

    eos_token_id_tensor = mindspore.tensor(eos_token_id) if eos_token_id is not None else None

    has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
    if has_default_max_length and generation_config.max_new_tokens is None:
        warnings.warn(
            f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
            "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
            " recommend using `max_new_tokens` to control the maximum length of the generation.",
            UserWarning,
        )
    elif generation_config.max_new_tokens is not None:
        generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
        if not has_default_max_length:
            logger.warning(
                f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                "Please refer to the documentation for more information. "
                "(https://hf-mirror.com/docs/transformers/main/en/main_classes/text_generation)"
            )

    if input_ids_seq_length >= generation_config.max_length:
        input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
        logger.warning(
            f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
            " increasing `max_new_tokens`."
        )

    # 2. Set generation parameters if not already defined
    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

    logits_processor = self._get_logits_processor(
        generation_config=generation_config,
        input_ids_seq_length=input_ids_seq_length,
        encoder_input_ids=input_ids,
        prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
        logits_processor=logits_processor,
    )

    stopping_criteria = self._get_stopping_criteria(
        generation_config=generation_config, stopping_criteria=stopping_criteria
    )
    logits_warper = self._get_logits_warper(generation_config)

    unfinished_sequences = ops.ones(input_ids.shape[0], dtype=input_ids.dtype)
    scores = None
    while True:
        model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
        # forward pass to get next token
        outputs = self(
            **model_inputs,
            return_dict=True,
            output_attentions=False,
            output_hidden_states=False,
        )

        next_token_logits = outputs.logits[:, -1, :]

        # pre-process distribution
        next_token_scores = logits_processor(input_ids, next_token_logits)
        next_token_scores = logits_warper(input_ids, next_token_scores)

        # sample
        probs = ops.softmax(next_token_scores, axis=-1)
        if generation_config.do_sample:
            next_tokens = ops.multinomial(probs, num_samples=1).squeeze(1)
        else:
            next_tokens = ops.argmax(probs, dim=-1)

        # update generated ids, model inputs, and length for next step
        input_ids = ops.cat([input_ids, next_tokens[:, None]], axis=-1)
        model_kwargs = self._update_model_kwargs_for_generation(
            outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
        )

        unfinished_sequences = unfinished_sequences.mul(
            next_tokens.tile((eos_token_id_tensor.shape[0], 1)).ne(eos_token_id_tensor.unsqueeze(1)).prod(axis=0)
        )

        if return_past_key_values:
            yield input_ids, outputs.past_key_values
        else:
            yield input_ids

        # stop when each sentence is finished, or if we exceed the maximum length
        if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
            break
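
The stopping mask in the loop above is compact but worth unpacking: each step compares the sampled token of every sequence against every EOS id and zeroes out the rows that just finished. The same arithmetic, written with NumPy broadcasting and hypothetical values:

```python
import numpy as np

# Hypothetical values: a batch of 3 sequences and two EOS ids (2 and 5).
next_tokens = np.array([7, 2, 5])
eos_ids = np.array([2, 5])

# Equivalent of the tile/ne/prod chain: a row stays unfinished (1) only if
# its sampled token differs from every EOS id.
still_going = (next_tokens[None, :] != eos_ids[:, None]).prod(axis=0)
print(still_going)  # [1 0 0] -> sequences 2 and 3 just emitted an EOS
```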

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer

Bases: PreTrainedTokenizer

The 'ChatGLM3Tokenizer' class represents a tokenizer for a chat model that inherits from PreTrainedTokenizer. It provides methods for tokenizing, converting tokens to IDs, converting IDs to tokens, building model inputs with special tokens, padding, and saving the vocabulary. The class also provides methods for building chat inputs, building single messages, and extracting prefix tokens. Additionally, it offers properties for accessing special tokens and their IDs, as well as the vocabulary size. Furthermore, it provides a method for converting tokens to a string.

ATTRIBUTE DESCRIPTION

name (str): Name of the tokenizer.
vocab_file (str): Path to the vocabulary file.
tokenizer (SPTokenizer): Instance of the SPTokenizer for tokenization.
special_tokens (dict): Dictionary of special tokens and their corresponding IDs.
encode_special_tokens (bool): Flag indicating whether to encode special tokens.

  • unk_token (str): Property for accessing the unknown token.
  • pad_token (str): Property for accessing the padding token.
  • eos_token (str): Property for accessing the end-of-sequence token.
  • unk_token_id (int): Property for accessing the ID of the unknown token.
  • pad_token_id (int): Property for accessing the ID of the padding token.
  • eos_token_id (int): Property for accessing the ID of the end-of-sequence token.
  • vocab_size (int): Property for accessing the size of the vocabulary.

METHOD DESCRIPTION

get_command: Retrieves the ID of a given token.
get_vocab: Returns the vocabulary as a dictionary.
_tokenize: Tokenizes the input text.
_convert_token_to_id: Converts a token to its corresponding ID.
_convert_id_to_token: Converts an ID to its corresponding token.
convert_tokens_to_string: Converts a list of tokens to a string.
save_vocabulary: Saves the vocabulary to a directory.
get_prefix_tokens: Retrieves prefix tokens.
build_single_message: Constructs a single message with role, metadata, and message.
build_chat_input: Constructs chat input from a query and history.
build_inputs_with_special_tokens: Builds model inputs with special tokens.
_pad: Pads encoded inputs according to specified parameters.

The 'ChatGLM3Tokenizer' class provides a comprehensive set of methods for tokenization and model input construction, making it suitable for use in chat model applications.
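
A short sketch of the chat-input helpers described above, assuming `tokenizer` was loaded as in the earlier example; the history format matches the one produced by `chat`:

```python
history = [{"role": "user", "content": "Hi"},
           {"role": "assistant", "metadata": "", "content": "Hello!"}]
inputs = tokenizer.build_chat_input("What can you do?", history=history,
                                    role="user")
print(inputs["input_ids"].shape)          # (1, sequence_length)
print(tokenizer.get_command("<|user|>"))  # special-token id used as an extra EOS
```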

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
class ChatGLM3Tokenizer(PreTrainedTokenizer):

    """
    The 'ChatGLM3Tokenizer' class represents a tokenizer for a chat model that inherits from PreTrainedTokenizer.
    It provides methods for tokenizing, converting tokens to IDs, converting IDs to tokens, building model inputs
    with special tokens, padding, and saving the vocabulary.
    The class also provides methods for constructing chat inputs, building single messages, and extracting prefix tokens.
    Additionally, it offers properties for accessing special tokens and their IDs, as well as the vocabulary size.
    Furthermore, it provides a method for converting tokens to a string.

    Attributes:
        name (str): Name of the tokenizer.
        vocab_file (str): Path to the vocabulary file.
        tokenizer (SPTokenizer): Instance of the SPTokenizer for tokenization.
        special_tokens (dict): Dictionary of special tokens and their corresponding IDs.
        encode_special_tokens (bool): Flag indicating whether to encode special tokens.

    Properties:

    - unk_token (str): Property for accessing the unknown token.
    - pad_token (str): Property for accessing the padding token.
    - eos_token (str): Property for accessing the end-of-sequence token.
    - unk_token_id (int): Property for accessing the ID of the unknown token.
    - pad_token_id (int): Property for accessing the ID of the padding token.
    - eos_token_id (int): Property for accessing the ID of the end-of-sequence token.
    - vocab_size (int): Property for accessing the size of the vocabulary.

    Methods:
        get_command(token): Retrieves the ID of a given token.
        get_vocab(): Returns the vocabulary as a dictionary.
        _tokenize(text, **kwargs): Tokenizes the input text.
        _convert_token_to_id(token): Converts a token to its corresponding ID.
        _convert_id_to_token(index): Converts an ID to its corresponding token.
        convert_tokens_to_string(tokens): Converts a list of tokens to a string.
        save_vocabulary(save_directory, filename_prefix=None): Saves the vocabulary to a directory.
        get_prefix_tokens(): Retrieves prefix tokens.
        build_single_message(role, metadata, message): Constructs a single message with role, metadata, and message.
        build_chat_input(query, history=None, role='user'): Constructs chat input from a query and history.
        build_inputs_with_special_tokens(token_ids_0, token_ids_1=None): Builds model inputs with special tokens.
        _pad(encoded_inputs, max_length=None, padding_strategy=PaddingStrategy.DO_NOT_PAD, pad_to_multiple_of=None, return_attention_mask=None):
            Pads encoded inputs according to specified parameters.

    The 'ChatGLM3Tokenizer' class provides a comprehensive set of methods for tokenization and model input construction,
    making it suitable for use in chat model applications.
    """
    vocab_files_names = {"vocab_file": "tokenizer.model"}
    model_input_names = ["input_ids", "attention_mask", "position_ids"]

    def __init__(
        self,
        vocab_file,
        padding_side="left",
        clean_up_tokenization_spaces=False,
        encode_special_tokens=False,
        **kwargs
    ):
        """
        Initialize a ChatGLM3Tokenizer object.

        Args:
            vocab_file (str): The path to the vocabulary file.
            padding_side (str, optional): Specifies whether padding should be added to the 'left' or 'right'
                side of the input sequences. Default is 'left'.
            clean_up_tokenization_spaces (bool, optional): If True, clean up tokenization spaces. Default is False.
            encode_special_tokens (bool, optional): If True, special tokens will be encoded. Default is False.
            **kwargs: Additional keyword arguments.

        Returns:
            None.

        Raises:
            None.
        """
        self.name = "GLMTokenizer"
        self.vocab_file = vocab_file
        self.tokenizer = SPTokenizer(vocab_file)
        self.special_tokens = {
            "<bos>": self.tokenizer.bos_id,
            "<eos>": self.tokenizer.eos_id,
            "<unk>": self.tokenizer.pad_id,
            "<pad>": self.tokenizer.pad_id
        }
        self.encode_special_tokens = encode_special_tokens

        super().__init__(
            padding_side=padding_side,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs
        )

    def get_command(self, token):
        """
        Retrieves the command associated with a given token.

        Args:
            self (ChatGLM3Tokenizer): An instance of the ChatGLM3Tokenizer class.
            token (str): The token for which the command needs to be retrieved.

        Returns:
            int: The ID associated with the given token.

        Raises:
            AssertionError: If the token is not a special token for the ChatGLM3Tokenizer instance.

        Note:
            This method checks if the given token is one of the special tokens stored in the self.special_tokens dictionary.
            If it is, the corresponding command is returned. Otherwise, an assertion error is raised if the token is not a
            special token for the ChatGLM3Tokenizer instance.
        """
        if token in self.special_tokens:
            return self.special_tokens[token]
        assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
        return self.tokenizer.special_tokens[token]

    @property
    def unk_token(self) -> str:
        """
        This method 'unk_token' in the class 'ChatGLM3Tokenizer' retrieves the unknown token from the tokenizer.

        Args:
            self: An instance of the ChatGLM3Tokenizer class.

        Returns:
            str: The unknown token retrieved from the tokenizer.

        Raises:
            No specific exceptions are raised within this method.
        """
        return self.tokenizer.sp_model.IdToPiece(self.get_command("<unk>"))

    @property
    def pad_token(self) -> str:
        """
        This method returns the string representation of the padding token used in the ChatGLM3Tokenizer.

        Args:
            self: The instance of the ChatGLM3Tokenizer class.

        Returns:
            str: The string representation of the padding token.

        Raises:
            None
        """
        return self.tokenizer.sp_model.IdToPiece(self.get_command("<pad>"))

    @property
    def eos_token(self) -> str:
        """
        Returns the end-of-sentence token as a string.

        Args:
            self: An instance of the ChatGLM3Tokenizer class.

        Returns:
            A string representing the end-of-sentence token.

        Raises:
            None.
        """
        return self.tokenizer.sp_model.IdToPiece(self.get_command("<eos>"))

    @property
    def unk_token_id(self) -> int:
        """
        This method returns the token ID corresponding to the '<unk>' token in the ChatGLM3Tokenizer class.

        Args:
            self: A reference to the instance of the ChatGLM3Tokenizer class.

        Returns:
            int: An integer representing the token ID of the '<unk>' token in the tokenizer.

        Raises:
            This method does not explicitly raise any exceptions.
        """
        return self.get_command("<unk>")

    @property
    def pad_token_id(self) -> int:
        """
        This method returns the token ID for the padding token within the ChatGLM3Tokenizer class.

        Args:
            self: An instance of the ChatGLM3Tokenizer class.

        Returns:
            int: The token ID corresponding to the '<pad>' token.

        Raises:
            - None
        """
        return self.get_command("<pad>")

    @property
    def eos_token_id(self):
        """
        Returns the ID of the end-of-sentence (EOS) token in the ChatGLM3Tokenizer class.

        Args:
            self (ChatGLM3Tokenizer): An instance of the ChatGLM3Tokenizer class.

        Returns:
            int: The token ID corresponding to the '<eos>' token.

        Raises:
            None: This method does not raise any exceptions.
        """
        return self.get_command("<eos>")

    @unk_token.setter
    def unk_token(self, value):
        """
        Method 'unk_token' in the class 'ChatGLM3Tokenizer'.

        Args:
            self (object):
                Reference to the instance of ChatGLM3Tokenizer.

                - Purpose: Represents the current object instance.
                - Restrictions: Must be an instance of ChatGLM3Tokenizer.

            value (any):
                The new value to set for the unk_token attribute.

                - Purpose: Specifies the value to set for the unk_token attribute.
                - Restrictions: None.

        Returns:
            None:
                - Purpose: There is no return value from this method.

        Raises:
            None:
                No exceptions are raised explicitly within this method.
        """
        logger.warning("Setting unk_token is not supported, use the default one.")

    @pad_token.setter
    def pad_token(self, value):
        """Set the pad_token value for the ChatGLM3Tokenizer.

        This method sets the pad_token value for the ChatGLM3Tokenizer object.
        The pad_token value is used during tokenization to represent padding tokens.
        If this method is called, a warning message will be logged indicating that setting pad_token is not supported
        and the default pad_token value will be used instead.

        Args:
            self (ChatGLM3Tokenizer): The ChatGLM3Tokenizer object.
            value (Any): The value to set as the pad_token.

        Returns:
            None.

        Raises:
            None: This method does not raise any exceptions.
        """
        logger.warning("Setting pad_token is not supported, use the default one.")

    @eos_token.setter
    def eos_token(self, value):
        """
        Method to set the end-of-sequence token for the ChatGLM3Tokenizer class.

        Args:
            self (ChatGLM3Tokenizer): The instance of the ChatGLM3Tokenizer class.
            value (Any): The value to be set as the end-of-sequence token.
                This parameter is not used for setting the end-of-sequence token, as it is a read-only property.

        Returns:
            None.

        Raises:
            None.
        """
        logger.warning("Setting eos_token is not supported, use the default one.")

    @property
    def vocab_size(self):
        """
        This method retrieves the vocabulary size from the ChatGLM3Tokenizer instance.

        Args:
            self (ChatGLM3Tokenizer): The instance of the ChatGLM3Tokenizer class.
                It represents the tokenizer used for processing the text data.

        Returns:
            int: The vocabulary size of the tokenizer.
                It indicates the total number of unique words present in the tokenizer's vocabulary.

        Raises:
            None
        """
        return self.tokenizer.n_words

    def get_vocab(self):
        """ Returns vocab as a dict """
        vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text, **kwargs):
        """
        This method tokenizes the input text using the specified tokenizer.

        Args:
            text (str): The input text to be tokenized.
            **kwargs: Additional keyword arguments to be passed to the tokenizer.

        Returns:
            list: The list of token strings produced by the underlying SPTokenizer.

        Raises:
            Any exceptions raised by the underlying 'tokenizer.tokenize' method may be propagated.
        """
        return self.tokenizer.tokenize(text, encode_special_tokens=self.encode_special_tokens)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        return self.tokenizer.convert_token_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.tokenizer.convert_id_to_token(index)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        """
        Converts a list of tokens into a string representation using the ChatGLM3Tokenizer.

        Args:
            self (ChatGLM3Tokenizer): An instance of the ChatGLM3Tokenizer class.
            tokens (List[str]): A list of tokens to be converted into a string.

        Returns:
            str: The string representation of the tokens.

        Raises:
            None.

        This method takes in an instance of the ChatGLM3Tokenizer class and a list of tokens as input.
        It then uses the tokenizer's 'decode_tokens' method to convert the tokens into a string representation.
        The resulting string is returned as the output.

        The 'self' parameter is a reference to the current instance of the ChatGLM3Tokenizer class.
        It is used to access the tokenizer object and its methods.

        The 'tokens' parameter is a list of strings representing the tokens to be converted into a string.
        The tokens should be in the same order as they were generated by the tokenizer.

        The return value is a string representation of the tokens.
        This can be useful for displaying or manipulating the tokens in a human-readable format.

        This method does not raise any exceptions.
        """
        return self.tokenizer.decode_tokens(tokens)

    def save_vocabulary(self, save_directory, filename_prefix=None):
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                An optional prefix to add to the names of the saved files.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(
                save_directory, self.vocab_files_names["vocab_file"]
            )
        else:
            vocab_file = save_directory

        with open(self.vocab_file, 'rb') as fin:
            proto_str = fin.read()

        with open(vocab_file, "wb") as writer:
            writer.write(proto_str)

        return (vocab_file,)

    def get_prefix_tokens(self):
        """
        This method 'get_prefix_tokens' is defined within the 'ChatGLM3Tokenizer' class and retrieves a list of prefix tokens.

        Args:
            self: A reference to the instance of the class. It is used to access the instance variables and methods of the class.

        Returns:
            list: The prefix token IDs for the '[gMASK]' and 'sop' commands.

        Raises:
            This method does not raise any exceptions.
        """
        prefix_tokens = [self.get_command("[gMASK]"), self.get_command("sop")]
        return prefix_tokens

    def build_single_message(self, role, metadata, message):
        """
        Builds a single message token for the ChatGLM3Tokenizer.

        Args:
            self (ChatGLM3Tokenizer): The instance of the ChatGLM3Tokenizer class.
            role (str): The role of the message sender. It should be one of ['system', 'user', 'assistant', 'observation'].
            metadata (str): The metadata associated with the message.
            message (str): The actual message content.

        Returns:
            list: A list of tokens representing the single message built from the role, metadata, and message.

        Raises:
            AssertionError: If the 'role' parameter is not one of ['system', 'user', 'assistant', 'observation'].
        """
        assert role in ["system", "user", "assistant", "observation"], role
        role_tokens = [self.get_command(f"<|{role}|>")] + self.tokenizer.encode(f"{metadata}\n")
        message_tokens = self.tokenizer.encode(message)
        tokens = role_tokens + message_tokens
        return tokens

    def build_chat_input(self, query, history=None, role="user"):
        """
        This method builds input for a chat conversation in the ChatGLM3Tokenizer class.

        Args:
            self: The instance of the ChatGLM3Tokenizer class.
            query (str): The user's input for the chat conversation.
            history (list): A list of dictionaries representing the chat history.
                Each dictionary should have the keys 'role' (str), 'metadata' (str), and 'content' (str).

                - The 'role' key specifies the role of the participant in the conversation (either 'user' or 'system').
                - The 'metadata' key contains optional metadata for the message.
                - The 'content' key contains the actual text content of the message.
            role (str): The role of the participant for the current input. It can be either 'user' or 'system'.

        Returns:
            BatchEncoding: The encoded chat input, batched and returned as MindSpore tensors.

        Raises:
            TypeError: If the input_ids are not of the expected type.
            ValueError: If the return_tensors parameter is not set to 'ms'.
            KeyError: If the role provided is not valid (i.e., not 'user' or 'system').
            JSONDecodeError: If there is an error in decoding the JSON content of the message.
            AttributeError: If the 'tools' key is missing in the history item when the role is 'system'.
        """
        if history is None:
            history = []
        input_ids = []
        for item in history:
            content = item["content"]
            if item["role"] == "system" and "tools" in item:
                content = content + "\n" + json.dumps(item["tools"], indent=4, ensure_ascii=False)
            input_ids.extend(self.build_single_message(item["role"], item.get("metadata", ""), content))
        input_ids.extend(self.build_single_message(role, "", query))
        input_ids.extend([self.get_command("<|assistant|>")])
        return self.batch_encode_plus([input_ids], return_tensors="ms", is_split_into_words=True)

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences by concatenating and
        adding special tokens. A ChatGLM3 sequence has the following format:

        - single sequence: `[gMASK] sop X`
        - pair of sequences: `[gMASK] sop A B <eos>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        prefix_tokens = self.get_prefix_tokens()
        token_ids_0 = prefix_tokens + token_ids_0
        if token_ids_1 is not None:
            token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
        return token_ids_0

    def _pad(
        self,
        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
        max_length: Optional[int] = None,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
        pad_to_multiple_of: Optional[int] = None,
        return_attention_mask: Optional[bool] = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)

        Args:
            encoded_inputs:
                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
            max_length: maximum length of the returned list and optionally padding length (see below).
                Will truncate by taking into account the special tokens.
            padding_strategy:
                PaddingStrategy to use for padding.

                - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
                - PaddingStrategy.DO_NOT_PAD: Do not pad
                - The tokenizer padding sides are defined in self.padding_side:

                    - 'left': pads on the left of the sequences
                    - 'right': pads on the right of the sequences
            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
                `>= 7.5` (Volta).
            return_attention_mask:
                (optional) Set to False to avoid returning attention mask (default: set to model specifics)
        """
        # Load from model defaults
        assert self.padding_side == "left"

        required_input = encoded_inputs[self.model_input_names[0]]
        seq_length = len(required_input)

        if padding_strategy == PaddingStrategy.LONGEST:
            max_length = len(required_input)

        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of

        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length

        # Initialize attention mask if not present.
        if "attention_mask" not in encoded_inputs:
            encoded_inputs["attention_mask"] = [1] * seq_length

        if "position_ids" not in encoded_inputs:
            encoded_inputs["position_ids"] = list(range(seq_length))

        if needs_to_be_padded:
            difference = max_length - len(required_input)

            if "attention_mask" in encoded_inputs:
                encoded_inputs["attention_mask"] = [0] * difference + encoded_inputs["attention_mask"]
            if "position_ids" in encoded_inputs:
                encoded_inputs["position_ids"] = [0] * difference + encoded_inputs["position_ids"]
            encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input

        return encoded_inputs
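
Because _pad asserts padding_side == "left", shorter sequences are always extended on the left: pad-token IDs, attention-mask zeros, and position-id zeros are all prepended. A self-contained sketch of the same left-padding logic on plain Python lists (illustrative only, not the tokenizer's actual code path):

def left_pad(input_ids, max_length, pad_token_id):
    """Left-pad a token list and build a matching attention_mask and position_ids."""
    attention_mask = [1] * len(input_ids)
    position_ids = list(range(len(input_ids)))
    difference = max_length - len(input_ids)
    if difference > 0:
        attention_mask = [0] * difference + attention_mask
        position_ids = [0] * difference + position_ids
        input_ids = [pad_token_id] * difference + input_ids
    return {"input_ids": input_ids, "attention_mask": attention_mask, "position_ids": position_ids}

print(left_pad([5, 6, 7], max_length=5, pad_token_id=0))
# {'input_ids': [0, 0, 5, 6, 7], 'attention_mask': [0, 0, 1, 1, 1], 'position_ids': [0, 0, 0, 1, 2]}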

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.eos_token: str property writable

Returns the end-of-sentence token as a string.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM3Tokenizer class.

RETURNS DESCRIPTION
str

A string representing the end-of-sentence token.

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.eos_token_id property

Returns the ID of the end-of-sentence (EOS) token in the ChatGLM3Tokenizer class.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM3Tokenizer class.

TYPE: ChatGLM3Tokenizer

RETURNS DESCRIPTION
int

The token ID corresponding to the '<eos>' token.

RAISES DESCRIPTION
None

This method does not raise any exceptions.

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.pad_token: str property writable

This method returns the string representation of the padding token used in the ChatGLM3Tokenizer.

PARAMETER DESCRIPTION
self

The instance of the ChatGLM3Tokenizer class.

RETURNS DESCRIPTION
str

The string representation of the padding token.

TYPE: str

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.pad_token_id: int property

This method returns the token ID for the padding token within the ChatGLM3Tokenizer class.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM3Tokenizer class.

RETURNS DESCRIPTION
int

The token ID corresponding to the '<pad>' token.

TYPE: int

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.unk_token: str property writable

This method 'unk_token' in the class 'ChatGLM3Tokenizer' retrieves the unknown token from the tokenizer.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM3Tokenizer class.

RETURNS DESCRIPTION
str

The unknown token retrieved from the tokenizer.

TYPE: str

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.unk_token_id: int property

This method returns the token ID corresponding to the '<unk>' token in the ChatGLM3Tokenizer class.

PARAMETER DESCRIPTION
self

A reference to the instance of the ChatGLM3Tokenizer class.

RETURNS DESCRIPTION
int

An integer representing the token ID of the '<unk>' token in the tokenizer.

TYPE: int

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.vocab_size property

This method retrieves the vocabulary size from the ChatGLM3Tokenizer instance.

PARAMETER DESCRIPTION
self

The instance of the ChatGLM3Tokenizer class. It represents the tokenizer used for processing the text data.

TYPE: ChatGLM3Tokenizer

RETURNS DESCRIPTION
int

The vocabulary size of the tokenizer. It indicates the total number of unique words present in the tokenizer's vocabulary.

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.__init__(vocab_file, padding_side='left', clean_up_tokenization_spaces=False, encode_special_tokens=False, **kwargs)

Initialize a ChatGLM3Tokenizer object.

PARAMETER DESCRIPTION
vocab_file

The path to the vocabulary file.

TYPE: str

padding_side

Specifies whether padding should be added to the 'left' or 'right' side of the input sequences. Default is 'left'.

TYPE: str DEFAULT: 'left'

clean_up_tokenization_spaces

If True, clean up tokenization spaces. Default is False.

TYPE: bool DEFAULT: False

encode_special_tokens

If True, special tokens will be encoded. Default is False.

TYPE: bool DEFAULT: False

**kwargs

Additional keyword arguments.

DEFAULT: {}

RETURNS DESCRIPTION

None.

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def __init__(
    self,
    vocab_file,
    padding_side="left",
    clean_up_tokenization_spaces=False,
    encode_special_tokens=False,
    **kwargs
):
    """
    Initialize a ChatGLM3Tokenizer object.

    Args:
        vocab_file (str): The path to the vocabulary file.
        padding_side (str, optional): Specifies whether padding should be added to the 'left' or 'right'
            side of the input sequences. Default is 'left'.
        clean_up_tokenization_spaces (bool, optional): If True, clean up tokenization spaces. Default is False.
        encode_special_tokens (bool, optional): If True, special tokens will be encoded. Default is False.
        **kwargs: Additional keyword arguments.

    Returns:
        None.

    Raises:
        None.
    """
    self.name = "GLMTokenizer"
    self.vocab_file = vocab_file
    self.tokenizer = SPTokenizer(vocab_file)
    self.special_tokens = {
        "<bos>": self.tokenizer.bos_id,
        "<eos>": self.tokenizer.eos_id,
        "<unk>": self.tokenizer.pad_id,
        "<pad>": self.tokenizer.pad_id
    }
    self.encode_special_tokens = encode_special_tokens

    super().__init__(
        padding_side=padding_side,
        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        **kwargs
    )
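
A minimal instantiation sketch, assuming mindnlp is installed and tokenizer.model is a SentencePiece vocabulary file downloaded from a ChatGLM3 checkpoint; the local path is illustrative, and in practice the tokenizer is usually loaded via from_pretrained:

from mindnlp.transformers.models.chatglm3.tokenization_chatglm3 import ChatGLM3Tokenizer

tokenizer = ChatGLM3Tokenizer("tokenizer.model")  # path to the downloaded vocab file
print(tokenizer.vocab_size)  # size of the underlying SentencePiece vocabulary
print(tokenizer.eos_token)   # string form of the <eos> token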

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.build_chat_input(query, history=None, role='user')

This method builds input for a chat conversation in the ChatGLM3Tokenizer class.

PARAMETER DESCRIPTION
self

The instance of the ChatGLM3Tokenizer class.

query

The user's input for the chat conversation.

TYPE: str

history

A list of dictionaries representing the chat history. Each dictionary should have the keys 'role' (str), 'metadata' (str), and 'content' (str).

  • The 'role' key specifies the role of the participant in the conversation (either 'user' or 'system').
  • The 'metadata' key contains optional metadata for the message.
  • The 'content' key contains the actual text content of the message.

TYPE: list DEFAULT: None

role

The role of the participant for the current input. It can be either 'user' or 'system'.

TYPE: str DEFAULT: 'user'

RETURNS DESCRIPTION
BatchEncoding

The encoded chat input, batched and returned as MindSpore tensors.

RAISES DESCRIPTION
TypeError

If the input_ids are not of the expected type.

ValueError

If the return_tensors parameter is not set to 'ms'.

KeyError

If the role provided is not valid (i.e., not 'user' or 'system').

JSONDecodeError

If there is an error in decoding the JSON content of the message.

AttributeError

If the 'tools' key is missing in the history item when the role is 'system'.

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def build_chat_input(self, query, history=None, role="user"):
    """
    This method builds input for a chat conversation in the ChatGLM3Tokenizer class.

    Args:
        self: The instance of the ChatGLM3Tokenizer class.
        query (str): The user's input for the chat conversation.
        history (list): A list of dictionaries representing the chat history.
            Each dictionary should have the keys 'role' (str), 'metadata' (str), and 'content' (str).

            - The 'role' key specifies the role of the participant in the conversation (either 'user' or 'system').
            - The 'metadata' key contains optional metadata for the message.
            - The 'content' key contains the actual text content of the message.
        role (str): The role of the participant for the current input. It can be either 'user' or 'system'.

    Returns:
        BatchEncoding: The encoded chat input, batched and returned as MindSpore tensors.

    Raises:
        TypeError: If the input_ids are not of the expected type.
        ValueError: If the return_tensors parameter is not set to 'ms'.
        KeyError: If the role provided is not valid (i.e., not 'user' or 'system').
        JSONDecodeError: If there is an error in decoding the JSON content of the message.
        AttributeError: If the 'tools' key is missing in the history item when the role is 'system'.
    """
    if history is None:
        history = []
    input_ids = []
    for item in history:
        content = item["content"]
        if item["role"] == "system" and "tools" in item:
            content = content + "\n" + json.dumps(item["tools"], indent=4, ensure_ascii=False)
        input_ids.extend(self.build_single_message(item["role"], item.get("metadata", ""), content))
    input_ids.extend(self.build_single_message(role, "", query))
    input_ids.extend([self.get_command("<|assistant|>")])
    return self.batch_encode_plus([input_ids], return_tensors="ms", is_split_into_words=True)
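
A usage sketch, assuming a tokenizer initialized as above; each history entry follows the role/metadata/content schema described in the docstring:

history = [
    {"role": "user", "metadata": "", "content": "Hello!"},
    {"role": "assistant", "metadata": "", "content": "Hi, how can I help?"},
]
batch = tokenizer.build_chat_input("What is MindSpore?", history=history, role="user")
print(batch["input_ids"].shape)  # (1, sequence_length), returned as MindSpore tensors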

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.build_inputs_with_special_tokens(token_ids_0, token_ids_1=None)

Build model inputs from a sequence or a pair of sequences by concatenating and adding special tokens. A ChatGLM3 sequence has the following format:

  • single sequence: [gMASK] sop X
  • pair of sequences: [gMASK] sop A B <eos>
PARAMETER DESCRIPTION
token_ids_0

List of IDs to which the special tokens will be added.

TYPE: `List[int]`

token_ids_1

Optional second list of IDs for sequence pairs.

TYPE: `List[int]`, *optional* DEFAULT: None

RETURNS DESCRIPTION
List[int]

List[int]: List of input IDs with the appropriate special tokens.

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def build_inputs_with_special_tokens(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Build model inputs from a sequence or a pair of sequences by concatenating and
    adding special tokens. A ChatGLM3 sequence has the following format:

    - single sequence: `[gMASK] sop X`
    - pair of sequences: `[gMASK] sop A B <eos>`

    Args:
        token_ids_0 (`List[int]`):
            List of IDs to which the special tokens will be added.
        token_ids_1 (`List[int]`, *optional*):
            Optional second list of IDs for sequence pairs.

    Returns:
        `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
    """
    prefix_tokens = self.get_prefix_tokens()
    token_ids_0 = prefix_tokens + token_ids_0
    if token_ids_1 is not None:
        token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
    return token_ids_0
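
For example, assuming an initialized tokenizer: the prefix IDs are prepended to the first sequence and, for a pair, the second sequence plus the <eos> ID is appended (the sample strings are illustrative):

ids_a = tokenizer.tokenizer.encode("hello")  # raw SentencePiece IDs via the inner SPTokenizer
ids_b = tokenizer.tokenizer.encode("world")
single = tokenizer.build_inputs_with_special_tokens(ids_a)
pair = tokenizer.build_inputs_with_special_tokens(ids_a, ids_b)
assert single[:2] == tokenizer.get_prefix_tokens()  # [gMASK] and sop come first
assert pair[-1] == tokenizer.get_command("<eos>")   # pairs end with <eos>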

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.build_single_message(role, metadata, message)

Builds a single message token for the ChatGLM3Tokenizer.

PARAMETER DESCRIPTION
self

The instance of the ChatGLM3Tokenizer class.

TYPE: ChatGLM3Tokenizer

role

The role of the message sender. It should be one of ['system', 'user', 'assistant', 'observation'].

TYPE: str

metadata

The metadata associated with the message.

TYPE: str

message

The actual message content.

TYPE: str

RETURNS DESCRIPTION
list

A list of tokens representing the single message built from the role, metadata, and message.

RAISES DESCRIPTION
AssertionError

If the 'role' parameter is not one of ['system', 'user', 'assistant', 'observation'].

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def build_single_message(self, role, metadata, message):
    """
    Builds a single message token for the ChatGLM3Tokenizer.

    Args:
        self (ChatGLM3Tokenizer): The instance of the ChatGLM3Tokenizer class.
        role (str): The role of the message sender. It should be one of ['system', 'user', 'assistant', 'observation'].
        metadata (str): The metadata associated with the message.
        message (str): The actual message content.

    Returns:
        list: A list of tokens representing the single message built from the role, metadata, and message.

    Raises:
        AssertionError: If the 'role' parameter is not one of ['system', 'user', 'assistant', 'observation'].
    """
    assert role in ["system", "user", "assistant", "observation"], role
    role_tokens = [self.get_command(f"<|{role}|>")] + self.tokenizer.encode(f"{metadata}\n")
    message_tokens = self.tokenizer.encode(message)
    tokens = role_tokens + message_tokens
    return tokens
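
A sketch of the resulting layout, assuming an initialized tokenizer: the role command ID leads, followed by the encoded metadata line and the message tokens:

tokens = tokenizer.build_single_message("user", "", "Hello!")
assert tokens[0] == tokenizer.get_command("<|user|>")  # role marker comes first
# The remaining IDs encode "\n" (the empty metadata line) followed by "Hello!".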

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.convert_tokens_to_string(tokens)

Converts a list of tokens into a string representation using the ChatGLM3Tokenizer.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM3Tokenizer class.

TYPE: ChatGLM3Tokenizer

tokens

A list of tokens to be converted into a string.

TYPE: List[str]

RETURNS DESCRIPTION
str

The string representation of the tokens.

TYPE: str

This method takes in an instance of the ChatGLM3Tokenizer class and a list of tokens as input. It then uses the tokenizer's 'decode_tokens' method to convert the tokens into a string representation. The resulting string is returned as the output.

The 'self' parameter is a reference to the current instance of the ChatGLM3Tokenizer class. It is used to access the tokenizer object and its methods.

The 'tokens' parameter is a list of strings representing the tokens to be converted into a string. The tokens should be in the same order as they were generated by the tokenizer.

The return value is a string representation of the tokens. This can be useful for displaying or manipulating the tokens in a human-readable format.

This method does not raise any exceptions.

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def convert_tokens_to_string(self, tokens: List[str]) -> str:
    """
    Converts a list of tokens into a string representation using the ChatGLM3Tokenizer.

    Args:
        self (ChatGLM3Tokenizer): An instance of the ChatGLM3Tokenizer class.
        tokens (List[str]): A list of tokens to be converted into a string.

    Returns:
        str: The string representation of the tokens.

    Raises:
        None.

    This method takes in an instance of the ChatGLM3Tokenizer class and a list of tokens as input.
    It then uses the tokenizer's 'decode_tokens' method to convert the tokens into a string representation.
    The resulting string is returned as the output.

    The 'self' parameter is a reference to the current instance of the ChatGLM3Tokenizer class.
    It is used to access the tokenizer object and its methods.

    The 'tokens' parameter is a list of strings representing the tokens to be converted into a string.
    The tokens should be in the same order as they were generated by the tokenizer.

    The return value is a string representation of the tokens.
    This can be useful for displaying or manipulating the tokens in a human-readable format.

    This method does not raise any exceptions.
    """
    return self.tokenizer.decode_tokens(tokens)
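
A round-trip sketch, assuming an initialized tokenizer (the sample text is illustrative):

tokens = tokenizer._tokenize("Hello world")        # SentencePiece pieces
text = tokenizer.convert_tokens_to_string(tokens)  # decoded back into plain text
print(tokens, "->", text)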

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.get_command(token)

Retrieves the command associated with a given token.

PARAMETER DESCRIPTION
self

An instance of the ChatGLM3Tokenizer class.

TYPE: ChatGLM3Tokenizer

token

The token for which the command needs to be retrieved.

TYPE: str

RETURNS DESCRIPTION
int

The ID associated with the given token.

RAISES DESCRIPTION
AssertionError

If the token is not a special token for the ChatGLM3Tokenizer instance.

Note

This method checks if the given token is one of the special tokens stored in the self.special_tokens dictionary. If it is, the corresponding command is returned. Otherwise, an assertion error is raised if the token is not a special token for the ChatGLM3Tokenizer instance.

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def get_command(self, token):
    """
    Retrieves the command associated with a given token.

    Args:
        self (ChatGLM3Tokenizer): An instance of the ChatGLM3Tokenizer class.
        token (str): The token for which the command needs to be retrieved.

    Returns:
        int: The ID associated with the given token.

    Raises:
        AssertionError: If the token is not a special token for the ChatGLM3Tokenizer instance.

    Note:
        This method checks if the given token is one of the special tokens stored in the self.special_tokens dictionary.
        If it is, the corresponding command is returned. Otherwise, an assertion error is raised if the token is not a
        special token for the ChatGLM3Tokenizer instance.
    """
    if token in self.special_tokens:
        return self.special_tokens[token]
    assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
    return self.tokenizer.special_tokens[token]
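
For example, assuming an initialized tokenizer: '<eos>' resolves from the tokenizer's own mapping, while role markers such as '<|user|>' fall through to the inner SPTokenizer's special tokens:

eos_id = tokenizer.get_command("<eos>")      # found in self.special_tokens
user_id = tokenizer.get_command("<|user|>")  # found in SPTokenizer.special_tokens
# tokenizer.get_command("not-a-token")       # would raise AssertionError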

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.get_prefix_tokens()

This method 'get_prefix_tokens' is defined within the 'ChatGLM3Tokenizer' class and retrieves a list of prefix tokens.

PARAMETER DESCRIPTION
self

A reference to the instance of the class. It is used to access the instance variables and methods of the class.

RETURNS DESCRIPTION

A list of the prefix token IDs for the '[gMASK]' and 'sop' commands.

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def get_prefix_tokens(self):
    """
    This method 'get_prefix_tokens' is defined within the 'ChatGLM3Tokenizer' class and retrieves a list of prefix tokens.

    Args:
        self: A reference to the instance of the class. It is used to access the instance variables and methods of the class.

    Returns:
        list: The prefix token IDs for the '[gMASK]' and 'sop' commands.

    Raises:
        This method does not raise any exceptions.
    """
    prefix_tokens = [self.get_command("[gMASK]"), self.get_command("sop")]
    return prefix_tokens

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.get_vocab()

Returns vocab as a dict

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def get_vocab(self):
    """ Returns vocab as a dict """
    vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
    vocab.update(self.added_tokens_encoder)
    return vocab
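
For instance, assuming an initialized tokenizer, the returned mapping can be inverted for ID-to-token lookups:

vocab = tokenizer.get_vocab()                   # token -> ID for the full vocabulary
id_to_token = {i: t for t, i in vocab.items()}  # inverted lookup table
print(len(vocab) >= tokenizer.vocab_size)       # added tokens may extend the base vocab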

mindnlp.transformers.models.chatglm3.tokenization_chatglm3.ChatGLM3Tokenizer.save_vocabulary(save_directory, filename_prefix=None)

Save the vocabulary and special tokens file to a directory.

PARAMETER DESCRIPTION
save_directory

The directory in which to save the vocabulary.

TYPE: `str`

filename_prefix

An optional prefix to add to the names of the saved files.

TYPE: `str`, *optional* DEFAULT: None

RETURNS DESCRIPTION

Tuple(str): Paths to the files saved.

Source code in mindnlp/transformers/models/chatglm3/tokenization_chatglm3.py
def save_vocabulary(self, save_directory, filename_prefix=None):
    """
    Save the vocabulary and special tokens file to a directory.

    Args:
        save_directory (`str`):
            The directory in which to save the vocabulary.
        filename_prefix (`str`, *optional*):
            An optional prefix to add to the names of the saved files.

    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    if os.path.isdir(save_directory):
        vocab_file = os.path.join(
            save_directory, self.vocab_files_names["vocab_file"]
        )
    else:
        vocab_file = save_directory

    with open(self.vocab_file, 'rb') as fin:
        proto_str = fin.read()

    with open(vocab_file, "wb") as writer:
        writer.write(proto_str)

    return (vocab_file,)
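
A usage sketch, assuming an initialized tokenizer and a writable directory; note that this implementation copies the serialized SentencePiece model verbatim and does not use filename_prefix:

import os
import tempfile

save_dir = tempfile.mkdtemp()                  # illustrative target directory
(path,) = tokenizer.save_vocabulary(save_dir)  # copies tokenizer.model into save_dir
print(os.path.basename(path))                  # "tokenizer.model"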