modify: update modeling code
- configuration_baichuan.py   +1 -0
- modeling_baichuan.py        +4 -2
- quantizer.py                +4 -1
- requirements.txt            +6 -0
- tokenization_baichuan.py    +2 -1
configuration_baichuan.py
CHANGED
@@ -1,3 +1,4 @@
+# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
 
 from transformers.configuration_utils import PretrainedConfig
 
modeling_baichuan.py
CHANGED
@@ -1,3 +1,5 @@
+# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
+
 import math
 from typing import List, Optional, Tuple, Union
 
@@ -238,7 +240,7 @@ class BaichuanModel(BaichuanPreTrainedModel):
         if self.first_run:
             self.first_run = False
             self.register_buffer("future_mask", _gen_alibi_mask(self.n_head, self.max_cache_pos).to(tensor), persistent=False)
-        if
+        if seq_length_with_past > self.max_cache_pos:
             self.max_cache_pos = seq_length_with_past
             self.register_buffer("future_mask", _gen_alibi_mask(self.n_head, self.max_cache_pos).to(tensor), persistent=False)
         mask = self.future_mask[:self.n_head, :seq_length_with_past, :seq_length_with_past]
@@ -266,7 +268,6 @@ class BaichuanModel(BaichuanPreTrainedModel):
             raise ValueError("You need to provide input_ids or inputs_embeds")
 
         seq_length_with_past = seq_length
-        past_key_values_length = 0
 
         if past_key_values is not None:
             past_key_values_length = past_key_values[0][0].shape[2]
@@ -366,6 +367,7 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
         output_attentions: Optional[bool] = False,
         output_hidden_states: Optional[bool] = False,
         return_dict: Optional[bool] = True,
+        **kwargs
     ) -> Union[Tuple, CausalLMOutputWithPast]:
 
 
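The second hunk restores the growth check on the cached ALiBi mask: future_mask is built lazily on first use for max_cache_pos positions, then regenerated only when seq_length_with_past outgrows it. Below is a minimal sketch of that buffer-caching pattern; MaskCache and _gen_causal_mask are hypothetical stand-ins (the real _gen_alibi_mask also adds per-head linear position biases):

import torch

def _gen_causal_mask(n_head: int, max_pos: int) -> torch.Tensor:
    # Simplified stand-in for _gen_alibi_mask: an upper-triangular -inf
    # mask repeated per head, without the per-head ALiBi slopes.
    mask = torch.full((max_pos, max_pos), float("-inf")).triu(1)
    return mask.unsqueeze(0).expand(n_head, -1, -1)

class MaskCache(torch.nn.Module):
    # Hypothetical module isolating BaichuanModel's mask-caching logic.
    def __init__(self, n_head: int, max_cache_pos: int = 4096):
        super().__init__()
        self.n_head = n_head
        self.max_cache_pos = max_cache_pos
        self.first_run = True

    def get_alibi_mask(self, tensor, seq_length_with_past):
        if self.first_run:
            # Build the buffer lazily so it lands on the same
            # device/dtype as the activations (the `.to(tensor)`).
            self.first_run = False
            self.register_buffer(
                "future_mask",
                _gen_causal_mask(self.n_head, self.max_cache_pos).to(tensor),
                persistent=False,
            )
        if seq_length_with_past > self.max_cache_pos:
            # The condition this commit repairs: regenerate the buffer
            # only when the sequence outgrows the cached size.
            self.max_cache_pos = seq_length_with_past
            self.register_buffer(
                "future_mask",
                _gen_causal_mask(self.n_head, self.max_cache_pos).to(tensor),
                persistent=False,
            )
        # Slice the cached mask down to the current sequence length.
        return self.future_mask[:self.n_head, :seq_length_with_past, :seq_length_with_past]

The **kwargs added to the forward signature is plausibly a tolerance fix: generate() and chat wrappers may forward extra keyword arguments (for example token_type_ids produced by the tokenizer), which a strict signature would reject with a TypeError; absorbing them in **kwargs keeps generation working.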
quantizer.py
CHANGED
@@ -1,3 +1,5 @@
+# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
+
 import torch
 from typing import List
 import bz2
@@ -92,10 +94,11 @@ class QLinear(torch.nn.Module):
         super().__init__()
         self.quant_bits = bits
         self.scale = weight.abs().max(dim=-1).values / ((2 ** (bits - 1)) - 1)
+        self.scale = self.scale.to(torch.float32)
         if self.quant_bits == 4:
             self.weight = quant4(weight, self.scale)
         elif self.quant_bits == 8:
-            self.weight = torch.round(weight / self.scale[:, None]).to(torch.int8)
+            self.weight = torch.round(weight.to(self.scale.dtype) / self.scale[:, None]).to(torch.int8)
         if self.quant_bits == 8:
             self.weight = self.weight.T
         self.bias = None
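The second quantizer hunk moves the absmax arithmetic to float32: the per-row scale is cast up, and the weights are cast to the scale's dtype before the division and rounding, avoiding the precision loss of dividing half-precision weights by a half-precision scale. A small round-trip sketch of the same absmax int8 scheme, with hypothetical helper names rather than the repo's API:

import torch

def quantize_int8(weight):
    # Per-row absmax scale, cast to float32 as in the commit.
    scale = weight.abs().max(dim=-1).values / ((2 ** (8 - 1)) - 1)
    scale = scale.to(torch.float32)
    # Divide in float32, then round into the int8 range [-127, 127].
    qweight = torch.round(weight.to(scale.dtype) / scale[:, None]).to(torch.int8)
    return qweight, scale

def dequantize_int8(qweight, scale, dtype=torch.float16):
    # Reverse the scaling to recover an approximation of the weights.
    return (qweight.to(torch.float32) * scale[:, None]).to(dtype)

w = torch.randn(4, 8, dtype=torch.float16)
q, s = quantize_int8(w)
print((w - dequantize_int8(q, s)).abs().max())  # small quantization error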
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+accelerate
+colorama
+cpm_kernels
+sentencepiece
+streamlit
+transformers_stream_generator
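These are the runtime extras for the demos and quantized inference: presumably cpm_kernels backs the int4/int8 kernels used by quantizer.py, sentencepiece the tokenizer model, streamlit the web demo, and transformers_stream_generator streaming output. All of them install with pip install -r requirements.txt.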
tokenization_baichuan.py
CHANGED
@@ -1,9 +1,10 @@
+# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
+
 import os
 from shutil import copyfile
 from typing import Any, Dict, List, Optional, Tuple
 
 import sentencepiece as spm
-
 from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
 from transformers.utils import logging
 
|
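All four Python files ship with the checkpoint as remote code, so the fixes above only take effect when the model is loaded with trust_remote_code=True. A usage sketch; the repo id is illustrative, not taken from this page:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "baichuan-inc/Baichuan-13B-Chat"  # illustrative repo id

# trust_remote_code=True pulls in tokenization_baichuan.py,
# configuration_baichuan.py and modeling_baichuan.py from the repo.
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo, device_map="auto", trust_remote_code=True  # device_map needs accelerate
)

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
# Any extra keys produced by the tokenizer are forwarded into forward(),
# where the newly added **kwargs accepts them harmlessly.
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))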