wli1995 committed on
Commit
b50c751
·
verified ·
1 Parent(s): eb4deb3

delete old qwen2.5_tokenizer.py

Browse files
Files changed (1) hide show
  1. qwen2.5_tokenizer.py +0 -133
qwen2.5_tokenizer.py DELETED
@@ -1,133 +0,0 @@
1
- from transformers import AutoTokenizer, PreTrainedTokenizerFast
2
- from http.server import HTTPServer, BaseHTTPRequestHandler
3
- import json
4
- import argparse
5
-
6
-
7
class Tokenizer_Http():
    """Wrap a Hugging Face tokenizer and apply its chat template before encoding.

    The wrapped tokenizer is loaded with ``AutoTokenizer.from_pretrained`` and
    exposed as ``self.tokenizer``. ``encode`` builds a two-message chat
    (system + user), renders it with the tokenizer's chat template and
    returns the token ids of the rendered text.
    """

    # System message used by ``encode`` when no other prompt is supplied.
    system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."

    def __init__(self, model_id="qwen2.5_tokenizer", system_prompt=None):
        """Load the tokenizer.

        Args:
            model_id: Name or path passed to ``AutoTokenizer.from_pretrained``.
                Defaults to the directory name the original script used.
            system_prompt: Optional replacement for the class-level default
                system message.
        """
        if system_prompt is not None:
            self.system_prompt = system_prompt
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

    def encode(self, prompt, system_prompt=None):
        """Render ``prompt`` through the chat template and return its token ids.

        Args:
            prompt: Content of the user message.
            system_prompt: Optional per-call override of the system message;
                when omitted, ``self.system_prompt`` is used.

        Returns:
            Whatever ``self.tokenizer.encode`` returns for the rendered text.
        """
        if system_prompt is None:
            system_prompt = self.system_prompt
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # Kept from the original script: echo the rendered prompt to stdout.
        print(text)
        return self.tokenizer.encode(text)

    def decode(self, token_ids):
        """Return the text produced by the wrapped tokenizer for ``token_ids``."""
        return self.tokenizer.decode(token_ids)

    @property
    def bos_id(self):
        """``bos_token_id`` of the wrapped tokenizer (may be ``None``)."""
        return self.tokenizer.bos_token_id

    @property
    def eos_id(self):
        """``eos_token_id`` of the wrapped tokenizer (may be ``None``)."""
        return self.tokenizer.eos_token_id

    @property
    def bos_token(self):
        """``bos_token`` of the wrapped tokenizer."""
        return self.tokenizer.bos_token

    @property
    def eos_token(self):
        """``eos_token`` of the wrapped tokenizer."""
        return self.tokenizer.eos_token
45
-
46
-
47
# Shared tokenizer instance; the HTTP handler class in this file reads this
# module-level name.
tokenizer = Tokenizer_Http()

# Startup output: the special-token ids/strings, then the ids of a sample prompt.
print(*(
    getattr(tokenizer, attr)
    for attr in ("bos_id", "bos_token", "eos_id", "eos_token")
))
sample_ids = tokenizer.encode("hello world")
print(sample_ids)
del sample_ids
51
-
52
-
53
class Request(BaseHTTPRequestHandler):
    """HTTP handler exposing the module-level ``tokenizer``.

    Routes:
        GET  /bos_id                      -> {"bos_id": <id or -1>}
        GET  /eos_id                      -> {"eos_id": <id or -1>}
        POST /encode {"text": ...}        -> {"token_ids": ...}
        POST /decode {"token_ids": ...}   -> {"text": ...}

    Unknown paths answer 404, malformed requests 400 and tokenizer failures
    500, each with the plain body ``error``. The status line is only written
    once the outcome is known, so a failure never follows a 200 header.
    """
    timeout = 5
    server_version = 'Apache'

    def _reply(self, status, msg, method, content_type="application/json"):
        """Send ``msg`` as the complete response body with the given status.

        ``method`` fills the custom ``type`` header the original script sent
        ("get" or "post").
        """
        print(msg)
        body = str(msg).encode()  # to str, then to bytes
        self.send_response(status)
        self.send_header("type", method)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)  # return the bytes to the client

    def _reply_error(self, status, method):
        """Send the plain-text ``error`` body with an error status."""
        self._reply(status, 'error', method, content_type="text/plain")

    def do_GET(self):
        """Serve the special-token id queries."""
        print(self.path)
        if self.path == '/bos_id':
            key, value = 'bos_id', tokenizer.bos_id
        elif self.path == '/eos_id':
            key, value = 'eos_id', tokenizer.eos_id
        else:
            self._reply_error(404, 'get')
            return
        # -1 stands in for a token id the tokenizer does not define.
        self._reply(200, json.dumps({key: -1 if value is None else value}), 'get')

    def do_POST(self):
        """Serve /encode and /decode; the request body must be a JSON object."""
        try:
            length = int(self.headers.get('content-length'))
        except (TypeError, ValueError):
            # Header missing or not a number: the body cannot be read safely.
            self._reply_error(400, 'post')
            return
        if length < 0:
            self._reply_error(400, 'post')
            return

        # Always consume the body, even for paths rejected below.
        raw = self.rfile.read(length)

        if self.path == '/encode':
            field = 'text'
        elif self.path == '/decode':
            field = 'token_ids'
        else:
            self._reply_error(404, 'post')
            return

        try:
            # UnicodeDecodeError and JSONDecodeError are both ValueError.
            argument = json.loads(raw.decode())[field]
        except (ValueError, KeyError, TypeError):
            self._reply_error(400, 'post')
            return

        try:
            if field == 'text':
                token_ids = tokenizer.encode(argument)
                payload = {'token_ids': -1 if token_ids is None else token_ids}
            else:
                text = tokenizer.decode(argument)
                payload = {'text': "" if text is None else text}
            msg = json.dumps(payload)
        except Exception as exc:  # request boundary: report, do not kill the handler
            print(exc)
            self._reply_error(500, 'post')
            return

        self._reply(200, msg, 'post')
121
-
122
-
123
if __name__ == "__main__":
    # Command-line options: address and port the HTTP server binds to.
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', type=str, default='localhost')
    parser.add_argument('--port', type=int, default=8080)
    args = parser.parse_args()

    host = (args.host, args.port)  # (address, port) pair handed to HTTPServer
    print('http://%s:%s' % host)

    # The context manager closes the listening socket on exit; Ctrl-C ends
    # the serve loop without a traceback.
    with HTTPServer(host, Request) as server:
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            pass