Add new SentenceTransformer model

Browse files

Files changed (13) hide show

.gitattributes +1 -0
1_Pooling/config.json +10 -0
README.md +1691 -0
config.json +49 -0
config_sentence_transformers.json +14 -0
configuration.py +114 -0
model.safetensors +3 -0
modeling.py +1319 -0
modules.json +20 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +51 -0
tokenizer.json +3 -0
tokenizer_config.json +62 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "word_embedding_dimension": 768,
+    "pooling_mode_cls_token": true,
+    "pooling_mode_mean_tokens": false,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": true
+}

README.md ADDED Viewed

	@@ -0,0 +1,1691 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- dense
+- generated_from_trainer
+- dataset_size:500
+- loss:MatryoshkaLoss
+- loss:MultipleNegativesRankingLoss
+base_model: dangvantuan/vietnamese-document-embedding
+widget:
+- source_sentence: Ngành Kỹ thuật Trắc địa – Bản đồ là gì và sinh viên sẽ được trang
+    bị những kỹ năng nào?
+  sentences:
+  - 'Triển vọng việc làm
+    Các công ty xây dựng, nhà thầu xây dựng và các công ty tư vấn về xây dựng. Các
+    cơ quan, tổ chức quản lý nhà nước liên quan đến xây dựng, phát triển hạ tầng và
+    đô thị. Các công ty tài chính, ngân hàng và các tổ chức cho vay đầu tư vào các
+    dự án xây dựng. Các công ty cung cấp dịch vụ quản lý dự án và tư vấn xây dựng.
+    KỸ THUẬT XÂY DỰNG CÔNG TRÌNH GIAO THÔNG
+    Thông tin tuyển sinh:
+    Website: http://www.dce.hcmut.edu.vn/
+    Mã tuyển sinh: 115 (Chương trình tiêu chuẩn), 215 (Chương trình giảng dạy bằng
+    tiếng Anh)
+    Ngành Kỹ thuật Xây dựng Công trình Giao thông có 01 chuyên ngành : Cầu đường.
+    KỸ THUẬT TRẮC ĐỊA – BẢN ĐỒ
+    Website: http://www.dce.hcmut.edu.vn/
+    Ngành Kỹ thuật Trắc địa – Bản đồ thuộc Khoa Kỹ thuật Xây dựng. TỔNG QUAN CHƯƠNG
+    TRÌNH
+    Ngành Kỹ thuật Trắc địa – Bản đồ bao gồm Khoa học và công nghệ về việc thu thập,
+    phân tích và biểu diễn các thông tin không gian (dựa trên Trái đất). Nó bao gồm
+    những ứng dụng thú vị như định vị vệ tinh, viễn thám, trắc địa, địa chính và hệ
+    thông tin địa lý. Kỹ thuật Trắc địa – Bản đồ là một trong những ngành khoa học
+    thông tin phát triển nhanh nhất ở Việt Nam và khắp thế giới. Chương trình sẽ đào
+    tạo thành những kỹ sư có tay nghề cao, có năng lực thiết kế và tổ chức thi công
+    các công trình định vị chính xác cao bằng những kỹ thuật hiện đại như toàn đạc
+    điện tử và các hệ thống định vị bằng vệ tinh; công trình thành lập bản đồ địa
+    hình, địa chính và chuyên đề dưới dạng số bằng phương pháp toàn đạc, trắc lượng
+    ảnh, viễn thám và biên tập; công trình thành lập và vận hành các hệ thống thông
+    tin địa lý; các công tác trắc địa phục vụ xây dựng công trình.'
+  - 'Ngành: Sư phạm tiếng Anh
+    Mã ngành: 7140231D, Tổ hợp: (D01, D96), Điểm chuẩn: 26.08
+    2. Ngành: Thiết kế đồ họa
+    Mã ngành: 7210403D, Tổ hợp: (V01, V02, V07, V08), Điểm chuẩn: 24.50
+    3. Ngành: Thiết kế thời trang
+    Mã ngành: 7210404D, Tổ hợp: (V01, V02, V07, V09), Điểm chuẩn: 21.50
+    4. Ngành: Ngôn ngữ Anh
+    Mã ngành: 7220201D, Tổ hợp: (A00, A01, D01, D96), Điểm chuẩn: 22.75
+    5. Ngành: Kinh doanh Quốc tế
+    Mã ngành: 7340120D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 25.25
+    6. Ngành: Thương mại điện tử
+    Mã ngành: 7340122D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 25.25
+    7. Ngành: Kế toán
+    Mã ngành: 7340301D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.25
+    8. Ngành: Công nghệ kỹ thuật máy tính
+    Mã ngành: 7480106D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 25.00
+    9. Ngành: Hệ thống nhúng và IoT
+    Mã ngành: 7480109D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.75
+    10. Ngành: Công nghệ thông tin
+    Mã ngành: 7480201D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 25.25
+    11. Ngành: Công nghệ kỹ thuật công trình xây dựng
+    Mã ngành: 7510102D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 19.00
+    12. Ngành: Hệ thống kỹ thuật công trình xây dựng
+    Mã ngành: 7510105D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 19.00
+    13. Ngành: Công nghệ kỹ thuật cơ khí
+    Mã ngành: 7510201D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 21.50
+    14. Ngành: Công nghệ chế tạo máy
+    Mã ngành: 7510202D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 18.00
+    15. Ngành: Công nghệ kỹ thuật ô tô
+    Mã ngành: 7510205D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 22.50
+    16. Ngành: Công nghệ kỹ thuật nhiệt
+    Mã ngành: 7510206D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 16.50
+    17. Ngành: Công nghệ kỹ thuật điện, điện tử
+    Mã ngành: 7510301D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.25
+    18. Ngành: Công nghệ kỹ thuật điện tử, viễn thông
+    Mã ngành: 7510302, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.75
+    19. Ngành: Công nghệ kỹ thuật điều khiển và tự động hóa
+    Mã ngành: 7510303, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 25.00
+    20. Ngành: Công nghệ kỹ thuật hóa học
+    Mã ngành: 7510401, Tổ hợp: (A00, B00, D07, D90), Điểm chuẩn: 17.00
+    21. Ngành: Công nghệ vật liệu
+    Mã ngành: 7510402D, Tổ hợp: (A00, B00, D07, D90), Điểm chuẩn: 17.00
+    22. Ngành: Công nghệ kỹ thuật môi trường
+    Mã ngành: 7510406, Tổ hợp: (A00, B00, D07, D90), Điểm chuẩn: 17.00
+    23. Ngành: Quản lý công nghiệp
+    Mã ngành: 7510601, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.75
+    24. Ngành: Logistics và quản lý chuỗi cung ứng
+    Mã ngành: 7510605D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 24.25
+    25. Ngành: Công nghệ kỹ thuật in
+    Mã ngành: 7510802D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 17.00
+    26. Ngành: Công nghệ may
+    Mã ngành: 7540204D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 17.50
+    27. Ngành: Kiến trúc
+    Mã ngành: 7580101, Tổ hợp: (V00, V01, V02, V03), Điểm chuẩn: 22.25
+    28. Ngành: Kiến trúc nội thất
+    Mã ngành: 7580101D, Tổ hợp: (V03, V04, V05, V06), Điểm chuẩn: 22.00
+    29. Ngành: Kỹ thuật xây dựng công trình giao thông
+    Mã ngành: 7580205D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 17.00
+    30. Ngành: Kỹ thuật xây dựng
+    Mã ngành: 7580201D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 17.00
+    31. Ngành: Quản trị nhà hàng và dịch vụ ăn uống
+    Mã ngành: 7840110D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 17.50
+    32. Ngành: Quản lý và vận hành hạ tầng
+    Mã ngành: 7840101D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 17.70
+    33. Ngành: Công nghệ kỹ thuật điều khiển và tự động hóa
+    Mã ngành: 7510303D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 22.75
+    34. Ngành: Công nghệ kỹ thuật điện tử - viễn thông
+    Mã ngành: 7510302D, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 21.50
+    CLC Tiếng Việt
+    1. Ngành: Thiết kế thời trang
+    Mã ngành: 7210404C, Tổ hợp: (V01, V02, V07, V09), Điểm chuẩn: 21.60
+    2. Ngành: Thương mại điện tử
+    Mã ngành: 7340122C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 25.50
+    3. Ngành: Kế toán
+    Mã ngành: 7340301C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.75
+    4. Ngành: Công nghệ kỹ thuật máy tính
+    Mã ngành: 7480106C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 25.65
+    5. Ngành: Công nghệ thông tin
+    Mã ngành: 7480201C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 26.00
+    6. Ngành: Công nghệ kỹ thuật công trình xây dựng
+    Mã ngành: 7510102C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 20.00
+    7. Ngành: Công nghệ kỹ thuật cơ khí
+    Mã ngành: 7510201C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 22.00
+    8. Ngành: Công nghệ chế tạo máy
+    Mã ngành: 7510202C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 19.00
+    9. Ngành: Công nghệ kỹ thuật ô tô
+    Mã ngành: 7510205C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.00
+    10. Ngành: Công nghệ kỹ thuật nhiệt
+    Mã ngành: 7510206C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 17.00
+    11. Ngành: Công nghệ kỹ thuật điện, điện tử
+    Mã ngành: 7510301C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.00
+    12. Ngành: Công nghệ kỹ thuật điều khiển và tự động hóa
+    Mã ngành: 7510303C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.00
+    13. Ngành: Công nghệ kỹ thuật điện tử - viễn thông
+    Mã ngành: 7510302C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 22.50
+    14. Ngành: Công nghệ kỹ thuật điện tử, viễn thông
+    Mã ngành: 7510302C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 22.50
+    15. Ngành: Công nghệ kỹ thuật điều khiển và tự động hóa
+    Mã ngành: 7510303C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 24.00
+    16. Ngành: Công nghệ kỹ thuật môi trường
+    Mã ngành: 7510406C, Tổ hợp: (A00, B00, D07, D90), Điểm chuẩn: 17.50
+    17. Ngành: Quản lý công nghiệp
+    Mã ngành: 7510601C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 19.75
+    18. Ngành: Công nghệ kỹ thuật cơ điện tử
+    Mã ngành: 7510203C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 21.50
+    19. Ngành: Công nghệ may
+    Mã ngành: 7540204C, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.25
+    CLC Tiếng Anh:
+    1. Ngành: Công nghệ kỹ thuật máy tính
+    Mã ngành: 7480106A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 26.00
+    2. Ngành: Công nghệ thông tin
+    Mã ngành: 7480201A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 26.50
+    3. Ngành: Công nghệ kỹ thuật công trình xây dựng
+    Mã ngành: 7510102A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 19.75
+    4. Ngành: Công nghệ kỹ thuật cơ khí
+    Mã ngành: 7510201A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 22.50
+    5. Ngành: Công nghệ chế tạo máy
+    Mã ngành: 7510202A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 19.50
+    6. Ngành: Công nghệ kỹ thuật ô tô
+    Mã ngành: 7510205A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.50
+    7. Ngành: Công nghệ kỹ thuật nhiệt
+    Mã ngành: 7510206A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 17.00
+    8. Ngành: Công nghệ kỹ thuật điện, điện tử
+    Mã ngành: 7510301A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.50
+    9. Ngành: Công nghệ kỹ thuật điều khiển và tự động hóa
+    Mã ngành: 7510303A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.50
+    10. Ngành: Công nghệ kỹ thuật điện tử, viễn thông
+    Mã ngành: 7510302A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 22.25
+    11. Ngành: Công nghệ kỹ thuật điều khiển và tự động hóa
+    Mã ngành: 7510303A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 23.50
+    12. Ngành: Quản lý công nghiệp
+    Mã ngành: 7510601A, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 19.25
+    Nhân tài
+    Ngành: Robot và trí tuệ nhân tạo
+    Mã ngành: 7510209NT, Tổ hợp: (A00, A01, D01, D90), Điểm chuẩn: 25.00'
+  - 'xiv. Khoa tiếng Nhật
+    1. CHẤT LƯỢNG ĐÀO TẠO
+    ĐÀO TẠO CỬ NHÂN (4 NĂM)
+    2. CHẤT LƯỢNG GIẢNG VIÊN
+    ĐỘI NGŨ GIẢNG VIÊN: 8
+    Tiến sĩ: 1
+    Thạc sĩ: 5
+    Cử nhân: 2
+    3. MỤC TIÊU ĐÀO TẠO
+    Đào tạo cử nhân ngôn ngữ Nhật, có phẩm chất chính trị, đạo đức và sức khỏe tốt,
+    hiểu và vận dụng các tri thức cơ bản về ngôn ngữ tiếng Nhật. Sau khi tốt nghiệp,
+    người học có đủ năng lực để làm việc ở các cơ quan, tổ chức, các công ty, doanh
+    nghiệp yêu cầu chuẩn về kỹ năng sử dụng tiếng Nhật đáp ứng những yêu cầu của xã
+    hội và của nền kinh tế trong quá trình hội nhập quốc tế. 4. CÁC NGÀNH ĐÀO TẠO
+    ĐẠI HỌC
+    Cử nhân Ngôn ngữ Nhật
+    xv. Khoa tiếng Hàn Quốc
+    1. CHẤT LƯỢNG ĐÀO TẠO
+    ĐÀO TẠO CỬ NHÂN (4 NĂM)
+    TUYỂN SINH:
+    2016 - 2017: 150 SV
+    2017 - 2018: 195 SV
+    2018 - 2019: 120 SV
+    2019 - 2020: 54 SV
+    2021 - 2022: 120 SV
+    2022 - 2023: 88 SV
+    Song ngành: 20 SV
+    2. CHẤT LƯỢNG GIẢNG VIÊN
+    ĐỘI NGŨ GIẢNG VIÊN: 6
+    Tiến sĩ: 1
+    Thạc sĩ: 4
+    Cử nhân: 1
+    3. MỤC TIÊU ĐÀO TẠO
+    Đào tạo cử nhân ngôn ngữ Hàn Quốc, có phẩm chất chính trị, đạo đức và sức khỏe
+    tốt, hiểu và vận dụng các tri thức cơ bản về ngôn ngữ tiếng Hàn Quốc - định hướng
+    biên phiên dịch. Sau khi tốt nghiệp, người học có đủ năng lực để làm việc ở các
+    cơ quan, tổ chức, các công ty, doanh nghiệp yêu cầu chuẩn về kỹ năng sử dụng tiếng
+    Hàn Quốc đáp ứng những yêu cầu của xã hội và của nền kinh tế trong quá trình hội
+    nhập quốc tế. Đào tạo cử nhân ngôn ngữ Hàn Quốc chuyên ngành Du lịch, có phẩm
+    chất chính trị, đạo đức và sức khỏe tốt, hiểu và vận dụng các tri thức cơ bản
+    về ngôn ngữ Hàn Quốc ngành Du lịch. Sau khi tốt nghiệp, người học có đủ năng lực
+    để làm việc ở các cơ quan, tổ chức, các công ty, doanh nghiệp yêu cầu chuẩn về
+    công việc du lịch và các công việc có liên quan đáp ứng những yêu cầu của xã hội
+    và của nền kinh tế trong quá trình hội nhập quốc tế.'
+- source_sentence: Điểm chuẩn dự kiến của một số ngành học như Vật lý học hay Công
+    nghệ Thông tin là bao nhiêu?
+  sentences:
+  - STT Mã ngành Tên ngành Tổ hợp môn Điểm chuẩn Ghi chú 1 7220201 Ngôn ngữ Anh A;
+    B; C01; C02; C03; C04 21 2 7220204 Ngôn ngữ Trung Quốc A; B; C01; C02; C03;
+    C04 21 3 7220209 Ngôn ngữ Nhật A; B; C01; C02; C03; C04 21 4 7220210 Ngôn ngữ
+    Hàn Quốc A; B; C01; C02; C03; C04 21 5 7320106 Công nghệ truyền thông A; B;
+    C01; C02; C03; C04 21 6 7340101 Quản trị kinh doanh A; B; C01; C02; C03; C04 21
+    7 7480201 Công nghệ thông tin A; B; C01; C02; C03; C04 21 Phương thức xét tuyển
+    của fpt khá giống nhau.
+  - 'An ninh mạng: Làm việc tại các công ty bảo mật, ngân hàng, các tổ chức phòng
+    chống tội phạm mạng và bảo vệ thông tin. Quản lý hệ thống và mạng máy tính: Làm
+    việc tại các công ty quản lý hệ thống máy tính, mạng dữ liệu lớn, và cơ sở hạ
+    tầng công nghệ thông tin. Trí tuệ nhân tạo (AI) và Khoa học dữ liệu: Phát triển
+    các hệ thống AI, phân tích dữ liệu và học máy (machine learning) trong các ứng
+    dụng như tự động hóa, nhận diện hình ảnh và phân tích dự đoán. CHƯƠNG TRÌNH ĐÀO
+    TẠO & MÔN HỌC
+    Sinh viên sẽ học các môn như:
+    Lập trình và phát triển phần mềm: Học cách lập trình bằng các ngôn ngữ phổ biến
+    như Java, Python, C++, và phát triển các ứng dụng phần mềm cho các nền tảng khác
+    nhau. Mạng máy tính và hệ thống phân tán: Kiến thức về thiết kế, triển khai và
+    quản lý các mạng máy tính, hệ thống phân tán, và các giao thức mạng hiện đại.
+    An ninh mạng: Các phương pháp và công nghệ bảo mật trong việc bảo vệ dữ liệu và
+    hệ thống mạng khỏi các mối đe dọa. Trí tuệ nhân tạo và học máy: Học các lý thuyết
+    và kỹ thuật liên quan đến AI, học máy, khai thác dữ liệu và phân tích dữ liệu
+    lớn. Cơ sở dữ liệu: Quản lý và tối ưu hóa cơ sở dữ liệu, học cách phát triển các
+    hệ thống cơ sở dữ liệu lớn, ứng dụng trong các doanh nghiệp và tổ chức. Trí tuệ
+    nhân tạo
+    Mã tuyển sinh: 7480107
+    Tổ hợp xét tuyển: A00; A01; B08; D07
+    Thông tin liên hệ:
+    Địa chỉ: Khoa Công nghệ thông tin, Trường Đại học Khoa học Tự nhiên TP.HCM, 227
+    Nguyễn Văn Cừ, Quận 5, TP.HCM
+    Điện thoại: (028) 38 585 726
+    Email: [email protected]
+    Website: https://www.fit.hcmus.edu.vn/
+    TỔNG QUAN CHƯƠNG TRÌNH
+    Chương trình đào tạo ngành Trí tuệ nhân tạo tại Trường Đại học Khoa học Tự nhiên
+    TP.HCM trang bị cho sinh viên kiến thức vững chắc về các lý thuyết và ứng dụng
+    của trí tuệ nhân tạo (AI), học máy (machine learning), và các kỹ thuật xử lý dữ
+    liệu lớn. Sinh viên sẽ được đào tạo để phát triển các hệ thống thông minh, phân
+    tích dữ liệu và ứng dụng AI trong các lĩnh vực như nhận diện hình ảnh, xử lý ngôn
+    ngữ tự nhiên, robot và các hệ thống tự động hóa. Chương trình học chú trọng đến
+    việc kết hợp lý thuyết với thực hành, giúp sinh viên có thể giải quyết các bài
+    toán thực tế trong môi trường công nghệ phát triển nhanh chóng. CƠ SỞ VẬT CHẤT
+    CHẤT LƯỢNG CAO
+    Khoa Công nghệ thông tin có các phòng thí nghiệm hiện đại phục vụ cho việc nghiên
+    cứu và học tập trong ngành Trí tuệ nhân tạo, bao gồm các phòng lab chuyên về học
+    máy, xử lý ngôn ngữ tự nhiên, robot và các ứng dụng AI. Sinh viên có cơ hội tiếp
+    cận với các công cụ phần mềm mạnh mẽ, mô phỏng AI và các nền tảng học máy được
+    ứng dụng trong các ngành công nghiệp khác nhau. CÁC ĐIỂM ĐẶC BIỆT
+    Chương trình học theo hướng thực tiễn: Sinh viên được đào tạo các kỹ năng giải
+    quyết bài toán thực tế thông qua các dự án nghiên cứu và hợp tác với các công
+    ty công nghệ. Môi trường học tập sáng tạo: Các chương trình hackathon, workshop
+    và dự án nghiên cứu là những cơ hội để sinh viên thể hiện sự sáng tạo và tìm ra
+    giải pháp sáng tạo cho các vấn đề trong trí tuệ nhân tạo. Cơ hội nghề nghiệp và
+    du học: Sinh viên có thể tham gia vào các chương trình trao đổi sinh viên quốc
+    tế và có cơ hội làm việc tại các công ty hàng đầu về AI trong và ngoài nước. Các
+    công ty như Google, Facebook, và các công ty khởi nghiệp về AI luôn tìm kiếm những
+    tài năng có nền tảng vững về AI. TRIỂN VỌNG NGHỀ NGHIỆP & CƠ HỘI VIỆC LÀM
+    Sinh viên tốt nghiệp ngành Trí tuệ nhân tạo có thể làm việc trong các lĩnh vực
+    như:
+    Phát triển phần mềm AI: Làm việc tại các công ty công nghệ phát triển các hệ thống
+    AI, phần mềm học máy, và các sản phẩm tự động hóa.'
+  - 'Vật lý học – 22.55
+    xxxix. Hóa học – 23.47
+    xl. Công nghệ Thông tin – 23.34
+    xli. Công tác Xã hội – 22.00
+    xlii. Du lịch – 22.00
+    8. Học phí trường Đại học Sư phạm TPHCM 2023 dự kiến từ 9,8 đến 11,7 triệu đồng
+    một năm. Riêng các ngành sư phạm được miễn học phí.'
+- source_sentence: Làm thế nào để đăng ký xét tuyển và thời gian dự kiến cho các phương
+    thức tuyển sinh là khi nào?
+  sentences:
+  - '4. Các ngành đào tạo
+    a. ĐẠI HỌC
+    Cử nhân Sư phạm Ngữ văn
+    Cử nhân Văn học (Hệ Chính quy; Văn bằng 2)
+    Cử nhân Việt Nam học
+    Cử nhân Tiếng Việt và Văn hóa Việt Nam
+    b. SAU ĐẠI HỌC
+    Thạc sĩ Văn học Việt Nam
+    Thạc sĩ Văn học nước ngoài
+    Thạc sĩ Lý luận văn học
+    Thạc sĩ Ngôn ngữ học
+    Tiến sĩ Văn học Việt Nam
+    Tiến sĩ Văn học nước ngoài
+    Tiến sĩ Lý luận ngôn ngữ
+    c. BỒI DƯỠNG
+    Chuyên đề cung cấp kiến thức mới; kỹ năng, phương pháp dạy học mới, nâng cao tính
+    thực tiễn, chú trọng phát triển năng lực ở người học
+    ii. Khoa Lịch sử
+    1. CHẤT LƯỢNG ĐÀO TẠO
+    ĐÀO TẠO CỬ NHÂN (4 NĂM)
+    Cử nhân Sư phạm Lịch sử
+    TUYỂN SINH:
+    2005 - 2015: 1.425 Cử nhân Sư phạm Lịch sử
+    2006 - 2015: 343 Cử nhân Quốc tế học
+    2007 - 2015: 464 Cử nhân Sử - GDQP
+    ĐÀO TẠO CAO HỌC (2 NĂM)
+    Bắt đầu đào tạo Thạc sĩ từ 1999
+    ThS Lịch sử Việt Nam và ThS Lịch sử thế giới
+    TUYỂN SINH: 20 - 25 HV/năm
+    2016 - 2017: 09 HV
+    ĐÀO TẠO TIẾN SĨ
+    Tiến sĩ Lịch sử Việt Nam
+    TUYỂN SINH: 5 - 10 NCS/năm
+    2017- 2018: 15 NCS
+    2. CHẤT LƯỢNG GIẢNG VIÊN
+    SỐ LƯỢNG GIẢNG VIÊN: 22
+    SỐ LƯỢNG CHUYÊN VIÊN: 02
+    PGS.TS: 01
+    Tiến sĩ: 12
+    Thạc sĩ: 09
+    3. MỤC TIÊU ĐÀO TẠO
+    Đào tạo Cử nhân Sư phạm Lịch sử đáp ứng yêu cầu về phẩm chất, năng lực đối với
+    giáo viên Lịch sử, bao gồm: có phẩm chất chính trị và đạo đức nghề nghiệp, có
+    kiến thức hệ thống, toàn diện về khoa học Lịch sử và phương pháp dạy học Lịch
+    sử, có năng lực thích ứng, sáng tạo, tự bồi dưỡng phát triển chuyên môn theo yêu
+    cầu của ngành giáo dục và của xã hội. Đào tạo Cử nhân Quốc tế học có phẩm chất
+    chính trị, đạo đức tốt, có kiến thức vững chắc về ngành học để đáp ứng nhu cầu
+    công tác trong các cơ quan, tổ chức chuyên về hoạt động đối ngoại của nhà nước,
+    trong các công ty đa quốc gia hoặc các tổ chức phi chính phủ; đồng thời có khả
+    năng thích ứng cao trong bối cảnh hội nhập quốc tế và khu vực.'
+  - 'Kết quả được công bố trước kỳ thi tốt nghiệp THPT. Cách thức đăng ký: Thí sinh
+    đăng ký trực tuyến thông qua hệ thống website của trường và nộp hồ sơ theo thông
+    báo tuyển sinh theo quy định. 1.2 Phương thức 2: Xét tuyển kết hợp Chứng chỉ ngoại
+    ngữ quốc tế và kết quả học tập/chứng chỉ năng lực quốc tế dành cho thí sinh hệ
+    chuyên và hệ không chuyên
+    Đối tượng xét tuyển: Thí sinh thuộc 1 trong 3 đối tượng sau:
+    Đối tượng 1: Thí sinh thuộc hệ chuyên, lớp chuyên Toán – Tin, Lý, Hóa, Văn và
+    Ngoại ngữ của các trường THPT trọng điểm quốc gia/chuyên. Đối tượng 2: Thí sinh
+    hệ không chuyên; hoặc hệ chuyên lớp chuyên (khác với các tổ hợp môn xét tuyển
+    của Nhà trường). Đối tượng 3: Thí sinh (hệ chuyên và không chuyên) có các chứng
+    chỉ năng lực quốc tế SAT, ACT hoặc A-level. Lưu ý: Áp dụng cho các chương trình
+    giảng dạy bằng tiếng Anh và các chương trình Chất lượng cao Ngôn ngữ thương mại.
+    Thời gian tuyển sinh: Dự kiến từ 22/5 – 31/5/2023. Kết quả được công bố trước
+    kỳ thi tốt nghiệp THPT. Cách thức đăng ký: Thí sinh đăng ký trực tuyến thông qua
+    hệ thống website của trường và nộp hồ sơ theo thông báo tuyển sinh của Nhà trường.
+    1.3 Phương thức 3: Xét tuyển kết hợp Chứng chỉ ngoại ngữ quốc tế và kết quả thi
+    tốt nghiệp THPT năm 2023
+    Áp dụng cho: Các chương trình giảng dạy bằng tiếng Anh và Ngôn ngữ thương mại.
+    Thời gian tuyển sinh: Dự kiến cuối tháng 7/2023, ngay sau khi có kết quả thi tốt
+    nghiệp THPT năm 2023. Cách thức đăng ký: Thí sinh đăng ký trực tuyến thông qua
+    hệ thống website của trường và nộp hồ sơ theo thông báo tuyển sinh của Nhà trường.
+    1.4 Phương thức 4: Xét tuyển dựa trên kết quả thi tốt nghiệp THPT năm 2023
+    Áp dụng cho: Các chương trình tiêu chuẩn và định hướng nghề nghiệp quốc tế.'
+  - 'Ứng dụng hạt nhân: Tìm hiểu về ứng dụng năng lượng hạt nhân, y học hạt nhân,
+    và xử lý chất thải hạt nhân trong công nghiệp. Kỹ thuật đo lường: Các kỹ thuật
+    đo đạc, phân tích phóng xạ, và các ứng dụng trong nghiên cứu khoa học. Vật lý
+    Y khoa
+    Mã tuyển sinh: 7520403
+    Tổ hợp xét tuyển: A00; A01; A02; D90
+    Thông tin liên hệ:
+    Địa chỉ: Trường Đại học Khoa học Tự nhiên TP.HCM, 227 Nguyễn Văn Cừ, Quận 5, TP.HCM
+    Điện thoại: (028) 38 585 726
+    Email: [email protected]
+    Website: https://www.hcmus.edu.vn
+    TỔNG QUAN CHƯƠNG TRÌNH
+    Chương trình đào tạo ngành Vật lý Y khoa tại Trường Đại học Khoa học Tự nhiên
+    TP.HCM cung cấp cho sinh viên kiến thức nền tảng về vật lý, sinh học và y học,
+    đặc biệt là các ứng dụng vật lý trong y tế. Sinh viên sẽ được trang bị kiến thức
+    về các thiết bị y tế như máy chụp X-quang, máy cộng hưởng từ (MRI), máy cắt lớp
+    vi tính (CT), và các phương pháp điều trị bằng tia xạ. Chương trình học kết hợp
+    lý thuyết với thực hành để sinh viên có thể ứng dụng các kiến thức trong các môi
+    trường y tế thực tế. CƠ SỞ VẬT CHẤT CHẤT LƯỢNG CAO
+    Trường sở hữu các phòng thí nghiệm hiện đại chuyên về vật lý y khoa, với các thiết
+    bị y tế tiên tiến phục vụ cho việc học tập và nghiên cứu. Các phòng thí nghiệm
+    này cung cấp cơ hội cho sinh viên thực hành với các thiết bị như máy chụp X-quang,
+    CT, MRI và các thiết bị điều trị ung thư bằng tia xạ. CÁC ĐIỂM ĐẶC BIỆT
+    Chương trình học kết hợp lý thuyết và thực hành: Sinh viên không chỉ học lý thuyết
+    mà còn được thực hành trên các thiết bị y tế hiện đại trong các bệnh viện và cơ
+    sở y tế. Liên kết với các bệnh viện và cơ sở y tế: Trường có các mối quan hệ hợp
+    tác với các bệnh viện lớn, giúp sinh viên có cơ hội thực tập và nghiên cứu tại
+    các cơ sở y tế có ứng dụng vật lý y khoa. Cơ hội du học và học bổng: Sinh viên
+    có thể tham gia các chương trình trao đổi sinh viên và nhận học bổng du học thạc
+    sĩ, tiến sĩ tại các trường đại học nổi tiếng trên thế giới. TRIỂN VỌNG NGHỀ NGHIỆP
+    & CƠ HỘI VIỆC LÀM
+    Sinh viên tốt nghiệp ngành Vật lý Y khoa có thể làm việc trong các lĩnh vực như:
+    Các bệnh viện, phòng khám và cơ sở y tế: Vật lý gia trong các bệnh viện và phòng
+    khám sử dụng thiết bị y tế như X-quang, CT, MRI, và các phương pháp điều trị tia
+    xạ.'
+- source_sentence: Các phương thức xét tuyển vào Trường Đại học Sư phạm TP Hồ Chí
+    Minh năm 2022 là gì?
+  sentences:
+  - 'Công nghệ thông tin, 7480201V, Tổ hợp: A00; A01; D01; D90, Điểm chuẩn: 25.97
+    13. An toàn thông tin, 7480202V, Tổ hợp: A00; A01; D01; D90, Điểm chuẩn: 24.89
+    14. Kỹ thuật dữ liệu, 7480203V, Tổ hợp: A00; A01; D01; D90, Điểm chuẩn: 24.05
+    15. Công nghệ kỹ thuật công trình xây dựng, 7510102V, Tổ hợp: A00; A01; D01; D90,
+    Điểm chuẩn: 23.05 16. Hệ thống kỹ thuật công trình xây dựng, 7510106V, Tổ hợp:
+    A00; A01; D01; D90, Điểm chuẩn: 22.22 17. Công nghệ kỹ thuật cơ khí, 7510201V,
+    Tổ hợp: A00; A01; D01; D90, Điểm chuẩn: 25.1 18. Công nghệ chế tạo máy, 7510202V,
+    Tổ hợp: A00; A01; D01; D90, Điểm chuẩn: 24.27 19. Công nghệ kỹ thuật cơ điện tử,
+    7510203V, Tổ hợp: A00; A01; D01; D90, Điểm chuẩn: 25.45 20. Công nghệ kỹ thuật
+    ô tô, 7510205V, Tổ hợp: A00; A01; D01; D90, Điểm chuẩn: 25.39 21.'
+  - '1. Phương thức xét tuyển
+    Trường Đại học Sư phạm TP Hồ Chí Minh xét tuyển đại học chính quy năm 2022 theo
+    các phương thức sau:
+    Xét tuyển thẳng
+    Ưu tiên xét tuyển và xét tuyển thí sinh là học sinh lớp chuyên
+    Xét kết quả thi tốt nghiệp THPT năm 2022
+    Xét học bạ THPT
+    Xét tuyển kết hợp thi tuyển với các ngành năng khiếu
+    Thông tin chi tiết về từng phương thức như sau:
+    A. Phương thức 1: Xét tuyển thẳng
+    Thực hiện theo quy định của Bộ GD&ĐT. B. Phương thức 2: Ưu tiên xét tuyển và xét
+    tuyển thí sinh lớp chuyên
+    a) Ưu tiên xét tuyển
+    Ngưỡng đảm bảo chất lượng đầu vào:
+    Áp dụng với thí sinh tốt nghiệp THPT năm 2022
+    Thí sinh xét tuyển ngành Giáo dục mầm non: Phải tham gia kỳ thi năng khiếu do
+    trường Đại học Sư phạm TPHCM tổ chức và >= 6.5 điểm. b) Xét tuyển thí sinh là
+    học sinh lớp chuyên
+    Đối tượng ưu tiên xét tuyển lớp chuyên: Thí sinh tốt nghiệp các trường THPT (theo
+    danh sách) có học lực lớp 12 chuyên năm 2021 – 2022 từ giỏi trở lên và đạt 1 trong
+    các điều kiện theo thứ tự ưu tiên vào ngành đúng hoặc gần đúng:
+    (1) Tham gia đội tuyển HSG quốc gia hoặc đội tuyển dự cuộc thi KHKT cấp quốc gia
+    (2) Đạt giải nhất, nhì, ba kỳ thi HSG do cấp tỉnh trở lên tổ chức
+    (3) Thí sinh có chứng chỉ ngoại ngữ B2 trở lên hoặc tương đương (áp dụng với các
+    ngành ngoại ngữ đúng hoặc gần)
+    (4) Có học lực lớp 10, 11 chuyên đạt học sinh giỏi.'
+  - '2. Xét tuyển sử dụng kết quả thi tốt nghiệp THPT năm 2024 (phương thức này áp
+    dụng với tất cả các ngành trừ Giáo dục Mầm non và Giáo dục Thể chất)
+    Ngưỡng điểm đảm bảo chất lượng đầu vào, điều kiện nhận hồ sơ đăng ký xét tuyển
+    được thông báo chính thức sau khi Bộ Giáo dục và Đào tạo xác định ngưỡng đảm bảo
+    chất lượng đầu vào đại học (căn cứ kết quả kỳ thi tốt nghiệp THPT năm 2024). 3.
+    Xét tuyển sử dụng kết quả học tập THPT (phương thức này áp dụng với tất cả các
+    ngành trừ Giáo dục Mầm non và Giáo dục Thể chất)
+    Phương thức xét tuyển này chỉ áp dụng đối với thí sinh tốt nghiệp THPT năm 2024
+    đồng thời phải thỏa một trong hai điều kiện sau:
+    + Có học lực lớp 12 xếp loại giỏi;
+    + Có điểm xét tốt nghiệp THPT từ 8,0 trở lên.'
+- source_sentence: Chương trình đào tạo của Đại học FPT có những điểm nổi bật nào?
+  sentences:
+  - 'Học phí: 11,900,000 VNĐ/mức. 3. Các khoản phí cần nộp khi nhập học
+    Học phí kỳ định hướng: 11,900,000 VNĐ (có 01 học kỳ định hướng, chỉ áp dụng cho
+    SV mới nhập học năm 2024)
+    Học phí 01 mức tiếng Anh chuẩn bị: 11,900,000 VNĐ/mức
+    1. Giáo trình chuẩn quốc tế
+    Chương trình học của Đại học FPT được thiết kế theo chuẩn của Hiệp hội Máy tính
+    (Association for Computing Machinery-ACM), chuẩn đào tạo kỹ sư phần mềm của Accreditation
+    Board for Engineering and Technology (ABET – Mỹ), Hiệp hội Phần mềm Việt Nam (VINASA)
+    Tất cả các giáo trình của Đại học FPT đều được nhập khẩu từ nước ngoài về và cập
+    nhật liên tục theo từng kỳ học
+    2. Chú trọng vào thực hành
+    Với thời lượng số tiết thực hành lên đến 60%, sinh viên Đại học FPT đã được làm
+    quen với công việc ngay từ trên ghế nhà trường. 1 lớp học tối đa 30 sinh viên
+    nên trong các giờ thực hành sinh viên sẽ được các thầy cô giảng viên hướng dẫn
+    rất tỉ mỉ và chi tiết
+    3. Chú trọng vào ngoại ngữ và kỹ năng mềm
+    Năm đầu tiên các bạn sinh viên sẽ phải trải qua 6 kỳ học tiếng Anh để có thể đạt
+    được nền tảng ngoại ngữ nhất định trước khi sang năm thứ 2 bước vào kỳ học chuyên
+    ngành. Ngoài tiếng Anh là ngôn ngữ bắt buộc ra thì sinh viên Đại học FPT còn được
+    học thêm tiếng Nhật. Như vậy sau khi ra trường các bạn sinh viên có thêm rất nhiều
+    cơ hội để có thể làm việc tại nước ngoài
+    Ngay từ những ngày đầu thành lập, đại học FPT đã đề cao việc phát triển bản thân
+    đặc biệt là kỹ năng mềm cho sinh viên. Các bạn sinh viên sẽ được học rất nhiều
+    các khoá học như kỹ năng thuyết trình, tư duy phản biện, kỹ năng làm việc nhóm,
+    kỹ năng phỏng vấn, kỹ năng viết CV .'
+  - STT Mã ngành Tên ngành Tổ hợp môn Điểm chuẩn Ghi chú 1 7220201 Ngôn ngữ Anh A;
+    B; C01; C02; C03; C04 21 2 7220204 Ngôn ngữ Trung Quốc A; B; C01; C02; C03;
+    C04 21 3 7220209 Ngôn ngữ Nhật A; B; C01; C02; C03; C04 21 4 7220210 Ngôn ngữ
+    Hàn Quốc A; B; C01; C02; C03; C04 21 5 7320106 Công nghệ truyền thông A; B;
+    C01; C02; C03; C04 21 6 7340101 Quản trị kinh doanh A; B; C01; C02; C03; C04 21
+    7 7480201 Công nghệ thông tin A; B; C01; C02; C03; C04 21 Phương thức xét tuyển
+    của fpt khá giống nhau.
+  - 'Các môn học được giảng dạy chủ yếu bằng tiếng Anh, giúp sinh viên phát triển
+    kỹ năng ngoại ngữ cũng như kiến thức chuyên môn trong lĩnh vực sinh học. Chương
+    trình chú trọng vào các kỹ năng nghiên cứu, phân tích và giải quyết vấn đề trong
+    môi trường sinh học toàn cầu. CƠ SỞ VẬT CHẤT CHẤT LƯỢNG CAO
+    Các sinh viên trong chương trình này sẽ được học tập và nghiên cứu trong một môi
+    trường quốc tế hóa, với các thiết bị nghiên cứu hiện đại và các cơ sở vật chất
+    đạt chuẩn quốc tế. Ngoài ra, các phòng thí nghiệm và khu vực học tập đều được
+    trang bị công nghệ tiên tiến phục vụ cho nghiên cứu chuyên sâu. CÁC ĐIỂM ĐẶC BIỆT
+    Học bằng tiếng Anh: Mọi môn học đều được giảng dạy bằng tiếng Anh, giúp sinh viên
+    nâng cao khả năng ngoại ngữ và tiếp cận kiến thức quốc tế. Cơ hội trao đổi quốc
+    tế: Sinh viên có cơ hội tham gia các chương trình trao đổi sinh viên, học tập
+    tại các trường đối tác quốc tế. Hỗ trợ nghề nghiệp: Chương trình cung cấp các
+    khóa huấn luyện và tư vấn nghề nghiệp, giúp sinh viên chuẩn bị tốt cho công việc
+    sau khi tốt nghiệp. TRIỂN VỌNG NGHỀ NGHIỆP & CƠ HỘI VIỆC LÀM
+    Với nền tảng kiến thức vững chắc và khả năng sử dụng tiếng Anh, sinh viên chương
+    trình tăng cường tiếng Anh có thể làm việc tại:
+    Các tổ chức nghiên cứu quốc tế, các viện nghiên cứu sinh học.'
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+metrics:
+- cosine_accuracy@1
+- cosine_accuracy@3
+- cosine_accuracy@5
+- cosine_accuracy@10
+- cosine_precision@1
+- cosine_precision@3
+- cosine_precision@5
+- cosine_precision@10
+- cosine_recall@1
+- cosine_recall@3
+- cosine_recall@5
+- cosine_recall@10
+- cosine_ndcg@10
+- cosine_mrr@10
+- cosine_map@100
+model-index:
+- name: SentenceTransformer based on dangvantuan/vietnamese-document-embedding
+  results:
+  - task:
+      type: information-retrieval
+      name: Information Retrieval
+    dataset:
+      name: dim 768
+      type: dim_768
+    metrics:
+    - type: cosine_accuracy@1
+      value: 0.466
+      name: Cosine Accuracy@1
+    - type: cosine_accuracy@3
+      value: 0.63
+      name: Cosine Accuracy@3
+    - type: cosine_accuracy@5
+      value: 0.678
+      name: Cosine Accuracy@5
+    - type: cosine_accuracy@10
+      value: 0.752
+      name: Cosine Accuracy@10
+    - type: cosine_precision@1
+      value: 0.466
+      name: Cosine Precision@1
+    - type: cosine_precision@3
+      value: 0.21
+      name: Cosine Precision@3
+    - type: cosine_precision@5
+      value: 0.1356
+      name: Cosine Precision@5
+    - type: cosine_precision@10
+      value: 0.0752
+      name: Cosine Precision@10
+    - type: cosine_recall@1
+      value: 0.466
+      name: Cosine Recall@1
+    - type: cosine_recall@3
+      value: 0.63
+      name: Cosine Recall@3
+    - type: cosine_recall@5
+      value: 0.678
+      name: Cosine Recall@5
+    - type: cosine_recall@10
+      value: 0.752
+      name: Cosine Recall@10
+    - type: cosine_ndcg@10
+      value: 0.6061109322735273
+      name: Cosine Ndcg@10
+    - type: cosine_mrr@10
+      value: 0.5599031746031748
+      name: Cosine Mrr@10
+    - type: cosine_map@100
+      value: 0.5685727812660539
+      name: Cosine Map@100
+  - task:
+      type: information-retrieval
+      name: Information Retrieval
+    dataset:
+      name: dim 512
+      type: dim_512
+    metrics:
+    - type: cosine_accuracy@1
+      value: 0.464
+      name: Cosine Accuracy@1
+    - type: cosine_accuracy@3
+      value: 0.626
+      name: Cosine Accuracy@3
+    - type: cosine_accuracy@5
+      value: 0.684
+      name: Cosine Accuracy@5
+    - type: cosine_accuracy@10
+      value: 0.746
+      name: Cosine Accuracy@10
+    - type: cosine_precision@1
+      value: 0.464
+      name: Cosine Precision@1
+    - type: cosine_precision@3
+      value: 0.20866666666666664
+      name: Cosine Precision@3
+    - type: cosine_precision@5
+      value: 0.1368
+      name: Cosine Precision@5
+    - type: cosine_precision@10
+      value: 0.0746
+      name: Cosine Precision@10
+    - type: cosine_recall@1
+      value: 0.464
+      name: Cosine Recall@1
+    - type: cosine_recall@3
+      value: 0.626
+      name: Cosine Recall@3
+    - type: cosine_recall@5
+      value: 0.684
+      name: Cosine Recall@5
+    - type: cosine_recall@10
+      value: 0.746
+      name: Cosine Recall@10
+    - type: cosine_ndcg@10
+      value: 0.6015125723964755
+      name: Cosine Ndcg@10
+    - type: cosine_mrr@10
+      value: 0.55565
+      name: Cosine Mrr@10
+    - type: cosine_map@100
+      value: 0.5647769486133264
+      name: Cosine Map@100
+  - task:
+      type: information-retrieval
+      name: Information Retrieval
+    dataset:
+      name: dim 256
+      type: dim_256
+    metrics:
+    - type: cosine_accuracy@1
+      value: 0.452
+      name: Cosine Accuracy@1
+    - type: cosine_accuracy@3
+      value: 0.602
+      name: Cosine Accuracy@3
+    - type: cosine_accuracy@5
+      value: 0.672
+      name: Cosine Accuracy@5
+    - type: cosine_accuracy@10
+      value: 0.732
+      name: Cosine Accuracy@10
+    - type: cosine_precision@1
+      value: 0.452
+      name: Cosine Precision@1
+    - type: cosine_precision@3
+      value: 0.20066666666666666
+      name: Cosine Precision@3
+    - type: cosine_precision@5
+      value: 0.13439999999999996
+      name: Cosine Precision@5
+    - type: cosine_precision@10
+      value: 0.0732
+      name: Cosine Precision@10
+    - type: cosine_recall@1
+      value: 0.452
+      name: Cosine Recall@1
+    - type: cosine_recall@3
+      value: 0.602
+      name: Cosine Recall@3
+    - type: cosine_recall@5
+      value: 0.672
+      name: Cosine Recall@5
+    - type: cosine_recall@10
+      value: 0.732
+      name: Cosine Recall@10
+    - type: cosine_ndcg@10
+      value: 0.5883841253468854
+      name: Cosine Ndcg@10
+    - type: cosine_mrr@10
+      value: 0.5428119047619047
+      name: Cosine Mrr@10
+    - type: cosine_map@100
+      value: 0.5525017191408654
+      name: Cosine Map@100
+---
+# SentenceTransformer based on dangvantuan/vietnamese-document-embedding
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [dangvantuan/vietnamese-document-embedding](https://huggingface.co/dangvantuan/vietnamese-document-embedding). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [dangvantuan/vietnamese-document-embedding](https://huggingface.co/dangvantuan/vietnamese-document-embedding) <!-- at revision 6fa4e2f8ed2d33120b0f4442cc81f8f973c3f56b -->
+- **Maximum Sequence Length:** 8192 tokens
+- **Output Dimensionality:** 768 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'VietnameseModel'})
+  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Normalize()
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("HoangVuSnape/vietnamese-document-embedding_pr_v3")
+# Run inference
+sentences = [
+    'Chương trình đào tạo của Đại học FPT có những điểm nổi bật nào?',
+    'Học phí: 11,900,000 VNĐ/mức. 3. Các khoản phí cần nộp khi nhập học\n\nHọc phí kỳ định hướng: 11,900,000 VNĐ (có 01 học kỳ định hướng, chỉ áp dụng cho SV mới nhập học năm 2024)\n\nHọc phí 01 mức tiếng Anh chuẩn bị: 11,900,000 VNĐ/mức\n\n1. Giáo trình chuẩn quốc tế\n\nChương trình học của Đại học FPT được thiết kế theo chuẩn của Hiệp hội Máy tính (Association for Computing Machinery-ACM), chuẩn đào tạo kỹ sư phần mềm của Accreditation Board for Engineering and Technology (ABET – Mỹ), Hiệp hội Phần mềm Việt Nam (VINASA)\n\nTất cả các giáo trình của Đại học FPT đều được nhập khẩu từ nước ngoài về và cập nhật liên tục theo từng kỳ học\n\n2. Chú trọng vào thực hành\n\nVới thời lượng số tiết thực hành lên đến 60%, sinh viên Đại học FPT đã được làm quen với công việc ngay từ trên ghế nhà trường. 1 lớp học tối đa 30 sinh viên nên trong các giờ thực hành sinh viên sẽ được các thầy cô giảng viên hướng dẫn rất tỉ mỉ và chi tiết\n\n3. Chú trọng vào ngoại ngữ và kỹ năng mềm\n\nNăm đầu tiên các bạn sinh viên sẽ phải trải qua 6 kỳ học tiếng Anh để có thể đạt được nền tảng ngoại ngữ nhất định trước khi sang năm thứ 2 bước vào kỳ học chuyên ngành. Ngoài tiếng Anh là ngôn ngữ bắt buộc ra thì sinh viên Đại học FPT còn được học thêm tiếng Nhật. Như vậy sau khi ra trường các bạn sinh viên có thêm rất nhiều cơ hội để có thể làm việc tại nước ngoài\n\nNgay từ những ngày đầu thành lập, đại học FPT đã đề cao việc phát triển bản thân đặc biệt là kỹ năng mềm cho sinh viên. Các bạn sinh viên sẽ được học rất nhiều các khoá học như kỹ năng thuyết trình, tư duy phản biện, kỹ năng làm việc nhóm, kỹ năng phỏng vấn, kỹ năng viết CV .',
+    'STT Mã ngành Tên ngành Tổ hợp môn Điểm chuẩn Ghi chú 1 7220201 Ngôn ngữ Anh A; B; C01; C02; C03; C04 21 2 7220204 Ngôn ngữ Trung Quốc A; B; C01; C02; C03; C04 21 3 7220209 Ngôn ngữ Nhật A; B; C01; C02; C03; C04 21 4 7220210 Ngôn ngữ Hàn Quốc A; B; C01; C02; C03; C04 21 5 7320106 Công nghệ truyền thông A; B; C01; C02; C03; C04 21 6 7340101 Quản trị kinh doanh A; B; C01; C02; C03; C04 21 7 7480201 Công nghệ thông tin A; B; C01; C02; C03; C04 21 Phương thức xét tuyển của fpt khá giống nhau.',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# (3, 768)
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+# tensor([[1.0000, 0.7188, 0.5351],
+#         [0.7188, 1.0000, 0.5165],
+#         [0.5351, 0.5165, 1.0000]])
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+## Evaluation
+### Metrics
+#### Information Retrieval
+* Dataset: `dim_768`
+* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
+  ```json
+  {
+      "truncate_dim": 768
+  }
+  ```
+| Metric              | Value      |
+|:--------------------|:-----------|
+| cosine_accuracy@1   | 0.466      |
+| cosine_accuracy@3   | 0.63       |
+| cosine_accuracy@5   | 0.678      |
+| cosine_accuracy@10  | 0.752      |
+| cosine_precision@1  | 0.466      |
+| cosine_precision@3  | 0.21       |
+| cosine_precision@5  | 0.1356     |
+| cosine_precision@10 | 0.0752     |
+| cosine_recall@1     | 0.466      |
+| cosine_recall@3     | 0.63       |
+| cosine_recall@5     | 0.678      |
+| cosine_recall@10    | 0.752      |
+| **cosine_ndcg@10**  | **0.6061** |
+| cosine_mrr@10       | 0.5599     |
+| cosine_map@100      | 0.5686     |
+#### Information Retrieval
+* Dataset: `dim_512`
+* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
+  ```json
+  {
+      "truncate_dim": 512
+  }
+  ```
+| Metric              | Value      |
+|:--------------------|:-----------|
+| cosine_accuracy@1   | 0.464      |
+| cosine_accuracy@3   | 0.626      |
+| cosine_accuracy@5   | 0.684      |
+| cosine_accuracy@10  | 0.746      |
+| cosine_precision@1  | 0.464      |
+| cosine_precision@3  | 0.2087     |
+| cosine_precision@5  | 0.1368     |
+| cosine_precision@10 | 0.0746     |
+| cosine_recall@1     | 0.464      |
+| cosine_recall@3     | 0.626      |
+| cosine_recall@5     | 0.684      |
+| cosine_recall@10    | 0.746      |
+| **cosine_ndcg@10**  | **0.6015** |
+| cosine_mrr@10       | 0.5556     |
+| cosine_map@100      | 0.5648     |
+#### Information Retrieval
+* Dataset: `dim_256`
+* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
+  ```json
+  {
+      "truncate_dim": 256
+  }
+  ```
+| Metric              | Value      |
+|:--------------------|:-----------|
+| cosine_accuracy@1   | 0.452      |
+| cosine_accuracy@3   | 0.602      |
+| cosine_accuracy@5   | 0.672      |
+| cosine_accuracy@10  | 0.732      |
+| cosine_precision@1  | 0.452      |
+| cosine_precision@3  | 0.2007     |
+| cosine_precision@5  | 0.1344     |
+| cosine_precision@10 | 0.0732     |
+| cosine_recall@1     | 0.452      |
+| cosine_recall@3     | 0.602      |
+| cosine_recall@5     | 0.672      |
+| cosine_recall@10    | 0.732      |
+| **cosine_ndcg@10**  | **0.5884** |
+| cosine_mrr@10       | 0.5428     |
+| cosine_map@100      | 0.5525     |
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 500 training samples
+* Columns: <code>anchor</code> and <code>positive</code>
+* Approximate statistics based on the first 500 samples:
+  |         | anchor                                                                             | positive                                                                              |
+  |:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                                |
+  | details | <ul><li>min: 10 tokens</li><li>mean: 26.05 tokens</li><li>max: 62 tokens</li></ul> | <ul><li>min: 29 tokens</li><li>mean: 605.11 tokens</li><li>max: 6602 tokens</li></ul> |
+* Samples:
+  | anchor                                                                                                                                                     | positive                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+  |:-----------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>Ngành Quản lý Tài nguyên và Môi trường trang bị cho sinh viên những kiến thức và kỹ năng gì?</code>                                                  | <code>Sau khi tốt nghiệp, người học sẽ:<br><br>Có kiến thức cơ bản về toán học, khoa học tự nhiên, đáp ứng cho việc tiếp thu các kiến thức giáo dục chuyên nghiệp và khả năng học tập ở trình độ cao hơn<br><br>Có các kiến thức kỹ thuật cơ sở ngành và chuyên ngành giúp đủ năng lực phát hiện, giải quyết các vấn đề liên quan đến công nghệ sản xuất, chế tạo và ứng dụng vật liệu vào trong xây dựng, kiểm soát chất lượng nguyên vật liệu và cấu kiện sản phẩm xây dựng, nghiên cứu sản xuất chế tạo và phát triển các loại vật liệu mới, hiện đại, tiên tiến, độc đáo, hiệu quả, xanh, bền vững… nhằm hướng tới sự phát triển bền vững trong công nghiệp xây dựng và kiến trúc, thiết kế và thi công trong các công trình xây dựng; có tính sáng tạo trong hoạt động nghề nghiệp, có khả năng tự học và tự nghiên cứu;<br><br>Có kỹ năng cá nhân, nghề nghiệp, giao tiếp, làm việc nhóm đủ để làm việc trong môi trường làm việc liên ngành, đa văn hóa;<br><br>Có hiểu biết về kinh tế, chính trị, có các kiến thức cơ bản trong lĩnh vực khoa học xã hội và n...</code> |
+  | <code>Chương trình Kỹ thuật Môi trường đào tạo sinh viên về những năng lực nào và có điểm gì nổi bật đối với chương trình giảng dạy bằng tiếng Anh?</code> | <code>Sau khi tốt nghiệp, người học sẽ:<br><br>Có kiến thức cơ bản về toán học, khoa học tự nhiên, đáp ứng cho việc tiếp thu các kiến thức giáo dục chuyên nghiệp và khả năng học tập ở trình độ cao hơn<br><br>Có các kiến thức kỹ thuật cơ sở ngành và chuyên ngành giúp đủ năng lực phát hiện, giải quyết các vấn đề liên quan đến công nghệ sản xuất, chế tạo và ứng dụng vật liệu vào trong xây dựng, kiểm soát chất lượng nguyên vật liệu và cấu kiện sản phẩm xây dựng, nghiên cứu sản xuất chế tạo và phát triển các loại vật liệu mới, hiện đại, tiên tiến, độc đáo, hiệu quả, xanh, bền vững… nhằm hướng tới sự phát triển bền vững trong công nghiệp xây dựng và kiến trúc, thiết kế và thi công trong các công trình xây dựng; có tính sáng tạo trong hoạt động nghề nghiệp, có khả năng tự học và tự nghiên cứu;<br><br>Có kỹ năng cá nhân, nghề nghiệp, giao tiếp, làm việc nhóm đủ để làm việc trong môi trường làm việc liên ngành, đa văn hóa;<br><br>Có hiểu biết về kinh tế, chính trị, có các kiến thức cơ bản trong lĩnh vực khoa học xã hội và n...</code> |
+  | <code>Ngành Kỹ thuật Dầu khí và Kỹ thuật Địa chất tập trung nghiên cứu và ứng dụng những lĩnh vực cốt lõi nào?</code>                                      | <code>Các công ty nghiên cứu và khảo sát địa chất, tư vấn về nền móng công trình. Các tổ chức liên quan đến quy hoạch và phát triển đô thị. Kỹ thuật Dầu khí<br><br>Tổng quan<br><br>Kỹ thuật Dầu khí là ngành học chuyên nghiên cứu về các kỹ thuật khai thác, sản xuất và xử lý dầu khí. Sinh viên sẽ học các phương pháp khoan, khai thác dầu, khí tự nhiên, và xử lý các vấn đề kỹ thuật trong ngành dầu khí, từ việc tìm kiếm và khai thác tài nguyên cho đến việc tối ưu hóa quy trình sản xuất. CÁC ĐIỂM ĐẶC BIỆT<br><br>Khả năng ứng dụng cao: Sinh viên ngành Kỹ thuật Dầu khí sẽ được trang bị kiến thức thực tế về công nghệ khai thác dầu khí và các phương pháp tối ưu hóa sản xuất. Ngành công nghiệp chiến lược: Dầu khí vẫn là một trong những ngành công nghiệp mũi nhọn và cần nguồn nhân lực có trình độ cao trong việc khai thác và xử lý tài nguyên thiên nhiên. Triển vọng việc làm<br><br>Các công ty khai thác dầu khí trong nước và quốc tế. Các công ty tư vấn và kỹ thuật dầu khí, nghiên cứu các giải pháp tối ưu trong khai thác. Các côn...</code> |
+* Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
+  ```json
+  {
+      "loss": "MultipleNegativesRankingLoss",
+      "matryoshka_dims": [
+          768,
+          512,
+          256
+      ],
+      "matryoshka_weights": [
+          1,
+          1,
+          1
+      ],
+      "n_dims_per_step": -1
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: steps
+- `per_device_train_batch_size`: 16
+- `per_device_eval_batch_size`: 16
+- `gradient_accumulation_steps`: 8
+- `learning_rate`: 2e-05
+- `num_train_epochs`: 1
+- `lr_scheduler_type`: cosine
+- `warmup_ratio`: 0.1
+- `bf16`: True
+- `tf32`: True
+- `dataloader_drop_last`: True
+- `dataloader_num_workers`: 8
+- `load_best_model_at_end`: True
+- `batch_sampler`: no_duplicates
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: steps
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 16
+- `per_device_eval_batch_size`: 16
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 8
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 2e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1.0
+- `num_train_epochs`: 1
+- `max_steps`: -1
+- `lr_scheduler_type`: cosine
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.1
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: True
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: True
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: True
+- `dataloader_num_workers`: 8
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: True
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch_fused
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `hub_revision`: None
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `liger_kernel_config`: None
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: None
+- `batch_sampler`: no_duplicates
+- `multi_dataset_batch_sampler`: proportional
+- `router_mapping`: {}
+- `learning_rate_mapping`: {}
+</details>
+### Training Logs
+| Epoch | Step | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 |
+|:-----:|:----:|:----------------------:|:----------------------:|:----------------------:|
+| -1    | -1   | 0.6061                 | 0.6015                 | 0.5884                 |
+### Framework Versions
+- Python: 3.10.12
+- Sentence Transformers: 5.1.0
+- Transformers: 4.55.2
+- PyTorch: 2.8.0+cu128
+- Accelerate: 1.10.0
+- Datasets: 4.0.0
+- Tokenizers: 0.21.4
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### MatryoshkaLoss
+```bibtex
+@misc{kusupati2024matryoshka,
+    title={Matryoshka Representation Learning},
+    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
+    year={2024},
+    eprint={2205.13147},
+    archivePrefix={arXiv},
+    primaryClass={cs.LG}
+}
+```
+#### MultipleNegativesRankingLoss
+```bibtex
+@misc{henderson2017efficient,
+    title={Efficient Natural Language Response Suggestion for Smart Reply},
+    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+    year={2017},
+    eprint={1705.00652},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "architectures": [
+    "VietnameseModel"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration.VietnameseConfig",
+    "AutoModel": "modeling.VietnameseModel",
+    "AutoModelForMaskedLM": "dangvantuan/Vietnamese_impl--modeling.VietnameseForMaskedLM",
+    "AutoModelForMultipleChoice": "dangvantuan/Vietnamese_impl--modeling.VietnameseForMultipleChoice",
+    "AutoModelForQuestionAnswering": "dangvantuan/Vietnamese_impl--modeling.VietnameseForQuestionAnswering",
+    "AutoModelForSequenceClassification": "dangvantuan/Vietnamese_impl--modeling.VietnameseForSequenceClassification",
+    "AutoModelForTokenClassification": "dangvantuan/Vietnamese_impl--modeling.VietnameseForTokenClassification"
+  },
+  "classifier_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "Vietnamese",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pack_qkv": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 8.0,
+    "type": "ntk"
+  },
+  "rope_theta": 20000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.2",
+  "type_vocab_size": 1,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 250048
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "__version__": {
+    "sentence_transformers": "5.1.0",
+    "transformers": "4.55.2",
+    "pytorch": "2.8.0+cu128"
+  },
+  "prompts": {
+    "query": "",
+    "document": ""
+  },
+  "default_prompt_name": null,
+  "model_type": "SentenceTransformer",
+  "similarity_fn_name": "cosine"
+}

configuration.py ADDED Viewed

	@@ -0,0 +1,114 @@

+# limitations under the License.
+""" Vietnamese model configuration"""
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+logger = logging.get_logger(__name__)
+class VietnameseConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`VietnameseModel`] or a [`TFVietnameseModel`]. It is used to
+    instantiate a Vietnamese model according to the specified arguments, defining the model architecture. Instantiating a
+    configuration with the defaults will yield a similar configuration to that of the Vietnamese
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vocab_size (`int`, *optional*, defaults to 30522):
+            Vocabulary size of the Vietnamese model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`VietnameseModel`] or [`TFVietnameseModel`].
+        hidden_size (`int`, *optional*, defaults to 768):
+            Dimensionality of the encoder layers and the pooler layer.
+        num_hidden_layers (`int`, *optional*, defaults to 12):
+            Number of hidden layers in the Transformer encoder.
+        num_attention_heads (`int`, *optional*, defaults to 12):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        intermediate_size (`int`, *optional*, defaults to 3072):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
+        hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
+            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+            `"relu"`, `"silu"` and `"gelu_Vietnamese"` are supported.
+        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for the attention probabilities.
+        max_position_embeddings (`int`, *optional*, defaults to 512):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        type_vocab_size (`int`, *optional*, defaults to 2):
+            The vocabulary size of the `token_type_ids` passed when calling [`VietnameseModel`] or [`TFVietnameseModel`].
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
+            The epsilon used by the layer normalization layers.
+        position_embedding_type (`str`, *optional*, defaults to `"rope"`):
+            Type of position embedding. Choose one of `"absolute"`, `"rope"`.
+        rope_theta (`float`, *optional*, defaults to 10000.0):
+            The base period of the RoPE embeddings.
+        rope_scaling (`Dict`, *optional*):
+            Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
+            strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
+            `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
+            `max_position_embeddings` to the expected new maximum. See the following thread for more information on how
+            these scaling strategies behave:
+            https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
+            experimental feature, subject to breaking API changes in future versions.
+        classifier_dropout (`float`, *optional*):
+            The dropout ratio for the classification head.
+    Examples:
+    """
+    model_type = "Vietnamese"
+    def __init__(
+        self,
+        vocab_size=30528,
+        hidden_size=768,
+        num_hidden_layers=12,
+        num_attention_heads=12,
+        intermediate_size=3072,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.0,
+        max_position_embeddings=2048,
+        type_vocab_size=1,
+        initializer_range=0.02,
+        layer_norm_type='layer_norm',
+        layer_norm_eps=1e-12,
+        # pad_token_id=0,
+        position_embedding_type="rope",
+        rope_theta=10000.0,
+        rope_scaling=None,
+        classifier_dropout=None,
+        pack_qkv=True,
+        unpad_inputs=False,
+        use_memory_efficient_attention=False,
+        logn_attention_scale=False,
+        logn_attention_clip1=False,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_act = hidden_act
+        self.intermediate_size = intermediate_size
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.initializer_range = initializer_range
+        self.layer_norm_type = layer_norm_type
+        self.layer_norm_eps = layer_norm_eps
+        self.position_embedding_type = position_embedding_type
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self.classifier_dropout = classifier_dropout
+        self.pack_qkv = pack_qkv
+        self.unpad_inputs = unpad_inputs
+        self.use_memory_efficient_attention = use_memory_efficient_attention
+        self.logn_attention_scale = logn_attention_scale
+        self.logn_attention_clip1 = logn_attention_clip1

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26b7c934bc01c424e66fca55f931b702b22940ca9af56b98d849eeaeca96ca09
+size 1221487872

modeling.py ADDED Viewed

	@@ -0,0 +1,1319 @@

+"""PyTorch Vietnamese model."""
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+import torch
+import torch.utils.checkpoint
+from torch import nn
+from transformers.activations import ACT2FN
+from transformers.modeling_outputs import (
+    BaseModelOutput,
+    BaseModelOutputWithPooling,
+    MaskedLMOutput,
+    MultipleChoiceModelOutput,
+    QuestionAnsweringModelOutput,
+    SequenceClassifierOutput,
+    ModelOutput,
+)
+from transformers.modeling_utils import PreTrainedModel
+from transformers.utils import logging
+try:
+    import xformers.ops as xops
+except ImportError as e:
+    xops = None
+from .configuration import VietnameseConfig
+logger = logging.get_logger(__name__)
+# Adapted from https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/bert_padding.py
+# Which was adapted from https://github.com/mlcommons/training_results_v1.1/blob/main/NVIDIA/benchmarks/bert/implementations/pytorch/padding.py
+class IndexFirstAxis(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, input, indices):
+        ctx.save_for_backward(indices)
+        assert input.ndim >= 2
+        ctx.first_axis_dim, other_shape = input.shape[0], input.shape[1:]
+        second_dim = other_shape.numel()
+        return torch.gather(
+            input.view(ctx.first_axis_dim, second_dim),
+            0,
+            indices.unsqueeze(-1).expand(indices.size(0), second_dim)
+        ).reshape(-1, *other_shape)
+    @staticmethod
+    def backward(ctx, grad_output):
+        (indices,) = ctx.saved_tensors
+        assert grad_output.ndim >= 2
+        other_shape = grad_output.shape[1:]
+        grad_output = grad_output.view(grad_output.size(0), other_shape.numel())
+        grad_input = torch.zeros(
+            [ctx.first_axis_dim, grad_output.shape[1]],
+            device=grad_output.device,
+            dtype=grad_output.dtype,
+        )
+        grad_input.scatter_(
+            0, indices.unsqueeze(-1).expand(indices.size(0), grad_output.size(1)), grad_output
+        )
+        return grad_input.reshape(ctx.first_axis_dim, *other_shape), None
+index_first_axis = IndexFirstAxis.apply
+def unpad_input(hidden_states, attention_mask=None, indices=None):
+    """
+    Arguments:
+        hidden_states: (batch, seqlen, ...)
+        attention_mask: (batch, seqlen), bool / int, 1 means valid and 0 means not valid.
+        indices: (total_nnz), the indices of non-masked tokens from the flattened input sequence.
+    Return:
+        hidden_states: (total_nnz, ...), where total_nnz = number of tokens in selected in attention_mask.
+    """
+    if indices is None:
+        assert attention_mask is not None
+        indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
+    hidden_states = hidden_states.view(-1, *hidden_states.shape[2:])
+    return index_first_axis(hidden_states, indices)
+class IndexPutFirstAxis(torch.autograd.Function):
+    @staticmethod
+    def forward(
+        ctx,
+        values: torch.Tensor,
+        indices: torch.Tensor,
+        first_axis_dim
+    ) -> torch.Tensor:
+        ctx.save_for_backward(indices)
+        assert indices.ndim == 1
+        assert values.ndim >= 2
+        output = torch.zeros(
+            first_axis_dim, *values.shape[1:], device=values.device, dtype=values.dtype
+        )
+        output[indices] = values
+        return output
+    @staticmethod
+    def backward(ctx, grad_output: torch.Tensor) -> Tuple[torch.Tensor, None, None]:
+        indices, = ctx.saved_tensors
+        grad_values = grad_output[indices]
+        return grad_values, None, None
+index_put_first_axis = IndexPutFirstAxis.apply
+def pad_input(inputs: torch.Tensor, indices: torch.Tensor, batch: int, seqlen: int) -> torch.Tensor:
+    """Add padding to sequences.
+    Arguments:
+        inputs: (total_nnz, ...), where total_nnz = number of tokens in selected in attention_mask.
+        indices: (total_nnz), `indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()`
+        batch: int batch_size
+        seqlen: int max sequence length
+    Returns:
+        inputs: (batch, seqlen, ...)
+    """
+    output = index_put_first_axis(inputs, indices, batch * seqlen)
+    return output.view(batch, seqlen, *inputs.shape[1:])
+def rotate_half(x):
+    """Rotates half the hidden dims of the input."""
+    x1 = x[..., : x.shape[-1] // 2]
+    x2 = x[..., x.shape[-1] // 2 :]
+    return torch.cat((-x2, x1), dim=-1)
+def apply_rotary_pos_emb(q, k, cos, sin):
+    """Applies Rotary Position Embedding to the query and key tensors.
+    Args:
+        q (`torch.Tensor`): The query tensor.
+        k (`torch.Tensor`): The key tensor.
+        cos (`torch.Tensor`): The cosine part of the rotary embedding.
+        sin (`torch.Tensor`): The sine part of the rotary embedding.
+    Returns:
+        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
+    """
+    cos, sin = cos.to(q.dtype), sin.to(q.dtype)
+    q_embed = (q * cos) + (rotate_half(q) * sin)
+    k_embed = (k * cos) + (rotate_half(k) * sin)
+    return q_embed, k_embed
+class RotaryEmbedding(torch.nn.Module):
+    def __init__(self, dim, max_position_embeddings=512, base=10000.0, device=None):
+        super().__init__()
+        self.dim = dim
+        self.max_position_embeddings = max_position_embeddings
+        self.base = base
+        inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
+        self.register_buffer("inv_freq", inv_freq, persistent=False)
+        self._set_cos_sin_cache(
+            seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.get_default_dtype()
+        )
+    def _set_cos_sin_cache(self, seq_len, device, dtype):
+        self.max_seq_len_cached = seq_len
+        t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.float32)
+        freqs = torch.einsum("i,j->ij", t, self.inv_freq)
+        emb = torch.cat((freqs, freqs), dim=-1)
+        self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False)
+        self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False)
+    def forward(self, x, seq_len=None):
+        if seq_len > self.max_seq_len_cached:
+            self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
+        return (
+            self.cos_cached[:seq_len, ...].to(dtype=x.dtype),
+            self.sin_cached[:seq_len, ...].to(dtype=x.dtype),
+        )
+class NTKScalingRotaryEmbedding(RotaryEmbedding):
+    """RotaryEmbedding extended with fixed and mixed NTK scaling. https://kexue.fm/archives/9706
+    `scaling_factor` controls how far the inverse frequencies are stretched. With `mixed_b=None` the
+    "fixed" formula is used; otherwise `mixed_b` is the exponent of the per-frequency "mixed" formula.
+    The rescaled frequencies are only computed when the cache is built for more positions than
+    `max_position_embeddings`.
+    """
+    def __init__(self, dim, max_position_embeddings=512, base=10000, device=None, scaling_factor=1.0, mixed_b=None):
+        # Set before super().__init__(): the base constructor calls self._set_cos_sin_cache,
+        # which is overridden below and reads both attributes.
+        self.scaling_factor = scaling_factor
+        self.mixed_b = mixed_b
+        super().__init__(dim, max_position_embeddings, base, device)
+        # Rebuild the cache for the scaled length. When scaling_factor > 1 this exceeds
+        # self.max_position_embeddings and so takes the rescaling branch below.
+        max_position_embeddings = max_position_embeddings * self.scaling_factor
+        self._set_cos_sin_cache(max_position_embeddings, self.inv_freq.device, torch.get_default_dtype())
+    def _set_cos_sin_cache(self, seq_len, device, dtype):
+        """Build the cos/sin buffers for `seq_len` positions, rescaling `inv_freq` for long caches."""
+        self.max_seq_len_cached = seq_len
+        if seq_len > self.max_position_embeddings:
+            # Fixed NTK multiplies the base by scaling_factor; mixed NTK keeps the base as is.
+            base = self.base * (self.scaling_factor if self.mixed_b is None else 1)
+            inv_freq = 1.0 / (base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
+            if self.mixed_b is None:
+                # Fixed NTK: every frequency is further divided by scaling_factor ** (2 / dim).
+                inv_freq = inv_freq / self.scaling_factor ** (2 / self.dim)
+            else:
+                # Mixed NTK: frequency m (1-based) is divided by exp(a * m ** mixed_b), with `a`
+                # chosen so that the divisor equals scaling_factor at m = dim / 2.
+                a = torch.tensor(self.scaling_factor).log() / (self.dim / 2) ** self.mixed_b
+                lambda_1_m = (a * torch.arange(1, self.dim // 2 + 1).float().to(device) ** self.mixed_b).exp()
+                inv_freq = inv_freq / lambda_1_m
+            # Replaces the buffer; the lines below, and any later call that skips this branch, read it.
+            self.register_buffer("inv_freq", inv_freq, persistent=False)
+        t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.float32)
+        freqs = torch.einsum("i,j->ij", t, self.inv_freq)
+        emb = torch.cat((freqs, freqs), dim=-1)
+        self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False)
+        self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False)
+class RMSNorm(nn.Module):
+    def __init__(self, hidden_size, eps=1e-6):
+        """
+        RMSNorm is equivalent to T5LayerNorm
+        """
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(hidden_size))
+        self.variance_epsilon = eps
+    def forward(self, hidden_states):
+        input_dtype = hidden_states.dtype
+        hidden_states = hidden_states.to(torch.float32)
+        variance = hidden_states.pow(2).mean(-1, keepdim=True)
+        hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
+        return self.weight * hidden_states.to(input_dtype)
+LAYER_NORM = {
+    'layer_norm': nn.LayerNorm,
+    'rms_norm': RMSNorm
+}
+class VietnameseEmbeddings(nn.Module):
+    """
+    Embedding and Unpadding.
+    """
+    def __init__(self, config: VietnameseConfig):
+        super().__init__()
+        self.padding_idx = config.pad_token_id
+        self.word_embeddings = nn.Embedding(
+            config.vocab_size, config.hidden_size, padding_idx=self.padding_idx
+        )
+        self.position_embedding_type = config.position_embedding_type
+        if self.position_embedding_type == 'absolute':
+            self.position_embeddings = nn.Embedding(
+                config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
+            )
+        elif self.position_embedding_type == 'rope':
+            self._init_rope(config)
+        else:
+            raise ValueError
+        self.type_vocab_size = config.type_vocab_size
+        if self.type_vocab_size > 0:
+            self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
+        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+        self.dropout = nn.Dropout(config.hidden_dropout_prob)
+        self.register_buffer(
+            "position_ids", torch.arange(config.max_position_embeddings), persistent=False
+        )
+    def _init_rope(self, config):
+        kwargs = dict(
+            dim=int(config.hidden_size / config.num_attention_heads),
+            max_position_embeddings=config.max_position_embeddings,
+            base=config.rope_theta
+        )
+        if config.rope_scaling is None:
+            self.rotary_emb = RotaryEmbedding(**kwargs)
+        else:
+            kwargs.update(scaling_factor=config.rope_scaling["factor"])
+            scaling_type = config.rope_scaling["type"]
+            if scaling_type == 'ntk':
+                kwargs.update(mixed_b=config.rope_scaling.get('mixed_b', None))
+                self.rotary_emb = NTKScalingRotaryEmbedding(**kwargs)
+            else:
+                raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
+    def forward(
+        self,
+        unpad_inputs: bool,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        length: Optional[List[int]] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor, Optional[Tuple], Optional[List[int]]]:
+        if inputs_embeds is None:
+            device, input_shape = input_ids.device, input_ids.shape
+        else:
+            device, input_shape = inputs_embeds.device, inputs_embeds.shape[:2]
+        batch_size, seq_length = input_shape
+        if attention_mask is None:
+            attention_mask = torch.ones(input_shape, device=device)
+            if length is not None:
+                for i, l in enumerate(length):
+                    attention_mask[i, l:] = 0
+        if unpad_inputs:
+            attention_mask_bool = attention_mask.bool()
+            if length is None:
+                length = attention_mask.sum(-1).tolist()
+        if inputs_embeds is None:
+            if unpad_inputs:
+                input_ids = input_ids[attention_mask_bool].unsqueeze(0)
+            inputs_embeds = self.word_embeddings(input_ids)
+        else:
+            if unpad_inputs:
+                inputs_embeds = inputs_embeds[attention_mask_bool].unsqueeze(0)
+        embeddings = inputs_embeds
+        if position_ids is None:
+            if seq_length > self.position_ids.size(0):
+                self.register_buffer(
+                    "position_ids", torch.arange(seq_length, device=embeddings.device), persistent=False
+                )
+            if unpad_inputs:
+                position_ids = torch.cat([self.position_ids[:l] for l in length]).unsqueeze(0)
+            else:
+                position_ids = self.position_ids[:seq_length].expand(batch_size, -1)
+        elif unpad_inputs:
+            position_ids = position_ids[attention_mask_bool].unsqueeze(0)
+        if self.position_embedding_type == 'rope':
+            rope_cos, rope_sin = self.rotary_emb(inputs_embeds, seq_len=seq_length)
+            rope_cos = rope_cos[position_ids].unsqueeze(2)
+            rope_sin = rope_sin[position_ids].unsqueeze(2)
+            rope_embeds = rope_cos, rope_sin
+        else:
+            rope_embeds = None
+        if self.type_vocab_size > 0:
+            if token_type_ids is None:
+                token_type_ids = position_ids.mul(0)
+            else:
+                if self.type_vocab_size < 2:
+                    token_type_ids.mul_(0)
+                if unpad_inputs:
+                    token_type_ids = token_type_ids[attention_mask_bool].unsqueeze(0)
+            token_type_embeddings = self.token_type_embeddings(token_type_ids)
+            embeddings = embeddings + token_type_embeddings
+        if self.position_embedding_type == "absolute":
+            position_embeddings = self.position_embeddings(position_ids)
+            embeddings = embeddings + position_embeddings
+        embeddings = self.LayerNorm(embeddings)
+        embeddings = self.dropout(embeddings)
+        return embeddings, attention_mask, rope_embeds, length
+class VietnameseAttention(nn.Module):
+    def __init__(self, config: VietnameseConfig, pack_qkv=None, use_memory_efficient_attention=None):
+        super().__init__()
+        self.config = config
+        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
+            raise ValueError(
+                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
+                f"heads ({config.num_attention_heads})"
+            )
+        self.hidden_size = config.hidden_size
+        self.num_attention_heads = config.num_attention_heads
+        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
+        self.all_head_size = self.num_attention_heads * self.attention_head_size
+        if pack_qkv is None:
+            pack_qkv = config.pack_qkv
+        self.pack_qkv = pack_qkv
+        if self.pack_qkv:
+            self.qkv_proj = nn.Linear(config.hidden_size, self.all_head_size * 3, bias=True)
+        else:
+            self.q_proj = nn.Linear(config.hidden_size, self.all_head_size, bias=True)
+            self.k_proj = nn.Linear(config.hidden_size, self.all_head_size, bias=True)
+            self.v_proj = nn.Linear(config.hidden_size, self.all_head_size, bias=True)
+        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
+        self.o_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=True)
+        if use_memory_efficient_attention is None:
+            use_memory_efficient_attention = self.config.use_memory_efficient_attention
+        self.use_memory_efficient_attention = use_memory_efficient_attention
+        self.memory_efficient_attention = None if xops is None else xops.memory_efficient_attention
+        if self.use_memory_efficient_attention:
+            assert self.memory_efficient_attention is not None, 'please install xformers'
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_bias: torch.FloatTensor,
+        rope_embeds: Optional[Tuple[torch.FloatTensor, torch.FloatTensor]] = None,
+        padding_inputs: Optional[Tuple] = None,
+        attention_scale: Optional[torch.FloatTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = False,
+        qkv_inputs: Optional[Tuple] = None,
+    ) -> Tuple[torch.Tensor, ...]:
+        shape_hd = (self.num_attention_heads, self.attention_head_size)
+        if self.pack_qkv and qkv_inputs is None:
+            qkv_pack = self.qkv_proj(hidden_states).split(self.all_head_size, dim=-1)
+        else:
+            if qkv_inputs is None:
+                qkv_inputs = (hidden_states, hidden_states, hidden_states)
+            qkv_pack = [
+                getattr(self, n + '_proj')(s) for s, n in zip(qkv_inputs, 'qkv')
+            ]
+        query_states, key_states, value_states = [t.view(t.shape[:-1] + shape_hd) for t in qkv_pack]
+        if self.config.position_embedding_type == 'rope':
+            query_states, key_states = apply_rotary_pos_emb(query_states, key_states, *rope_embeds)
+        dtype = query_states.dtype
+        if self.config.logn_attention_scale and attention_scale is not None:
+            query_states = query_states * attention_scale.to(dtype)
+        if padding_inputs is not None:
+            query_states = pad_input(query_states.squeeze(), *padding_inputs)
+            key_states = pad_input(key_states.squeeze(), *padding_inputs)
+            value_states = pad_input(value_states.squeeze(), *padding_inputs)
+        if self.use_memory_efficient_attention:
+            assert self.memory_efficient_attention is not None, "xformers is not loaded"
+            assert output_attentions is False, "memory_efficient_attention do not output attentions"
+            assert head_mask is None, "Not support yet"
+            attention_probs = None
+            if torch.is_tensor(attention_bias):
+                attention_bias = attention_bias.to(dtype)
+            context_layer = self.memory_efficient_attention(
+                query_states,
+                key_states,
+                value_states,
+                attn_bias=attention_bias,
+                p=self.dropout.p
+            )
+        else:
+            if output_attentions and isinstance(self, VietnameseSdpaAttention):
+                raise RuntimeError("SDPA do not output attentions")
+            context_layer, attention_probs = self._attention(
+                query_states, key_states, value_states, attention_bias, head_mask
+            )
+        if padding_inputs is not None:
+            context_layer = unpad_input(context_layer, indices=padding_inputs[0])
+        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
+        context_layer = context_layer.view(new_context_layer_shape)
+        attn_output = self.o_proj(context_layer)
+        outputs = (attn_output, attention_probs) if output_attentions else (attn_output,)
+        return outputs
+    def _attention(self, query_states, key_states, value_states, attention_bias, head_mask):
+        query_states = query_states.transpose(1, 2)
+        key_states = key_states.transpose(1, 2)
+        value_states = value_states.transpose(1, 2)
+        attention_scores = torch.matmul(query_states, key_states.transpose(-1, -2))
+        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
+        if attention_bias is not None:
+            attention_scores = attention_scores + attention_bias
+        attention_probs = nn.functional.softmax(attention_scores, dim=-1)
+        if self.dropout.p > 0:
+            attention_probs = self.dropout(attention_probs)
+        if head_mask is not None:
+            attention_probs = attention_probs * head_mask
+        context_layer = torch.matmul(attention_probs, value_states)
+        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
+        return context_layer, attention_probs
+class VietnameseSdpaAttention(VietnameseAttention):
+    """
+    Vietnamese attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from
+    `VietnameseAttention` as the weights of the module stays untouched. The only changes are on the forward pass to adapt to
+    SDPA API.
+    """
+    def __init__(self, config: VietnameseConfig, **kwargs):
+        super().__init__(config, **kwargs)
+    def _attention(self, query_states, key_states, value_states, attention_bias, head_mask):
+        attn_output = torch.nn.functional.scaled_dot_product_attention(
+            query_states.transpose(1, 2),
+            key_states.transpose(1, 2),
+            value_states.transpose(1, 2),
+            attn_mask=attention_bias,
+            dropout_p=self.dropout.p if self.training else 0.0,
+        )
+        attn_output = attn_output.permute(0, 2, 1, 3).contiguous()
+        return attn_output, None
+Vietnamese_ATTENTION_CLASSES = {
+    "eager": VietnameseAttention,
+    "sdpa": VietnameseSdpaAttention,
+}
+class VietnameseGatedMLP(nn.Module):
+    """
+    GLU Variants Improve Transformer.
+    """
+    def __init__(self, config: VietnameseConfig):
+        super().__init__()
+        self.intermediate_size = config.intermediate_size
+        self.up_gate_proj = nn.Linear(config.hidden_size, self.intermediate_size * 2, bias=False)
+        self.down_proj = nn.Linear(self.intermediate_size, config.hidden_size, bias=True)
+        self.act_fn = ACT2FN[config.hidden_act]
+        if config.hidden_dropout_prob > 0:
+            self.hidden_dropout = nn.Dropout(config.hidden_dropout_prob)
+        else:
+            self.hidden_dropout = None
+    def forward(self, hidden_states):
+        up_gate = self.up_gate_proj(hidden_states)
+        up_states, gate = torch.split(up_gate, self.intermediate_size, dim=-1)
+        gate = self.act_fn(gate)
+        gated_states = gate * up_states
+        if self.hidden_dropout is not None:
+            gated_states = self.hidden_dropout(gated_states)
+        down_states = self.down_proj(gated_states)
+        return down_states
+class VietnameseLayer(nn.Module):
+    def __init__(
+        self,
+        config: VietnameseConfig,
+        pack_qkv=None,
+        use_memory_efficient_attention=None,
+        attn_implementation=None
+    ):
+        super().__init__()
+        if attn_implementation is None:
+            attn_implementation = config._attn_implementation
+        if use_memory_efficient_attention is None:
+            use_memory_efficient_attention = config.use_memory_efficient_attention
+        if use_memory_efficient_attention:
+            if attn_implementation != 'eager':
+                logger.warning_once(f"Override {attn_implementation=} to 'eager' as {use_memory_efficient_attention=}")
+                attn_implementation = 'eager'
+        self.attention = Vietnamese_ATTENTION_CLASSES[attn_implementation](
+            config, pack_qkv=pack_qkv, use_memory_efficient_attention=use_memory_efficient_attention
+        )
+        self.mlp = VietnameseGatedMLP(config)
+        ln_class = LAYER_NORM[config.layer_norm_type]
+        self.attn_ln = ln_class(config.hidden_size, eps=config.layer_norm_eps)
+        self.mlp_ln = ln_class(config.hidden_size, eps=config.layer_norm_eps)
+        if config.hidden_dropout_prob > 0:
+            self.hidden_dropout = nn.Dropout(config.hidden_dropout_prob)
+        else:
+            self.hidden_dropout = None
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_bias: torch.FloatTensor,
+        rope_embeds: Optional[Tuple[torch.FloatTensor, torch.FloatTensor]] = None,
+        padding_inputs: Optional[Tuple] = None,
+        attention_scale: Optional[torch.FloatTensor] = None,
+        subset_indices: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = False,
+        qkv_inputs: Optional[Tuple] = None,
+    ) -> Tuple[torch.Tensor, ...]:
+        residual = hidden_states if qkv_inputs is None else qkv_inputs[0]
+        attention_outputs = self.attention(
+            hidden_states,
+            attention_bias,
+            rope_embeds,
+            padding_inputs,
+            attention_scale,
+            head_mask,
+            output_attentions=output_attentions,
+            qkv_inputs=qkv_inputs,
+        )
+        hidden_states = attention_outputs[0]
+        if self.hidden_dropout is not None:
+            hidden_states = self.hidden_dropout(hidden_states)
+        hidden_states = residual + hidden_states
+        if subset_indices is not None:
+            hidden_states = hidden_states[subset_indices]
+        hidden_states = self.attn_ln(hidden_states)
+        residual = hidden_states
+        hidden_states = self.mlp(hidden_states)
+        if self.hidden_dropout is not None:
+            hidden_states = self.hidden_dropout(hidden_states)
+        hidden_states = residual + hidden_states
+        hidden_states = self.mlp_ln(hidden_states)
+        outputs = (hidden_states,) + attention_outputs[1:]
+        return outputs
+class VietnameseEncoder(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+        self.layer = nn.ModuleList([VietnameseLayer(config) for _ in range(config.num_hidden_layers)])
+        self.gradient_checkpointing = False
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_bias: Optional[torch.FloatTensor] = None,
+        rope_embeds: Optional[Tuple[torch.FloatTensor, torch.FloatTensor]] = None,
+        padding_inputs: Optional[Tuple] = None,
+        attention_scale: Optional[torch.FloatTensor] = None,
+        subset_indices: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = False,
+        output_hidden_states: Optional[bool] = False,
+        return_dict: Optional[bool] = True,
+    ) -> Union[Tuple[torch.Tensor], BaseModelOutput]:
+        all_hidden_states = () if output_hidden_states else None
+        all_self_attentions = () if output_attentions else None
+        for i, layer_module in enumerate(self.layer):
+            if output_hidden_states:
+                all_hidden_states = all_hidden_states + (hidden_states,)
+            if i >= len(self.layer) - 1:
+                layer_subset_indices = subset_indices
+            else:
+                layer_subset_indices = None
+            layer_head_mask = head_mask[i] if head_mask is not None else None
+            if self.gradient_checkpointing and self.training:
+                layer_outputs = self._gradient_checkpointing_func(
+                    layer_module.__call__,
+                    hidden_states,
+                    attention_bias,
+                    rope_embeds,
+                    padding_inputs,
+                    attention_scale,
+                    layer_subset_indices,
+                    layer_head_mask,
+                )
+            else:
+                layer_outputs = layer_module(
+                    hidden_states,
+                    attention_bias,
+                    rope_embeds,
+                    padding_inputs,
+                    attention_scale,
+                    layer_subset_indices,
+                    layer_head_mask,
+                    output_attentions,
+                )
+            hidden_states = layer_outputs[0]
+            if output_attentions:
+                all_self_attentions = all_self_attentions + (layer_outputs[1],)
+        if output_hidden_states:
+            all_hidden_states = all_hidden_states + (hidden_states,)
+        if not return_dict:
+            return tuple(
+                v
+                for v in [
+                    hidden_states,
+                    all_hidden_states,
+                    all_self_attentions,
+                ]
+                if v is not None
+            )
+        return BaseModelOutput(
+            last_hidden_state=hidden_states,
+            hidden_states=all_hidden_states,
+            attentions=all_self_attentions,
+        )
+class VietnamesePooler(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
+        self.activation = nn.Tanh()
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        first_token_tensor = hidden_states[:, 0]
+        pooled_output = self.dense(first_token_tensor)
+        pooled_output = self.activation(pooled_output)
+        return pooled_output
+class VietnamesePreTrainedModel(PreTrainedModel):
+    """
+    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
+    models.
+    """
+    config_class = VietnameseConfig
+    base_model_prefix = "Vietnamese"
+    supports_gradient_checkpointing = True
+    _supports_sdpa = True
+    def _init_weights(self, module):
+        """Initialize the weights"""
+        if isinstance(module, nn.Linear):
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            if module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.Embedding):
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            if module.padding_idx is not None:
+                module.weight.data[module.padding_idx].zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+class VietnameseModel(VietnamesePreTrainedModel):
+    """
+    The bare Vietnamese Model transformer outputting raw hidden-states without any specific head on top.
+    """
+    def __init__(self, config: VietnameseConfig, add_pooling_layer=False):
+        super().__init__(config)
+        self.config = config
+        self.embeddings = VietnameseEmbeddings(config)
+        self.encoder = VietnameseEncoder(config)
+        self.pooler = VietnamesePooler(config) if add_pooling_layer else None
+        self.post_init()
+    def get_input_embeddings(self):
+        return self.embeddings.word_embeddings
+    def set_input_embeddings(self, value):
+        self.embeddings.word_embeddings = value
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        length: Optional[List[int]] = None,
+        subset_indices: Optional[torch.LongTensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        unpad_inputs: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPooling]:
+        r"""
+        length  (`list` of length `batch_size`, *optional*):
+            If is `None`, return padded `last_hidden_state`.
+        subset_indices  ():
+            pass
+        unpad_inputs  (`bool`, *optional*):
+            pass
+        """
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        unpad_inputs = unpad_inputs if unpad_inputs is not None else self.config.unpad_inputs
+        output_padded = length is None
+        if input_ids is not None and inputs_embeds is not None:
+            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
+        elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
+            input_shape = input_ids.size()
+        elif inputs_embeds is not None:
+            input_shape = inputs_embeds.size()[:-1]
+        else:
+            raise ValueError("You have to specify either input_ids or inputs_embeds")
+        (embedding_output, attention_mask, rope_embeds, length) = self.embeddings(
+            unpad_inputs,
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            length=length,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            inputs_embeds=inputs_embeds
+        )
+        batch_size, seq_length = input_shape
+        if unpad_inputs and self.config.use_memory_efficient_attention:
+            attention_bias = xops.fmha.attn_bias.BlockDiagonalMask.from_seqlens(length)
+        else:
+            attention_bias = self.get_extended_attention_mask(attention_mask, input_shape)
+            if self.config.use_memory_efficient_attention:
+                attention_bias = attention_bias.expand(-1, self.config.num_attention_heads, seq_length, -1)
+        padding_inputs = None
+        if unpad_inputs and (output_padded or not self.config.use_memory_efficient_attention):
+            indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
+            if not self.config.use_memory_efficient_attention:
+                padding_inputs = (indices, *input_shape)
+        attention_scale = None
+        if self.config.logn_attention_scale:
+            logger.warning_once("TODO: logn_attention_scale")
+        encoder_outputs = self.encoder(
+            embedding_output,
+            attention_bias=attention_bias,
+            rope_embeds=rope_embeds,
+            padding_inputs=padding_inputs,
+            attention_scale=attention_scale,
+            subset_indices=subset_indices,
+            head_mask=head_mask,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        sequence_output = encoder_outputs[0]
+        if unpad_inputs and output_padded:
+            sequence_output = pad_input(
+                sequence_output.squeeze(), indices, batch_size, seq_length
+            )
+        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None
+        if not return_dict:
+            return (sequence_output, pooled_output) + encoder_outputs[1:]
+        return BaseModelOutputWithPooling(
+            last_hidden_state=sequence_output,
+            pooler_output=pooled_output,
+            hidden_states=encoder_outputs.hidden_states,
+            attentions=encoder_outputs.attentions,
+        )
+class VietnameseLMPredictionHead(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
+        self.transform_act_fn = ACT2FN[config.hidden_act]
+        self.norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+        self.decoder = nn.Linear(config.hidden_size, config.vocab_size)
+    def forward(self, hidden_states):
+        hidden_states = self.dense(hidden_states)
+        hidden_states = self.transform_act_fn(hidden_states)
+        hidden_states = self.norm(hidden_states)
+        hidden_states = self.decoder(hidden_states)
+        return hidden_states
+class VietnameseForMaskedLM(VietnamesePreTrainedModel):
+    _tied_weights_keys = ["lm_head.decoder.bias", "lm_head.decoder.weight"]
+    def __init__(self, config: VietnameseConfig):
+        super().__init__(config)
+        self.Vietnamese = VietnameseModel(config, add_pooling_layer=False)
+        self.lm_head = VietnameseLMPredictionHead(config)
+        self.loss_fct = nn.CrossEntropyLoss()
+        self.post_init()
+    def get_output_embeddings(self):
+        return self.lm_head.decoder
+    def set_output_embeddings(self, new_embeddings):
+        self.lm_head.decoder = new_embeddings
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        unpad_inputs: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], MaskedLMOutput]:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
+            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
+            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        if labels is None or not self.Vietnamese.config.unpad_inputs:
+            length = None
+            subset_indices = None
+        else:
+            length = attention_mask.sum(-1).tolist()
+            labels = labels[attention_mask.bool()].unsqueeze(0)
+            subset_indices = labels > -100
+        outputs = self.Vietnamese(
+            input_ids,
+            attention_mask=attention_mask,
+            length=length,
+            subset_indices=subset_indices,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            unpad_inputs=unpad_inputs,
+        )
+        sequence_output = outputs[0]
+        prediction_scores = self.lm_head(sequence_output)
+        masked_lm_loss = None
+        if labels is not None:
+            if subset_indices is None:
+                mask = attention_mask.bool()
+                prediction_scores = prediction_scores[mask]
+                labels = labels[mask]
+            else:
+                labels = labels[subset_indices]
+            masked_lm_loss = self.loss_fct(prediction_scores, labels)
+        if not return_dict:
+            output = (prediction_scores,) + outputs[2:]
+            return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output
+        return MaskedLMOutput(
+            loss=masked_lm_loss,
+            logits=prediction_scores,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+class VietnameseForSequenceClassification(VietnamesePreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.config = config
+        self.Vietnamese = VietnameseModel(config, add_pooling_layer=True)
+        classifier_dropout = (
+            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
+        )
+        self.dropout = nn.Dropout(classifier_dropout)
+        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
+        self.post_init()
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        unpad_inputs: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
+            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        outputs = self.Vietnamese(
+            input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            unpad_inputs=unpad_inputs,
+        )
+        pooled_output = outputs[1]
+        pooled_output = self.dropout(pooled_output)
+        logits = self.classifier(pooled_output)
+        loss = None
+        if labels is not None:
+            if self.config.problem_type is None:
+                if self.num_labels == 1:
+                    self.config.problem_type = "regression"
+                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
+                    self.config.problem_type = "single_label_classification"
+                else:
+                    self.config.problem_type = "multi_label_classification"
+            if self.config.problem_type == "regression":
+                loss_fct = nn.MSELoss()
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
+            elif self.config.problem_type == "single_label_classification":
+                loss_fct = nn.CrossEntropyLoss()
+                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+            elif self.config.problem_type == "multi_label_classification":
+                loss_fct = nn.BCEWithLogitsLoss()
+                loss = loss_fct(logits, labels)
+        if not return_dict:
+            output = (logits,) + outputs[2:]
+            return ((loss,) + output) if loss is not None else output
+        return SequenceClassifierOutput(
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+class VietnameseForMultipleChoice(VietnamesePreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.Vietnamese = VietnameseModel(config, add_pooling_layer=True)
+        classifier_dropout = (
+            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
+        )
+        self.dropout = nn.Dropout(classifier_dropout)
+        self.classifier = nn.Linear(config.hidden_size, 1)
+        self.post_init()
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        unpad_inputs: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], MultipleChoiceModelOutput]:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
+            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
+            `input_ids` above)
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
+        input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None
+        attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
+        token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
+        position_ids = position_ids.view(-1, position_ids.size(-1)) if position_ids is not None else None
+        inputs_embeds = (
+            inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1))
+            if inputs_embeds is not None
+            else None
+        )
+        outputs = self.Vietnamese(
+            input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            unpad_inputs=unpad_inputs,
+        )
+        pooled_output = outputs[1]
+        pooled_output = self.dropout(pooled_output)
+        logits = self.classifier(pooled_output)
+        reshaped_logits = logits.view(-1, num_choices)
+        loss = None
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss()
+            loss = loss_fct(reshaped_logits, labels)
+        if not return_dict:
+            output = (reshaped_logits,) + outputs[2:]
+            return ((loss,) + output) if loss is not None else output
+        return MultipleChoiceModelOutput(
+            loss=loss,
+            logits=reshaped_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+@dataclass
+class VietnameseTokenClassifierOutput(ModelOutput):
+    """Output container returned by `VietnameseForTokenClassification` when `return_dict` is true."""
+    # Cross-entropy loss; only set by the model when `labels` are passed.
+    loss: Optional[torch.FloatTensor] = None
+    # Output of the token classifier layer.
+    logits: Optional[torch.FloatTensor] = None
+    # Encoder sequence output after the classifier dropout has been applied to it.
+    last_hidden_state: Optional[torch.FloatTensor] = None
+    # Forwarded from the encoder output's `hidden_states`.
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    # Forwarded from the encoder output's `attentions`.
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+class VietnameseForTokenClassification(VietnamesePreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.Vietnamese = VietnameseModel(config, add_pooling_layer=False)
+        classifier_dropout = (
+            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
+        )
+        self.dropout = nn.Dropout(classifier_dropout)
+        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
+        self.post_init()
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        unpad_inputs: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], VietnameseTokenClassifierOutput]:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        outputs = self.Vietnamese(
+            input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            unpad_inputs=unpad_inputs,
+        )
+        sequence_output = outputs[0]
+        sequence_output = self.dropout(sequence_output)
+        logits = self.classifier(sequence_output)
+        loss = None
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss()
+            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+        if not return_dict:
+            output = (logits,) + outputs[2:]
+            return ((loss,) + output) if loss is not None else output
+        return VietnameseTokenClassifierOutput(
+            loss=loss,
+            logits=logits,
+            last_hidden_state=sequence_output,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+class VietnameseForQuestionAnswering(VietnamesePreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.Vietnamese = VietnameseModel(config, add_pooling_layer=False)
+        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
+        self.post_init()
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        start_positions: Optional[torch.Tensor] = None,
+        end_positions: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        unpad_inputs: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], QuestionAnsweringModelOutput]:
+        r"""
+        start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for position (index) of the start of the labelled span for computing the token classification loss.
+            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
+            are not taken into account for computing the loss.
+        end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for position (index) of the end of the labelled span for computing the token classification loss.
+            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
+            are not taken into account for computing the loss.
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        outputs = self.Vietnamese(
+            input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            unpad_inputs=unpad_inputs,
+        )
+        sequence_output = outputs[0]
+        logits = self.qa_outputs(sequence_output)
+        start_logits, end_logits = logits.split(1, dim=-1)
+        start_logits = start_logits.squeeze(-1).contiguous()
+        end_logits = end_logits.squeeze(-1).contiguous()
+        total_loss = None
+        if start_positions is not None and end_positions is not None:
+            if len(start_positions.size()) > 1:
+                start_positions = start_positions.squeeze(-1)
+            if len(end_positions.size()) > 1:
+                end_positions = end_positions.squeeze(-1)
+            ignored_index = start_logits.size(1)
+            start_positions = start_positions.clamp(0, ignored_index)
+            end_positions = end_positions.clamp(0, ignored_index)
+            loss_fct = nn.CrossEntropyLoss(ignore_index=ignored_index)
+            start_loss = loss_fct(start_logits, start_positions)
+            end_loss = loss_fct(end_logits, end_positions)
+            total_loss = (start_loss + end_loss) / 2
+        if not return_dict:
+            output = (start_logits, end_logits) + outputs[2:]
+            return ((total_loss,) + output) if total_loss is not None else output
+        return QuestionAnsweringModelOutput(
+            loss=total_loss,
+            start_logits=start_logits,
+            end_logits=end_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
+    """
+    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
+    are ignored. This is modified from fairseq's `utils.make_positions`.
+    Args:
+        x: torch.Tensor x:
+    Returns: torch.Tensor
+    """
+    # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
+    mask = input_ids.ne(padding_idx).int()
+    incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
+    return incremental_indices.long() + padding_idx

modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "max_seq_length": 8192,
+    "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa7a6ad87a7ce8fe196787355f6af7d03aee94d19c54a5eb1392ed18c8ef451a
+size 17082988

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "max_length": 8192,
+  "model_max_length": 8192,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "</s>",
+  "stride": 0,
+  "tokenizer_class": "XLMRobertaTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>"
+}