diff --git a/.gitattributes b/.gitattributes
index aaee250b861cce8e6e333d802bb16bab6ec8f949..47a2eb080b5e6a11b1c297e2f8da2636aeb5af43 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -46,3 +46,31 @@ checkpoint-118000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-120000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-128000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-130000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_0_A[[:space:]]group[[:space:]]of[[:space:]]zebras[[:space:]]grazing[[:space:]]in[[:space:]]the[[:space:]]grass._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_10_A[[:space:]]horse[[:space:]]grazing[[:space:]]on[[:space:]]top[[:space:]]of[[:space:]]a[[:space:]]lush[[:space:]]green[[:space:]]forest._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_13_A[[:space:]]woman[[:space:]]eats[[:space:]]a[[:space:]]slice[[:space:]]of[[:space:]]cheese[[:space:]]pizza._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_14_A[[:space:]]colorful[[:space:]]umbrella[[:space:]]falling[[:space:]]from[[:space:]]a[[:space:]]tall[[:space:]]building._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_15_Group[[:space:]]of[[:space:]]people[[:space:]]standing[[:space:]]near[[:space:]]or[[:space:]]riding[[:space:]]a[[:space:]]very[[:space:]]small[[:space:]]train[[:space:]]engine.[[:space:]]_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_16_A[[:space:]]green[[:space:]]train[[:space:]]parked,[[:space:]]sitting[[:space:]]in[[:space:]]the[[:space:]]snow_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_17_A[[:space:]]red[[:space:]]fire[[:space:]]hydrant[[:space:]]is[[:space:]]set[[:space:]]up[[:space:]]in[[:space:]]a[[:space:]]grassy[[:space:]]clearing._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_18_Two[[:space:]]people[[:space:]]are[[:space:]]crossing[[:space:]]the[[:space:]]street[[:space:]]holding[[:space:]]umbrellas._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_1_a[[:space:]]number[[:space:]]of[[:space:]]people[[:space:]]standing[[:space:]]around[[:space:]]a[[:space:]]large[[:space:]]group[[:space:]]of[[:space:]]luggage[[:space:]]bags_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_20_MAN[[:space:]]WORKING[[:space:]]ON[[:space:]]A[[:space:]]MOTORCYCLE[[:space:]]IN[[:space:]]A[[:space:]]GARAGE_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_22_A[[:space:]]Japan[[:space:]]Airlines[[:space:]]plane[[:space:]]sits[[:space:]]parked[[:space:]]at[[:space:]]an[[:space:]]airport._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_23_A[[:space:]]young[[:space:]]woman[[:space:]]poses[[:space:]]comically[[:space:]]with[[:space:]]a[[:space:]]piece[[:space:]]of[[:space:]]pizza[[:space:]]in[[:space:]]her[[:space:]]mouth._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_24_A[[:space:]]black[[:space:]]bear[[:space:]]in[[:space:]]a[[:space:]]tree[[:space:]]at[[:space:]]a[[:space:]]zoo[[:space:]]setting._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_25_Two[[:space:]]guys[[:space:]]are[[:space:]]on[[:space:]]a[[:space:]]boat,[[:space:]]with[[:space:]]one's[[:space:]]crack[[:space:]]being[[:space:]]exposed.[[:space:]]_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_27_Two[[:space:]]different[[:space:]]pictures[[:space:]]of[[:space:]]a[[:space:]]giraffe[[:space:]]at[[:space:]]the[[:space:]]zoo._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_28_A[[:space:]]bus[[:space:]]that[[:space:]]sign[[:space:]]reads[[:space:]]"Crosstown".[[:space:]]It[[:space:]]is[[:space:]]a[[:space:]]metro[[:space:]]bus._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_2_A[[:space:]]yellow[[:space:]]commuter[[:space:]]train[[:space:]]traveling[[:space:]]past[[:space:]]some[[:space:]]houses._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_30_A[[:space:]]banana[[:space:]]tree[[:space:]]filled[[:space:]]with[[:space:]]lots[[:space:]]of[[:space:]]unripe[[:space:]]bananas._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_31_A[[:space:]]red[[:space:]]fire[[:space:]]hydrant[[:space:]]with[[:space:]]weeds[[:space:]]around[[:space:]]it[[:space:]]next[[:space:]]to[[:space:]]a[[:space:]]cement[[:space:]]ball._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_32_A[[:space:]]single[[:space:]]giraffe[[:space:]]standing[[:space:]]in[[:space:]]the[[:space:]]middle[[:space:]]of[[:space:]]tall[[:space:]]grass_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_33_the[[:space:]]men[[:space:]]play[[:space:]]soccer[[:space:]]on[[:space:]]the[[:space:]]beach[[:space:]]with[[:space:]]no[[:space:]]shoes_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_3_An[[:space:]]old[[:space:]]fashioned[[:space:]]oxitue[[:space:]]worth[[:space:]]old[[:space:]]cars[[:space:]]on[[:space:]]street[[:space:]]of[[:space:]]town_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_4_Two[[:space:]]men[[:space:]]are[[:space:]]in[[:space:]]a[[:space:]]building[[:space:]]with[[:space:]]brick[[:space:]]walls._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_5_A[[:space:]]woman[[:space:]]baking[[:space:]]a[[:space:]]pizza[[:space:]]pie[[:space:]]in[[:space:]]the[[:space:]]kitchen_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_8_LOOKS[[:space:]]LIKE[[:space:]]A[[:space:]]PERSON[[:space:]]DROPPING[[:space:]]THERE[[:space:]]TENNIS[[:space:]]RACK_1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/coco_steps-40_cfg-5/coco_9_Three[[:space:]]people[[:space:]]are[[:space:]]walking[[:space:]]down[[:space:]]the[[:space:]]road[[:space:]]with[[:space:]]three[[:space:]]horses._1.png filter=lfs diff=lfs merge=lfs -text
+checkpoint-138000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-140000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_0_A group of zebras grazing in the grass._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_0_A group of zebras grazing in the grass._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9979a148a724277ee844abdffc4fd2ccdc0dde9
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_0_A group of zebras grazing in the grass._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7af3dd5dc6dc77bfa80821895e8e6842d8832a3d60d274bb40d323d7bd792072
+size 144646
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_10_A horse grazing on top of a lush green forest._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_10_A horse grazing on top of a lush green forest._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..fa739430745a6acbe18c8f2cc4a7f8ec0c27df79
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_10_A horse grazing on top of a lush green forest._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2f78cbbaa7683e0ca25fe9be054c765d6ff97a82fad02c25142d823f1afceed
+size 168370
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_11_Looking out a home window with power lines nearby with trees in the foreground._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_11_Looking out a home window with power lines nearby with trees in the foreground._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9210b0a263b4eb1fbffa92b6e761eac41e52507
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_11_Looking out a home window with power lines nearby with trees in the foreground._1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_12_Looking up at the tail fin and back wings of an airplane in the sun_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_12_Looking up at the tail fin and back wings of an airplane in the sun_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..b54e008fdedce1f09d8be84b2657322ec3bdf75e
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_12_Looking up at the tail fin and back wings of an airplane in the sun_1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_13_A woman eats a slice of cheese pizza._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_13_A woman eats a slice of cheese pizza._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..611bb84312330bb1962e54865cf549fb3e752130
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_13_A woman eats a slice of cheese pizza._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50a21e98cc1371dbb38d739e248abdd16b753da142d926b5a24a6b339d37ae50
+size 111310
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_14_A colorful umbrella falling from a tall building._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_14_A colorful umbrella falling from a tall building._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..e67f455dd1e32e2a9b1df9429270338809ec935f
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_14_A colorful umbrella falling from a tall building._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:839f853f8b15c31460e59ad71ea4f136870b8a2a2b63c9ec138b862d3d66da83
+size 121057
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_15_Group of people standing near or riding a very small train engine. _1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_15_Group of people standing near or riding a very small train engine. _1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6d41d01ad75a15a594af78b84280588c43c5ab9d
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_15_Group of people standing near or riding a very small train engine. _1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39d550c14ab52f3666155fa5c6a282526a2cbc871f97d2ae84c0ba5aefe2091f
+size 128732
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_16_A green train parked, sitting in the snow_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_16_A green train parked, sitting in the snow_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..138ab8b7b79daad22401b364baa4475105401361
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_16_A green train parked, sitting in the snow_1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f75e2690d40e839f8ae71c0b5232c83672b18c341f94a71a9bbe87d19525e7
+size 116986
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_17_A red fire hydrant is set up in a grassy clearing._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_17_A red fire hydrant is set up in a grassy clearing._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..2d6eb826759af96741368b922811d1309219a94a
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_17_A red fire hydrant is set up in a grassy clearing._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c27df5c1c35c069bd5fe208b3476a2f0f51bb28dfe1086629dc04987c1a57d8
+size 126441
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_18_Two people are crossing the street holding umbrellas._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_18_Two people are crossing the street holding umbrellas._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..d70d23ed6431e2f3965fa77e0498ee6739b2fd15
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_18_Two people are crossing the street holding umbrellas._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1254b5da069f81e2628325290bd475b00e9033502e3f0a75097e651d46694b04
+size 109652
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_19_Someone smiling while skiing in their skis at a ski slope. _1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_19_Someone smiling while skiing in their skis at a ski slope. _1.png
new file mode 100644
index 0000000000000000000000000000000000000000..d88893513da29924dd4974b4f1f3d64e006d59a5
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_19_Someone smiling while skiing in their skis at a ski slope. _1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_1_a number of people standing around a large group of luggage bags_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_1_a number of people standing around a large group of luggage bags_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..d7eea4027f5237e8c9375477d5126d68e22c4357
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_1_a number of people standing around a large group of luggage bags_1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:549ac7c59d0547b25b1ba423a1293496a398336827dc2156e9b10d192f0aeccf
+size 135294
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_20_MAN WORKING ON A MOTORCYCLE IN A GARAGE_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_20_MAN WORKING ON A MOTORCYCLE IN A GARAGE_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..78ea1a5e286df435bced26388b3f21c65655d55b
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_20_MAN WORKING ON A MOTORCYCLE IN A GARAGE_1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08f2a6b399df05ad2d000406abf73d328bf82f39309ba120853c4bee9d32f143
+size 109483
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_21_A vintage red truck parked in a parking lot under a blue sky._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_21_A vintage red truck parked in a parking lot under a blue sky._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..44bef2edaaa66217d43827bfa1f3f3fc00a6eb3c
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_21_A vintage red truck parked in a parking lot under a blue sky._1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_22_A Japan Airlines plane sits parked at an airport._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_22_A Japan Airlines plane sits parked at an airport._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..03a444c7d7df497b573c50c1b971b338d1788c2a
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_22_A Japan Airlines plane sits parked at an airport._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99abe27ce38b848f645727fb14fc4c43a70ab337ddddfb3c178bea3f7f7c3923
+size 104292
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_23_A young woman poses comically with a piece of pizza in her mouth._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_23_A young woman poses comically with a piece of pizza in her mouth._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..ab3be3937ce71d0a487a0c13172c4cf9ab4ff104
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_23_A young woman poses comically with a piece of pizza in her mouth._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77533043ce244af99d2f7533ce4f7562796049e9487c9646f65f9b87636403f2
+size 114014
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_24_A black bear in a tree at a zoo setting._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_24_A black bear in a tree at a zoo setting._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6d63f75600901b7cedc9a589e5e2501af90427cd
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_24_A black bear in a tree at a zoo setting._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7cb4c96d54d131385e35b27fe62954f1c0219361d567b849e879635ab2ab9cc
+size 146539
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_25_Two guys are on a boat, with one's crack being exposed. _1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_25_Two guys are on a boat, with one's crack being exposed. _1.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca4f8ce31ad582c84036adf027c86452dc90119c
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_25_Two guys are on a boat, with one's crack being exposed. _1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff52725f7cafd8a42b8ea919bd36293bfc3faa3e7cda6614cd5dbec890d7d041
+size 119832
diff --git "a/checkpoint-138000/coco_steps-40_cfg-5/coco_26_This photograph appears to be looking truly wonderful. \n_1.png" "b/checkpoint-138000/coco_steps-40_cfg-5/coco_26_This photograph appears to be looking truly wonderful. \n_1.png"
new file mode 100644
index 0000000000000000000000000000000000000000..ff6d6512d910a0c22a6c3aafad1b527e458bae9d
Binary files /dev/null and "b/checkpoint-138000/coco_steps-40_cfg-5/coco_26_This photograph appears to be looking truly wonderful. \n_1.png" differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_27_Two different pictures of a giraffe at the zoo._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_27_Two different pictures of a giraffe at the zoo._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6b5bf86b737c3a2366ad3066ba06c27b69628ecf
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_27_Two different pictures of a giraffe at the zoo._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecc618b5c6c3fe7e36632327dcb8ea5fea8c62ad13c61a5bf4502e24bbd7f52e
+size 113429
diff --git "a/checkpoint-138000/coco_steps-40_cfg-5/coco_28_A bus that sign reads \"Crosstown\". It is a metro bus._1.png" "b/checkpoint-138000/coco_steps-40_cfg-5/coco_28_A bus that sign reads \"Crosstown\". It is a metro bus._1.png"
new file mode 100644
index 0000000000000000000000000000000000000000..3568ff5d735899835317df5a18a8948c697773ab
--- /dev/null
+++ "b/checkpoint-138000/coco_steps-40_cfg-5/coco_28_A bus that sign reads \"Crosstown\". It is a metro bus._1.png"	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be6fb073c825854815e53ea3bf1e9e401f008eaf783d27a5d94340f80cc63e65
+size 101939
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_29_A woman stands next to the ocean on a beach._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_29_A woman stands next to the ocean on a beach._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..ae1f2a4549a6b22d8cf5658f1bddd439ed22c342
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_29_A woman stands next to the ocean on a beach._1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_2_A yellow commuter train traveling past some houses._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_2_A yellow commuter train traveling past some houses._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..52a9c3b4d63c302fca1a5cc993e2cbb927cc4a84
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_2_A yellow commuter train traveling past some houses._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:598dd44aeecde5147feaefbb5db8dd0460c9994e193e25d1fd323c44c01b4e4b
+size 110644
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_30_A banana tree filled with lots of unripe bananas._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_30_A banana tree filled with lots of unripe bananas._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..9ec2d0da1dd82fc1c28d417cc51b1237d7a9358d
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_30_A banana tree filled with lots of unripe bananas._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ebd7d687e093290e9f374ceff9f5256a6f5a7a07de4c41a0552e7e15090a060
+size 129585
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_31_A red fire hydrant with weeds around it next to a cement ball._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_31_A red fire hydrant with weeds around it next to a cement ball._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..61eb44f9e5f926640d9cbebc02270cc71088d08b
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_31_A red fire hydrant with weeds around it next to a cement ball._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77b3b63afe01818246736f7335aa4c40977950de68b176a172c2e2065ccf4413
+size 153710
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_32_A single giraffe standing in the middle of tall grass_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_32_A single giraffe standing in the middle of tall grass_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa848d9d633f3314437665c011ced351d61c66d4
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_32_A single giraffe standing in the middle of tall grass_1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa9f474590b83d5dbdddec42572802922b9c97508afeb9e28e454223c8ba9bb0
+size 105195
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_33_the men play soccer on the beach with no shoes_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_33_the men play soccer on the beach with no shoes_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..cb9ac45bfe2af652e6111b652db143d46985d750
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_33_the men play soccer on the beach with no shoes_1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9998f77a2fefbb243245f2ef851158e51c0e7a57bde7803ffc91900c988b546
+size 125666
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_34_Computer animated people in a computer animated park._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_34_Computer animated people in a computer animated park._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6b4be0e20091c83786d25321799218fe88e330e3
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_34_Computer animated people in a computer animated park._1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_35_The tennis player looks serious as she is about to start her serve. _1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_35_The tennis player looks serious as she is about to start her serve. _1.png
new file mode 100644
index 0000000000000000000000000000000000000000..cea3ffbd67d90d3b896f9afaac779b1ac1cc258d
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_35_The tennis player looks serious as she is about to start her serve. _1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_3_An old fashioned oxitue worth old cars on street of town_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_3_An old fashioned oxitue worth old cars on street of town_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..3b9f55c412b5696c5eb3ed96a6a5e65a8dc3518c
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_3_An old fashioned oxitue worth old cars on street of town_1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebaa958c4a8d19590184523c0f99ba6ded005c49fd956675f9c9273af57f8efe
+size 138130
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_4_Two men are in a building with brick walls._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_4_Two men are in a building with brick walls._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..51c4eacbe69b813cfdb27a266708411109b3dfc5
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_4_Two men are in a building with brick walls._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62ad0cfbabb206c5b14d3282c2b9e3c97858fba25bb630dce6eba2f36470fa7a
+size 111600
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_5_A woman baking a pizza pie in the kitchen_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_5_A woman baking a pizza pie in the kitchen_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..fdbe6daa5d0c007c58b0ae60910ad9bbf3877bf9
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_5_A woman baking a pizza pie in the kitchen_1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79b7e86c2fe3bf4cd737116ae828e917441f55008f8f22fdc3340b5016c2ca81
+size 100941
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_6_A couple of men standing on a field playing baseball._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_6_A couple of men standing on a field playing baseball._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..c86204c92a5e49cae2354abd4238de7c743a8fb4
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_6_A couple of men standing on a field playing baseball._1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_7_A red toothbrush next to a bottle of Crest toothpaste._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_7_A red toothbrush next to a bottle of Crest toothpaste._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..60b6151435ad4fdc99f060399036b5add3e2b893
Binary files /dev/null and b/checkpoint-138000/coco_steps-40_cfg-5/coco_7_A red toothbrush next to a bottle of Crest toothpaste._1.png differ
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_8_LOOKS LIKE A PERSON DROPPING THERE TENNIS RACK_1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_8_LOOKS LIKE A PERSON DROPPING THERE TENNIS RACK_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..4dffb9ee60908ec694eb1d9aa09929a336fb1f0a
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_8_LOOKS LIKE A PERSON DROPPING THERE TENNIS RACK_1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1428b338310150c680bf49c0e0bbd59e9278469da6cbf8b139f02e5c3e4aa7d0
+size 117356
diff --git a/checkpoint-138000/coco_steps-40_cfg-5/coco_9_Three people are walking down the road with three horses._1.png b/checkpoint-138000/coco_steps-40_cfg-5/coco_9_Three people are walking down the road with three horses._1.png
new file mode 100644
index 0000000000000000000000000000000000000000..1a4e20cd087e54170ab59bfabb219b54f6cafa2f
--- /dev/null
+++ b/checkpoint-138000/coco_steps-40_cfg-5/coco_9_Three people are walking down the road with three horses._1.png	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f13a4218157682682714733f57490bd0b9a3388a6a9bee0864731dab895f57d
+size 124934
diff --git a/checkpoint-138000/config.json b/checkpoint-138000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ee8e83c9871e07cf65c548bb2dd181156f2d8445
--- /dev/null
+++ b/checkpoint-138000/config.json
@@ -0,0 +1,79 @@
+{
+  "ar_steps": 1,
+  "architectures": [
+    "DiffVLMDiffusion"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "condition_layer": -1,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "image_token_id": 151655,
+  "img_cross_attention_dim": 2048,
+  "img_diffuser_depth": 1,
+  "img_ffn_dim_multiplier": null,
+  "img_hidden_size": 1536,
+  "img_multiple_of": 256,
+  "img_norm_eps": 1e-05,
+  "img_num_attention_heads": 12,
+  "img_num_kv_heads": 12,
+  "img_qk_norm": true,
+  "in_channels": 32,
+  "initializer_range": 0.02,
+  "inject_img_diffuser": false,
+  "input_size": 32,
+  "intermediate_size": 8960,
+  "layer_group_size": 7,
+  "layerwise_start_idx": 0,
+  "lora_alpha": 16,
+  "lora_bias": "none",
+  "lora_dropout": 0.05,
+  "lora_enable": false,
+  "lora_r": 64,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen2_vl",
+  "non_linearity": 1,
+  "norm_elementwise_affine": true,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "patch_size": 1,
+  "repa_coeff": 0.5,
+  "repa_layers": null,
+  "repa_shared": false,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      16,
+      24,
+      24
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sample_size": 128,
+  "sampling_steps": 28,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.0",
+  "use_cache": true,
+  "use_repa": false,
+  "use_residual_attn": true,
+  "use_sliding_window": false,
+  "vae_path": "mit-han-lab/dc-ae-f32c32-sana-1.1-diffusers",
+  "video_token_id": 151656,
+  "vision_config": {
+    "hidden_size": 1536,
+    "in_chans": 3,
+    "model_type": "qwen2_vl",
+    "spatial_patch_size": 14
+  },
+  "vision_end_token_id": 151653,
+  "vision_start_token_id": 151652,
+  "vision_token_id": 151654,
+  "vocab_size": 151936
+}
diff --git a/checkpoint-138000/generation_config.json b/checkpoint-138000/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0e3d465da90cb0ab59d8ba7babdefb0e88fbfa6b
--- /dev/null
+++ b/checkpoint-138000/generation_config.json
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "transformers_version": "4.47.0"
+}
diff --git a/checkpoint-138000/model-00001-of-00002.safetensors b/checkpoint-138000/model-00001-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6823e1f5f419b0b8e7698c6891548adbeff10742
--- /dev/null
+++ b/checkpoint-138000/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9304880be7ea17765bed974cb5e2f15d0c3ef87359de305698e65a71fe30a32
+size 4974360904
diff --git a/checkpoint-138000/model-00002-of-00002.safetensors b/checkpoint-138000/model-00002-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4affc82a6b472dc4d892e69f3bc9685a58f3066d
--- /dev/null
+++ b/checkpoint-138000/model-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee0fc797f690144fdef0ec99c6d621ed7a7e1627b3d9ef0340bb868c279ab6bd
+size 3234250886
diff --git a/checkpoint-138000/model.safetensors.index.json b/checkpoint-138000/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..f6f170527e634787d82df1968545126caa9903d5
--- /dev/null
+++ b/checkpoint-138000/model.safetensors.index.json
@@ -0,0 +1,1599 @@
+{
+  "metadata": {
+    "total_size": 8208433094
+  },
+  "weight_map": {
+    "embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "img2text.bias": "model-00001-of-00002.safetensors",
+    "img2text.weight": "model-00001-of-00002.safetensors",
+    "img_norm_out.linear_1.bias": "model-00001-of-00002.safetensors",
+    "img_norm_out.linear_1.weight": "model-00001-of-00002.safetensors",
+    "img_norm_out.linear_2.bias": "model-00001-of-00002.safetensors",
+    "img_norm_out.linear_2.weight": "model-00001-of-00002.safetensors",
+    "layers.0.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.0.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.0.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.0.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.1.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.1.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.1.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.10.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.10.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.10.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.11.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.11.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.11.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.12.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.12.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.12.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.13.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.13.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.13.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.14.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.14.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.14.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.15.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.15.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.15.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.16.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.16.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.16.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.17.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.17.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.17.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.18.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.18.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.18.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.19.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.19.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.19.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.2.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.2.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.2.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.20.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.20.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.20.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.21.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.21.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.21.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.22.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.22.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.22.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.23.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.23.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.23.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.24.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.24.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.24.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.25.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.25.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.25.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.26.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.26.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.26.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.27.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.27.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.27.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.3.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.3.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.3.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.4.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.4.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.4.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.5.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.5.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.5.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.6.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.6.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.6.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.7.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.7.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.7.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.8.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.8.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.8.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.9.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.9.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.9.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "patch_embedder.proj.bias": "model-00001-of-00002.safetensors",
+    "patch_embedder.proj.weight": "model-00001-of-00002.safetensors",
+    "t_embedder.timestep_embedder.linear_1.bias": "model-00001-of-00002.safetensors",
+    "t_embedder.timestep_embedder.linear_1.weight": "model-00001-of-00002.safetensors",
+    "t_embedder.timestep_embedder.linear_2.bias": "model-00001-of-00002.safetensors",
+    "t_embedder.timestep_embedder.linear_2.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.conv_in.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.conv_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.conv_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.conv_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.conv_in.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.conv_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.conv_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.conv_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.2.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.2.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.2.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.2.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.2.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.2.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.3.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.3.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.3.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.3.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.merger.ln_q.bias": "model-00001-of-00002.safetensors",
+    "visual.merger.ln_q.weight": "model-00001-of-00002.safetensors",
+    "visual.merger.mlp.0.bias": "model-00001-of-00002.safetensors",
+    "visual.merger.mlp.0.weight": "model-00001-of-00002.safetensors",
+    "visual.merger.mlp.2.bias": "model-00001-of-00002.safetensors",
+    "visual.merger.mlp.2.weight": "model-00001-of-00002.safetensors",
+    "visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors"
+  }
+}
diff --git a/checkpoint-138000/optimizer.pt b/checkpoint-138000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..734da9e9dc64964e6d0d3091ed5610723f2d2c39
--- /dev/null
+++ b/checkpoint-138000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4d30073b751b4e781369baede3ccf8dec66dda5f2455009c2e6aa2e9b82d0f0
+size 7455061885
diff --git a/checkpoint-138000/rng_state_0.pth b/checkpoint-138000/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0bb693d4a46723b5eaefa4c34e8b290f69a3c27d
--- /dev/null
+++ b/checkpoint-138000/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af92edd6096bfd3772f9dc195ef69b291a4e29b5e326fc502708ea859db8109e
+size 15024
diff --git a/checkpoint-138000/rng_state_1.pth b/checkpoint-138000/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0a6dfbbea59338300c5fffd154d89cfb184a36da
--- /dev/null
+++ b/checkpoint-138000/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4884d49c322abc8149ded2641dd29f9308bb89b9c5b813626346a4d9daaf6b9
+size 15024
diff --git a/checkpoint-138000/rng_state_2.pth b/checkpoint-138000/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cf9ddf5fa643f21bab619c2d46131f5e82f81023
--- /dev/null
+++ b/checkpoint-138000/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dedf957f0943c3b35e3075e7d20d6aeb54681a4c94af88412a6667ab39d16e88
+size 15024
diff --git a/checkpoint-138000/rng_state_3.pth b/checkpoint-138000/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c16ba6be9e355e86b25675ac64520aefa2c7a25e
--- /dev/null
+++ b/checkpoint-138000/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1906d2ca02a19227b7c566f6ac17aa33ead13c56d42d8170a2258b36736f3f7
+size 15024
diff --git a/checkpoint-138000/scheduler.pt b/checkpoint-138000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..57b139c70f02a8cecf5f6972c71688b1a23e1242
--- /dev/null
+++ b/checkpoint-138000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37fb23e1d842e9514301374aeb0eb2537984b09f62c819ccb746d851435feabc
+size 1064
diff --git a/checkpoint-138000/trainer_state.json b/checkpoint-138000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a1eb8266804796c384c307d48df8aea3fb43b629
--- /dev/null
+++ b/checkpoint-138000/trainer_state.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d49206a8129037d93322db80fb99d365d9e6fdfa6952e67e5651a27d29d050e
+size 21242544
diff --git a/checkpoint-138000/training_args.bin b/checkpoint-138000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d40e83cbccfaf20b540bbdd2e0667f36abb10dc6
--- /dev/null
+++ b/checkpoint-138000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1c49505c345575e51b0123d9223ddd9263d76b83bc110217e53e85a3e8e90b4
+size 6008
diff --git a/checkpoint-140000/config.json b/checkpoint-140000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ee8e83c9871e07cf65c548bb2dd181156f2d8445
--- /dev/null
+++ b/checkpoint-140000/config.json
@@ -0,0 +1,79 @@
+{
+  "ar_steps": 1,
+  "architectures": [
+    "DiffVLMDiffusion"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "condition_layer": -1,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "image_token_id": 151655,
+  "img_cross_attention_dim": 2048,
+  "img_diffuser_depth": 1,
+  "img_ffn_dim_multiplier": null,
+  "img_hidden_size": 1536,
+  "img_multiple_of": 256,
+  "img_norm_eps": 1e-05,
+  "img_num_attention_heads": 12,
+  "img_num_kv_heads": 12,
+  "img_qk_norm": true,
+  "in_channels": 32,
+  "initializer_range": 0.02,
+  "inject_img_diffuser": false,
+  "input_size": 32,
+  "intermediate_size": 8960,
+  "layer_group_size": 7,
+  "layerwise_start_idx": 0,
+  "lora_alpha": 16,
+  "lora_bias": "none",
+  "lora_dropout": 0.05,
+  "lora_enable": false,
+  "lora_r": 64,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen2_vl",
+  "non_linearity": 1,
+  "norm_elementwise_affine": true,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "patch_size": 1,
+  "repa_coeff": 0.5,
+  "repa_layers": null,
+  "repa_shared": false,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      16,
+      24,
+      24
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sample_size": 128,
+  "sampling_steps": 28,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.0",
+  "use_cache": true,
+  "use_repa": false,
+  "use_residual_attn": true,
+  "use_sliding_window": false,
+  "vae_path": "mit-han-lab/dc-ae-f32c32-sana-1.1-diffusers",
+  "video_token_id": 151656,
+  "vision_config": {
+    "hidden_size": 1536,
+    "in_chans": 3,
+    "model_type": "qwen2_vl",
+    "spatial_patch_size": 14
+  },
+  "vision_end_token_id": 151653,
+  "vision_start_token_id": 151652,
+  "vision_token_id": 151654,
+  "vocab_size": 151936
+}
diff --git a/checkpoint-140000/generation_config.json b/checkpoint-140000/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0e3d465da90cb0ab59d8ba7babdefb0e88fbfa6b
--- /dev/null
+++ b/checkpoint-140000/generation_config.json
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "transformers_version": "4.47.0"
+}
diff --git a/checkpoint-140000/model-00001-of-00002.safetensors b/checkpoint-140000/model-00001-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e08975f992f8372e0a4731d4e8c62fd242d71355
--- /dev/null
+++ b/checkpoint-140000/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:380186fbffff23b1b27242b9d1889dd6bc4e65a679afe8dc1470e4bdd67eb382
+size 4974360904
diff --git a/checkpoint-140000/model-00002-of-00002.safetensors b/checkpoint-140000/model-00002-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6b25b18b2a77551c996feeb53d1cc0e353794043
--- /dev/null
+++ b/checkpoint-140000/model-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9754b27a7ddd7b94ff8e561703a86b3fe2b14df50990db3ce22f7ab3d030e096
+size 3234250886
diff --git a/checkpoint-140000/model.safetensors.index.json b/checkpoint-140000/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..f6f170527e634787d82df1968545126caa9903d5
--- /dev/null
+++ b/checkpoint-140000/model.safetensors.index.json
@@ -0,0 +1,1599 @@
+{
+  "metadata": {
+    "total_size": 8208433094
+  },
+  "weight_map": {
+    "embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "img2text.bias": "model-00001-of-00002.safetensors",
+    "img2text.weight": "model-00001-of-00002.safetensors",
+    "img_norm_out.linear_1.bias": "model-00001-of-00002.safetensors",
+    "img_norm_out.linear_1.weight": "model-00001-of-00002.safetensors",
+    "img_norm_out.linear_2.bias": "model-00001-of-00002.safetensors",
+    "img_norm_out.linear_2.weight": "model-00001-of-00002.safetensors",
+    "layers.0.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.0.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.0.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.0.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.1.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.1.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.1.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.10.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.10.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.10.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.11.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.11.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.11.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.12.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.12.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.12.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.13.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.13.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.13.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.14.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.14.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.14.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.15.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.15.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.15.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.16.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.16.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.16.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.17.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.17.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.17.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.18.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.18.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.18.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.19.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.19.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.19.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.2.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.2.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.2.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.20.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.20.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.20.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.21.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.21.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.21.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.22.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.22.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.22.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.23.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.23.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.23.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.24.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.24.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.24.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.25.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.25.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.25.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.26.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.26.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.26.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.27.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.27.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.27.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.3.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.3.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.3.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.4.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.4.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.4.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.5.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.5.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.5.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.6.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.6.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.6.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.7.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.7.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.7.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.8.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.8.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.8.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.img_post_ffn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.9.img_post_mixed_attn_norm.weight": "model-00001-of-00002.safetensors",
+    "layers.9.img_scale_shift.linear.bias": "model-00001-of-00002.safetensors",
+    "layers.9.img_scale_shift.linear.weight": "model-00001-of-00002.safetensors",
+    "layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.res_attn_gate.weight": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "patch_embedder.proj.bias": "model-00001-of-00002.safetensors",
+    "patch_embedder.proj.weight": "model-00001-of-00002.safetensors",
+    "t_embedder.timestep_embedder.linear_1.bias": "model-00001-of-00002.safetensors",
+    "t_embedder.timestep_embedder.linear_1.weight": "model-00001-of-00002.safetensors",
+    "t_embedder.timestep_embedder.linear_2.bias": "model-00001-of-00002.safetensors",
+    "t_embedder.timestep_embedder.linear_2.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "txt_layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "txt_layers.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.10.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.11.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.12.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.13.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.14.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.15.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.16.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.4.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.5.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.6.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.7.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.8.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "txt_layers.9.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "txt_norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.conv_in.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.conv_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.conv_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.conv_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.2.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.0.3.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.2.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.1.3.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.2.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.2.3.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.3.3.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.0.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.0.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.4.3.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.0.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.decoder.up_blocks.5.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.conv_in.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.conv_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.conv_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.conv_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.0.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.2.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.0.2.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.0.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.2.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.1.2.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.0.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.conv1.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.conv1.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.conv2.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.1.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.2.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.2.2.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.0.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.3.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.3.3.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.0.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.3.conv.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.4.3.conv.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.0.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.1.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.norm_out.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.norm_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_k.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_q.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_qkv_multiscale.0.proj_in.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_qkv_multiscale.0.proj_out.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.attn.to_v.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_depth.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_depth.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_inverted.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_inverted.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.conv_point.weight": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.norm.bias": "model-00002-of-00002.safetensors",
+    "vae.encoder.down_blocks.5.2.conv_out.norm.weight": "model-00002-of-00002.safetensors",
+    "visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.24.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.25.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.26.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.27.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.28.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.29.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.30.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.31.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
+    "visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
+    "visual.merger.ln_q.bias": "model-00001-of-00002.safetensors",
+    "visual.merger.ln_q.weight": "model-00001-of-00002.safetensors",
+    "visual.merger.mlp.0.bias": "model-00001-of-00002.safetensors",
+    "visual.merger.mlp.0.weight": "model-00001-of-00002.safetensors",
+    "visual.merger.mlp.2.bias": "model-00001-of-00002.safetensors",
+    "visual.merger.mlp.2.weight": "model-00001-of-00002.safetensors",
+    "visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors"
+  }
+}
diff --git a/checkpoint-140000/optimizer.pt b/checkpoint-140000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..731833c7371f6dbf839a81cc0afdc0cf507427c6
--- /dev/null
+++ b/checkpoint-140000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17548642b8613988895e9ef292149689239f7b93f1009c8a8e13ad115d64f258
+size 7455061885
diff --git a/checkpoint-140000/rng_state_0.pth b/checkpoint-140000/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6b5534c5a1d86857f0c5244828ee527ba8f16dd4
--- /dev/null
+++ b/checkpoint-140000/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02cd20e5515563163911cecd8dbe657091ec3dbf2b31865c1545be88c77b58b1
+size 15024
diff --git a/checkpoint-140000/rng_state_1.pth b/checkpoint-140000/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..96cbb050c9b9e839bb8bb2a87d79b9fafc096cf0
--- /dev/null
+++ b/checkpoint-140000/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a4e4a236edc60a15d85c1164edd24c7f089444982ccb5be61179033a99d2c0a
+size 15024
diff --git a/checkpoint-140000/rng_state_2.pth b/checkpoint-140000/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3cde7185a7b287b318e88fe4dc2ee1183ad2f1e2
--- /dev/null
+++ b/checkpoint-140000/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64f82b3a5adcbc2d050269d54ad92a5122e7b9fd4e730f50e0ef797b0e1772de
+size 15024
diff --git a/checkpoint-140000/rng_state_3.pth b/checkpoint-140000/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f3c05fa22167575aac3a6719401b1abef80fab74
--- /dev/null
+++ b/checkpoint-140000/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acb0260622f1c736d2fcd1e4530cebd4e6aa79f8e5bd1d938756808fbf450c3d
+size 15024
diff --git a/checkpoint-140000/scheduler.pt b/checkpoint-140000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9780f71885482c177915c5c3f4d5ca559a28bc99
--- /dev/null
+++ b/checkpoint-140000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba3f10af11b4450d1d39766d4216535b6f9f467c488fb2e5707c79acd3c9b465
+size 1064
diff --git a/checkpoint-140000/trainer_state.json b/checkpoint-140000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3db14c11aa7dc6de3e0c11133241d19ffaa03bfc
--- /dev/null
+++ b/checkpoint-140000/trainer_state.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe9ebc6c3a2d4b76b51a5b49d61e7acd171e89de6f146baf5979d0e53de5cdf3
+size 21551241
diff --git a/checkpoint-140000/training_args.bin b/checkpoint-140000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d40e83cbccfaf20b540bbdd2e0667f36abb10dc6
--- /dev/null
+++ b/checkpoint-140000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1c49505c345575e51b0123d9223ddd9263d76b83bc110217e53e85a3e8e90b4
+size 6008