tolgacangoz committed
Commit 33d718d · verified · 1 Parent(s): c0c0ade

Upload matryoshka.py

Files changed (1):
  1. matryoshka.py +23 -10
matryoshka.py CHANGED
@@ -102,15 +102,21 @@ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
-        >>> import torch
-        >>> from diffusers import MatryoshkaPipeline
+        >>> from diffusers import DiffusionPipeline
+        >>> from diffusers.utils import make_image_grid

-        >>> pipe = MatryoshkaPipeline.from_pretrained("A/B", torch_dtype=torch.float16, variant="fp16")
-        >>> pipe = pipe.to("cuda")
+        >>> # nesting_level=0 -> 64x64; nesting_level=1 -> 256x256 - 64x64; nesting_level=2 -> 1024x1024 - 256x256 - 64x64
+        >>> pipe = DiffusionPipeline.from_pretrained("tolgacangoz/matryoshka-diffusion-models",
+        >>>                                          custom_pipeline="matryoshka").to("cuda")

-        >>> prompt = "a photo of an astronaut riding a horse on mars"
-        >>> image = pipe(prompt).images[0]
-        >>> image
+        >>> prompt0 = "a blue jay stops on the top of a helmet of Japanese samurai, background with sakura tree"
+        >>> prompt = f"breathtaking {prompt0}. award-winning, professional, highly detailed"
+        >>> negative_prompt = "deformed, mutated, ugly, disfigured, blur, blurry, noise, noisy"
+        >>> image = pipe(prompt=prompt, negative_prompt=negative_prompt, num_inference_steps=50).images
+        >>> make_image_grid(image, rows=1, cols=len(image))
+
+        >>> pipe.change_nesting_level(<int>)  # 0, 1, or 2
+        >>> # 50+, 100+, and 250+ num_inference_steps are recommended for nesting levels 0, 1, and 2 respectively.
         ```
 """

@@ -1636,12 +1642,19 @@ class MatryoshkaFusedAttnProcessor1_0_or_2_0:

         # the output of sdp = (batch, num_heads, seq_len, head_dim)
         # TODO: add support for attn.scale when we move to Torch 2.1 if F.scaled_dot_product_attention() is available
-        hidden_states = self.attention(
+        # hidden_states = self.attention(
+        #     query,
+        #     key,
+        #     value,
+        #     mask=attention_mask,
+        #     num_heads=attn.heads,
+        # )
+        hidden_states = F.scaled_dot_product_attention(
             query,
             key,
             value,
-            mask=attention_mask,
-            num_heads=attn.heads,
+            attn_mask=attention_mask,
+            dropout=attn.dropout,
         )

         hidden_states = hidden_states.to(query.dtype)
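
For reference, the updated docstring example above can be exercised roughly as follows. This is a minimal sketch rather than part of the commit: the repository id, the `matryoshka` custom pipeline name, `change_nesting_level()`, and the recommended step counts are all taken from the docstring; choosing nesting level 2 with 250 steps is just one of the documented combinations.

```py
# Minimal usage sketch based on the updated EXAMPLE_DOC_STRING above.
from diffusers import DiffusionPipeline
from diffusers.utils import make_image_grid

pipe = DiffusionPipeline.from_pretrained(
    "tolgacangoz/matryoshka-diffusion-models", custom_pipeline="matryoshka"
).to("cuda")

prompt0 = "a blue jay stops on the top of a helmet of Japanese samurai, background with sakura tree"
prompt = f"breathtaking {prompt0}. award-winning, professional, highly detailed"
negative_prompt = "deformed, mutated, ugly, disfigured, blur, blurry, noise, noisy"

# nesting_level=2 yields 1024x1024, 256x256, and 64x64 images in a single call;
# the docstring recommends 250+ inference steps at this level.
pipe.change_nesting_level(2)
images = pipe(prompt=prompt, negative_prompt=negative_prompt, num_inference_steps=250).images
make_image_grid(images, rows=1, cols=len(images))
```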
 
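
The second hunk swaps the processor's hand-rolled `self.attention(...)` call for PyTorch's fused kernel. Below is a standalone sketch (not the pipeline's code) of what `F.scaled_dot_product_attention` computes when given an attention mask; note that in `torch.nn.functional` the dropout keyword is `dropout_p`, which is what the sketch uses.

```py
# Standalone sketch of the fused attention path the commit switches to.
import torch
import torch.nn.functional as F

batch, num_heads, seq_len, head_dim = 2, 8, 16, 64
query = torch.randn(batch, num_heads, seq_len, head_dim)
key = torch.randn(batch, num_heads, seq_len, head_dim)
value = torch.randn(batch, num_heads, seq_len, head_dim)

# Additive float mask broadcastable to (batch, num_heads, seq_len, seq_len);
# entries set to -inf are excluded from attention.
attention_mask = torch.zeros(batch, 1, seq_len, seq_len)

# The output of sdp has shape (batch, num_heads, seq_len, head_dim).
hidden_states = F.scaled_dot_product_attention(
    query,
    key,
    value,
    attn_mask=attention_mask,
    dropout_p=0.0,  # PyTorch's keyword is dropout_p
)

# Merge heads back to (batch, seq_len, num_heads * head_dim), as diffusers
# attention processors typically do after the fused call.
hidden_states = hidden_states.transpose(1, 2).reshape(batch, seq_len, num_heads * head_dim)
```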