jinaai
/

jina-code-embeddings-0.5b

@@ -57,38 +57,7 @@ The following Python packages are required:
 </details>
 <details>
-  <summary>via <a href="https://huggingface.co/docs/transformers/en/index">transformers</a> (AutoModel with trust_remote_code=True)</summary>
-```python
-# !pip install transformers>=4.53.0 torch>=2.7.1
-from transformers import AutoModel
-import torch
-# Initialize the model
-model = AutoModel.from_pretrained("jinaai/jina-code-embeddings-0.5b", trust_remote_code=True)
-model.to("cuda")
-# Configure truncate_dim, max_length, batch_size in the encode function if needed
-# Encode query
-query_embeddings = model.encode(
-    ["print hello world in python"],
-    task="nl2code",
-    prompt_name="query",
-)
-# Encode passage
-passage_embeddings = model.encode(
-    ["print('Hello World!')"],
-    task="nl2code",
-    prompt_name="passage",
-)
-```
-</details>
-<details>
-  <summary> via <a href="https://huggingface.co/docs/transformers/en/index">transformers</a> (using Qwen2Model without trust_remote_code)</summary>
 ```python
 # !pip install "transformers>=4.53.0" "torch>=2.7.1"
@@ -96,8 +65,7 @@ passage_embeddings = model.encode(
 import torch
 import torch.nn.functional as F
-from transformers.models.qwen2 import Qwen2Model
-from transformers.models.qwen2.tokenization_qwen2_fast import Qwen2TokenizerFast
 INSTRUCTION_CONFIG = {
     "nl2code": {
@@ -152,8 +120,8 @@ documents = [
 ]
 all_inputs = queries + documents
-tokenizer = Qwen2TokenizerFast.from_pretrained('jinaai/jina-code-embeddings-0.5b')
-model = Qwen2Model.from_pretrained('jinaai/jina-code-embeddings-0.5b')
 batch_dict = tokenizer(
     all_inputs,
@@ -192,7 +160,8 @@ model = SentenceTransformer(
         "torch_dtype": torch.bfloat16,
         "attn_implementation": "flash_attention_2",
         "device_map": "cuda"
-    }
 )
 # The queries and documents to embed
@@ -211,8 +180,8 @@ document_embeddings = model.encode(documents, prompt_name="nl2code_document")
 # Compute the (cosine) similarity between the query and document embeddings
 similarity = model.similarity(query_embeddings, document_embeddings)
 print(similarity)
-# tensor([[0.8157, 0.1222],
-#         [0.1201, 0.5500]])
 ```
 </details>
@@ -270,7 +239,6 @@ all_inputs = queries + documents
 # vLLM embedding model
 llm = LLM(
     model="jinaai/jina-code-embeddings-0.5b",
-    hf_overrides={"architectures": ["Qwen2ForCausalLM"]},
     task="embed"
 )

 </details>
 <details>
+  <summary>via <a href="https://huggingface.co/docs/transformers/en/index">transformers</a></summary>
 ```python
 # !pip install "transformers>=4.53.0" "torch>=2.7.1"
 import torch
 import torch.nn.functional as F
+from transformers import AutoModel, AutoTokenizer
 INSTRUCTION_CONFIG = {
     "nl2code": {
 ]
 all_inputs = queries + documents
+tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-code-embeddings-0.5b')
+model = AutoModel.from_pretrained('jinaai/jina-code-embeddings-0.5b')
 batch_dict = tokenizer(
     all_inputs,
         "torch_dtype": torch.bfloat16,
         "attn_implementation": "flash_attention_2",
         "device_map": "cuda"
+    },
+    tokenizer_kwargs={"padding_side": "left"},
 )
 # The queries and documents to embed
 # Compute the (cosine) similarity between the query and document embeddings
 similarity = model.similarity(query_embeddings, document_embeddings)
 print(similarity)
+# tensor([[0.8169, 0.1214],
+#         [0.1190, 0.5500]])
 ```
 </details>
 # vLLM embedding model
 llm = LLM(
     model="jinaai/jina-code-embeddings-0.5b",
     task="embed"
 )