Databases
Weaviate
For an end-to-end guide follow this link: https://learn.mixpeek.com/mixpeek-embed-with-weaviate-for-multimodal-ai/
Video
We’ll use vuse-generic-v1 to split a video into 1-second chunks, embed each chunk as a 768-dimensional vector, and store the vectors in a Weaviate collection.
Ingest
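You’ll need to create a Weaviate schema with a class called VideoChunk whose vector index holds 768-dimensional vectors. The dimension is not declared in the schema below; it is determined by the vectors you insert.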
from mixpeek import Mixpeek
import weaviate
# Initialize the Mixpeek client with your API key
mixpeek = Mixpeek("YOUR_API_KEY")

# Initialize the Weaviate client (local instance)
client = weaviate.Client("http://localhost:8080")

# Define the schema for the VideoChunk class.
# "vectorizer": "none" means Weaviate does not embed objects itself; each
# object is stored with the vector we supply at insert time (the Mixpeek
# embedding). Only the chunk timestamps are kept as properties.
schema = {
    "classes": [{
        "class": "VideoChunk",
        "vectorizer": "none",
        "vectorIndexType": "hnsw",
        "properties": [
            {"name": "start_time", "dataType": ["number"]},
            {"name": "end_time", "dataType": ["number"]},
        ],
    }]
}

# Create schema
client.schema.create(schema)

# Split the source video into chunks
processed_chunks = mixpeek.tools.video.process(
    video_source="https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/mixpek-public-demo/Jurassic+Park+(2).mp4",
    chunk_interval=1,  # 1 second intervals
    resolution=[720, 1280],
)

for chunk in processed_chunks:
    print(f"Processing video chunk: {chunk['start_time']}")

    # Embed each chunk
    embed_response = mixpeek.embed.video(
        model_id="vuse-generic-v1",
        input=chunk['base64_chunk'],
        input_type="base64",
    )

    # Add to Weaviate. data_object.create() takes the properties first and
    # the class name second, so pass both by keyword to avoid swapping them.
    client.data_object.create(
        data_object={
            "start_time": chunk["start_time"],
            "end_time": chunk["end_time"],
        },
        class_name="VideoChunk",
        vector=embed_response['embedding'],
    )
Text Query
# Embed the text query with the same model used to index the video chunks
query = "two boys inside a car"
embed_response = mixpeek.embed.video(
    model_id="vuse-generic-v1",
    input=query,
    input_type="text",
)

# Build the nearest-neighbour search step by step, then execute it
search = client.query.get("VideoChunk", ["start_time", "end_time"])
search = search.with_near_vector({"vector": embed_response['embedding']})
search = search.with_limit(10)
results = search.do()

# Print each matching chunk (its start_time / end_time)
matches = results['data']['Get']['VideoChunk']
for match in matches:
    print(match)
Video Query
# Query by video: a cartoon version of Jurassic Park, referenced by URL
file_url = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/weaviate/rabbit-jurassic.mp4"
embed_response = mixpeek.embed.video(
    model_id="vuse-generic-v1",
    input=file_url,
    input_type="url",
)

# Build the nearest-neighbour search step by step, then execute it
search = client.query.get("VideoChunk", ["start_time", "end_time"])
search = search.with_near_vector({"vector": embed_response['embedding']})
search = search.with_limit(10)
results = search.do()

# Print each matching chunk (its start_time / end_time)
matches = results['data']['Get']['VideoChunk']
for match in matches:
    print(match)
Image
We’ll use clip-v1 to build a collection of 512-dimensional image embeddings.
You’ll need to create a Weaviate schema with a class called Image whose vector index holds 512-dimensional vectors.
Ingest
from mixpeek import Mixpeek
import weaviate
# Initialize the Mixpeek client with your API key
mixpeek = Mixpeek("YOUR_API_KEY")

# Initialize the Weaviate client (local instance)
client = weaviate.Client("http://localhost:8080")

# Define the schema for the Image class.
# "vectorizer": "none" means Weaviate does not embed objects itself; each
# object is stored with the vector we supply at insert time (the Mixpeek
# embedding). Only the source URL is kept as a property.
schema = {
    "classes": [{
        "class": "Image",
        "vectorizer": "none",
        "vectorIndexType": "hnsw",
        "properties": [
            {"name": "image_url", "dataType": ["string"]},
        ],
    }]
}

# Create schema
client.schema.create(schema)

# List of image URLs to process
image_urls = [
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/weaviate/image1.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/weaviate/image2.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/weaviate/image3.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/weaviate/image4.jpg",
]

for url in image_urls:
    print(f"Processing image: {url}")

    # Embed each image
    embed_response = mixpeek.embed.image(
        model_id="openai-clip-vit-base-patch32",
        input=url,
        input_type="url",
    )

    # Add to Weaviate. data_object.create() takes the properties first and
    # the class name second, so pass both by keyword to avoid swapping them.
    client.data_object.create(
        data_object={"image_url": url},
        class_name="Image",
        vector=embed_response['embedding'],
    )
Text Query
# Embed the text query with the same model used to index the images
query = "a cat sitting on a windowsill"
embed_response = mixpeek.embed.image(
    model_id="openai-clip-vit-base-patch32",
    input=query,
    input_type="text",
)

# Build the nearest-neighbour search step by step, then execute it
search = client.query.get("Image", ["image_url"])
search = search.with_near_vector({"vector": embed_response['embedding']})
search = search.with_limit(5)
results = search.do()

# Print each matching image (its image_url)
matches = results['data']['Get']['Image']
for match in matches:
    print(match)
Image Query
# Use an image from the same bucket as a query
query_image = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/weaviate/query_image.jpg"

# The query must be embedded with the same model that produced the indexed
# vectors; the Image collection was populated with
# "openai-clip-vit-base-patch32", so use that model ID here as well.
embed_response = mixpeek.embed.image(
    model_id="openai-clip-vit-base-patch32",
    input=query_image,
    input_type="url",
)

# Retrieve the 5 nearest images to the query embedding
results = (
    client.query
    .get("Image", ["image_url"])
    .with_near_vector({
        "vector": embed_response['embedding']
    })
    .with_limit(5)
    .do()
)

# Print each matching image (its image_url)
for result in results['data']['Get']['Image']:
    print(result)