Databases
Qdrant
For an end-to-end guide, follow this link: https://qdrant.tech/documentation/embeddings/mixpeek/
Video
We’ll be using vuse-generic-v1
to split a video into 1-second chunks and store each chunk as a 768-dimensional embedding.
You’ll need to create a Qdrant collection called video_chunks
with 768
dimensions for vuse-generic-v1
model embeddings.
Ingest
from mixpeek import Mixpeek
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

# Initialize the Mixpeek client with your API key
mixpeek = Mixpeek("YOUR_API_KEY")

# Initialize Qdrant client
client = QdrantClient("localhost", port=6333)

# Create collection sized for vuse-generic-v1 embeddings (768 dimensions).
# NOTE: recreate_collection deletes any existing collection with this name
# before creating it, so re-running this script starts from an empty collection.
client.recreate_collection(
    collection_name="video_chunks",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

# Process video chunks
processed_chunks = mixpeek.tools.video.process(
    video_source="https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/qdrant/Jurassic+Park+(2).mp4",
    chunk_interval=1,  # 1 second intervals
    resolution=[720, 1280],
)

for i, chunk in enumerate(processed_chunks):
    print(f"Processing video chunk: {i}")

    # Embed each chunk
    embed_response = mixpeek.embed.video(
        model_id="vuse-generic-v1",
        input=chunk["base64_chunk"],
        input_type="base64",
    )

    # Add to Qdrant. The chunk index is used as the point id, and the payload
    # keeps the chunk's time range so a search hit can be mapped back to a
    # position in the video.
    client.upsert(
        collection_name="video_chunks",
        points=[
            PointStruct(
                id=i,
                vector=embed_response["embedding"],
                payload={
                    "start_time": chunk["start_time"],
                    "end_time": chunk["end_time"],
                },
            )
        ],
    )
Text Query
query = "two boys inside a car"

# Embed the text with the same model used for the video chunks so the query
# vector can be compared against the stored chunk vectors.
embed_response = mixpeek.embed.video(
    model_id="vuse-generic-v1",
    input=query,
    input_type="text",
)

# Fetch up to 10 matching chunks from the video_chunks collection
results = client.search(
    collection_name="video_chunks",
    query_vector=embed_response["embedding"],
    limit=10,
)

for result in results:
    print(result)
Video Query
# we'll use a cartoon version of jurassic park
file_url = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/qdrant/rabbit-jurassic.mp4"

# Embed the query video directly from its URL
embed_response = mixpeek.embed.video(
    model_id="vuse-generic-v1",
    input=file_url,
    input_type="url",
)

# Fetch up to 10 matching chunks from the video_chunks collection
results = client.search(
    collection_name="video_chunks",
    query_vector=embed_response["embedding"],
    limit=10,
)

for result in results:
    print(result)
Image
We’ll be using openai-clip-vit-base-patch32
to build a collection of image embeddings with 512 dimensions.
You’ll need to create a Qdrant collection called images
with 512 dimensions
for openai-clip-vit-base-patch32
model embeddings.
Ingest
from mixpeek import Mixpeek
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

# Initialize the Mixpeek client with your API key
mixpeek = Mixpeek("YOUR_API_KEY")

# Initialize Qdrant client
client = QdrantClient("localhost", port=6333)

# Create collection sized for openai-clip-vit-base-patch32 embeddings
# (512 dimensions).
# NOTE: recreate_collection deletes any existing collection with this name
# before creating it, so re-running this script starts from an empty collection.
client.recreate_collection(
    collection_name="images",
    vectors_config=VectorParams(size=512, distance=Distance.COSINE),
)

# List of image URLs to process
image_urls = [
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/qdrant/image1.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/qdrant/image2.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/qdrant/image3.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/qdrant/image4.jpg",
]

for i, url in enumerate(image_urls):
    print(f"Processing image: {url}")

    # Embed each image
    embed_response = mixpeek.embed.image(
        model_id="openai-clip-vit-base-patch32",
        input=url,
        input_type="url",
    )

    # Add to Qdrant. The list index is used as the point id, and the source
    # URL is stored in the payload so a search hit can be traced back to
    # its image.
    client.upsert(
        collection_name="images",
        points=[
            PointStruct(
                id=i,
                vector=embed_response["embedding"],
                payload={"image_url": url},
            )
        ],
    )
Text Query
query = "a cat sitting on a windowsill"

# Embed the text with the same model used for the images so the query vector
# can be compared against the stored image vectors.
embed_response = mixpeek.embed.image(
    model_id="openai-clip-vit-base-patch32",
    input=query,
    input_type="text",
)

# Fetch up to 5 matching images from the images collection
results = client.search(
    collection_name="images",
    query_vector=embed_response["embedding"],
    limit=5,
)

for result in results:
    print(result)
Image Query
# Use an image from the same bucket as a query
query_image = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/qdrant/query_image.jpg"

# Embed the query image directly from its URL
embed_response = mixpeek.embed.image(
    model_id="openai-clip-vit-base-patch32",
    input=query_image,
    input_type="url",
)

# Fetch up to 5 matching images from the images collection
results = client.search(
    collection_name="images",
    query_vector=embed_response["embedding"],
    limit=5,
)

for result in results:
    print(result)