Databases
Supabase
Databases
Supabase
For an end-to-end guide follow this link: https://supabase.com/docs/guides/ai/examples/mixpeek-video-search
Video
We’ll use the vuse-generic-v1 model to split a video into 1-second chunks and embed each chunk as a 768-dimensional vector.
You’ll need to create a Supabase table called video_chunks with columns for each chunk’s id, start and end time, metadata, and its 768-dimensional embedding.
Ingest
from supabase import create_client, Client
from mixpeek import Mixpeek
import os
# Video ingest: split a video into 1-second chunks, embed every chunk with
# Mixpeek, and store one row per chunk in the Supabase "video_chunks" table.

# Clients: Mixpeek does the chunking/embedding, Supabase is the vector store.
mixpeek = Mixpeek("YOUR_MIXPEEK_API_KEY")
supabase: Client = create_client("YOUR_SUPABASE_URL", "YOUR_SUPABASE_KEY")

# Column layout for the chunk table; the embedding width (768) has to match
# the size of the vectors inserted below.
# NOTE(review): confirm that the installed Supabase client really exposes
# table(...).create(...) and query(...); if it does not, run the equivalent
# DDL directly against the database instead.
video_chunk_columns = {
    "id": "text",
    "start_time": "float8",
    "end_time": "float8",
    "embedding": "vector(768)",
    "metadata": "jsonb"
}
supabase.table("video_chunks").create(video_chunk_columns)

# Source video and chunking parameters.
video_url = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/starter/jurassic_park_trailer.mp4"
processed_chunks = mixpeek.tools.video.process(
    video_source=video_url,
    chunk_interval=1,  # one chunk per second of video
    resolution=[720, 1280],
)

for chunk in processed_chunks:
    print(f"Processing video chunk: {chunk['start_time']}")

    # Each chunk arrives base64-encoded; embed it as-is.
    embed_response = mixpeek.embed.video(
        model_id="vuse-generic-v1",
        input=chunk['base64_chunk'],
        input_type="base64",
    )

    # One row per chunk, keyed by the chunk's start time.
    row = {
        "id": f"chunk_{chunk['start_time']}",
        "start_time": chunk["start_time"],
        "end_time": chunk["end_time"],
        "embedding": embed_response['embedding'],
        "metadata": {"video_url": video_url}
    }
    supabase.table("video_chunks").insert(row).execute()

# Build the cosine-distance index once, after all rows have been inserted.
index_sql = "CREATE INDEX ON video_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)"
supabase.query(index_sql).execute()
print("Created index")
Text Query
# Text-to-video search: embed a natural-language query with the same model
# used for the stored chunks, then ask the database for the closest chunks.
query = "two boys inside a car"

# Keep the whole response here and pull out 'embedding' exactly once, in the
# RPC arguments below. Indexing with ['embedding'] both here and there would
# apply the string key to the embedding itself and fail at runtime.
embed_response = mixpeek.embed.video(
    model_id="vuse-generic-v1",
    input=query,
    input_type="text"
)

# 'match_video_chunks' is a database function invoked over RPC; it is not
# defined in this snippet and must already exist in the Supabase project.
results = supabase.rpc(
    'match_video_chunks',
    {
        'query_embedding': embed_response['embedding'],
        'match_threshold': 0.8,
        'match_count': 10
    }
).execute()

# Print every returned chunk with its similarity score and time range.
for result in results.data:
    print(f"Chunk ID: {result['id']}, Similarity: {result['similarity']}, Start Time: {result['start_time']}, End Time: {result['end_time']}")
Video Query
# Video-to-video search: embed a query video by URL and look up the stored
# chunks closest to it. The query clip is a cartoon version of Jurassic Park.
file_url = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/starter/jurassic_bunny.mp4"

embed_response = mixpeek.embed.video(
    model_id="vuse-generic-v1",
    input=file_url,
    input_type="url",
)

# Arguments for the 'match_video_chunks' database function, which is called
# over RPC and is not defined in this snippet.
match_params = {
    'query_embedding': embed_response['embedding'],
    'match_threshold': 0.8,
    'match_count': 10
}
results = supabase.rpc('match_video_chunks', match_params).execute()

# Report each matching chunk with its similarity score and time range.
for result in results.data:
    print(f"Chunk ID: {result['id']}, Similarity: {result['similarity']}, Start Time: {result['start_time']}, End Time: {result['end_time']}")
Image
We’ll use the openai-clip-vit-base-patch32 model to embed each image as a 512-dimensional vector.
You’ll need to create a Supabase table called images with columns for each image’s id, its URL, and its 512-dimensional embedding.
Ingest
from supabase import create_client, Client
from mixpeek import Mixpeek
import os
# Image ingest: embed a fixed list of image URLs with Mixpeek and store one
# row per image in the Supabase "images" table.

# Clients: Mixpeek computes the embeddings, Supabase is the vector store.
mixpeek = Mixpeek("YOUR_MIXPEEK_API_KEY")
supabase: Client = create_client("YOUR_SUPABASE_URL", "YOUR_SUPABASE_KEY")

# Column layout for the image table; the embedding width (512) has to match
# the size of the vectors inserted below.
# NOTE(review): confirm that the installed Supabase client really exposes
# table(...).create(...) and query(...); if it does not, run the equivalent
# DDL directly against the database instead.
image_columns = {
    "id": "text",
    "embedding": "vector(512)",
    "image_url": "text"
}
supabase.table("images").create(image_columns)

# Images to index.
image_urls = [
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/image1.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/image2.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/image3.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/image4.jpg"
]

for url in image_urls:
    print(f"Processing image: {url}")

    # Here embed_response holds the embedding itself, not the full response.
    response = mixpeek.embed.image(
        model_id="openai-clip-vit-base-patch32",
        input=url,
        input_type="url",
    )
    embed_response = response['embedding']

    # One row per image; the id is derived from the last path segment of the URL.
    row = {
        "id": f"image_{url.split('/')[-1]}",
        "embedding": embed_response,
        "image_url": url
    }
    supabase.table("images").insert(row).execute()

# Build the cosine-distance index once, after all rows have been inserted.
index_sql = "CREATE INDEX ON images USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)"
supabase.query(index_sql).execute()
print("Created index")
Text Query
# Text-to-image search: embed a text query with the image model and look up
# the closest stored images.
query = "a cat sitting on a windowsill"

embed_response = mixpeek.embed.image(
    model_id="openai-clip-vit-base-patch32",
    input=query,
    input_type="text",
)

# Arguments for the 'match_images' database function, which is called over
# RPC and is not defined in this snippet.
match_params = {
    'query_embedding': embed_response['embedding'],
    'match_threshold': 0.8,
    'match_count': 5
}
results = supabase.rpc('match_images', match_params).execute()

# Report each matching image with its similarity score and URL.
for result in results.data:
    print(f"Image ID: {result['id']}, Similarity: {result['similarity']}, Image URL: {result['image_url']}")
Image Query
# Image-to-image search: embed a query image by URL (hosted in the same
# bucket as the indexed images) and look up the closest stored images.
query_image = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/query_image.jpg"

embed_response = mixpeek.embed.image(
    model_id="openai-clip-vit-base-patch32",
    input=query_image,
    input_type="url",
)

# Arguments for the 'match_images' database function, which is called over
# RPC and is not defined in this snippet.
match_params = {
    'query_embedding': embed_response['embedding'],
    'match_threshold': 0.8,
    'match_count': 5
}
results = supabase.rpc('match_images', match_params).execute()

# Report each matching image with its similarity score and URL.
for result in results.data:
    print(f"Image ID: {result['id']}, Similarity: {result['similarity']}, Image URL: {result['image_url']}")
Was this page helpful?