For an end-to-end guide, follow this link: https://supabase.com/docs/guides/ai/examples/mixpeek-video-search

Video

We’ll use the vuse-generic-v1 model to split a video into 1-second chunks and embed each chunk as a 768-dimension vector.

You’ll need to create a Supabase table called video_chunks with appropriate columns for storing video chunk data and embeddings.
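
A minimal schema sketch for that table, assuming the pgvector extension is enabled (run once in the Supabase SQL editor; the column names match the code below):

create extension if not exists vector;

create table video_chunks (
  id text primary key,
  start_time float8,
  end_time float8,
  embedding vector(768),
  metadata jsonb
);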

Ingest

from supabase import create_client, Client
from mixpeek import Mixpeek

# Initialize the Mixpeek client with your API key
mixpeek = Mixpeek("YOUR_MIXPEEK_API_KEY")

# Initialize Supabase client
supabase: Client = create_client("YOUR_SUPABASE_URL", "YOUR_SUPABASE_KEY")

# The video_chunks table should already exist (created with the SQL above);
# the Supabase Python client does not create tables.

# Process video chunks
video_url = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/starter/jurassic_park_trailer.mp4"
processed_chunks = mixpeek.tools.video.process(
    video_source=video_url,
    chunk_interval=1,  # 1-second intervals
    resolution=[720, 1280]
)

for chunk in processed_chunks:
    print(f"Processing video chunk: {chunk['start_time']}")

    # Embed each chunk
    embed_response = mixpeek.embed.video(
        model_id="vuse-generic-v1",
        input=chunk['base64_chunk'],
        input_type="base64"
    )

    # Insert into Supabase
    supabase.table("video_chunks").insert({
        "id": f"chunk_{chunk['start_time']}",
        "start_time": chunk["start_time"],
        "end_time": chunk["end_time"],
        "embedding": embed_response['embedding'],
        "metadata": {"video_url": video_url}
    }).execute()

# Create an index for fast search performance (run once in the Supabase SQL editor):
#   CREATE INDEX ON video_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

Text Query

query = "two boys inside a car"

embed_response = mixpeek.embed.video(
    model_id="vuse-generic-v1",
    input=query,
    input_type="text"
)

results = supabase.rpc(
    'match_video_chunks',
    {
        'query_embedding': embed_response['embedding'],
        'match_threshold': 0.8,
        'match_count': 10
    }
).execute()

for result in results.data:
    print(f"Chunk ID: {result['id']}, Similarity: {result['similarity']}, Start Time: {result['start_time']}, End Time: {result['end_time']}")

Video Query

# We'll use a cartoon version of Jurassic Park as the query video
file_url = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/starter/jurassic_bunny.mp4"

embed_response = mixpeek.embed.video(
    model_id="vuse-generic-v1",
    input=file_url,
    input_type="url"
)

results = supabase.rpc(
    'match_video_chunks',
    {
        'query_embedding': embed_response['embedding'],
        'match_threshold': 0.8,
        'match_count': 10
    }
).execute()

for result in results.data:
    print(f"Chunk ID: {result['id']}, Similarity: {result['similarity']}, Start Time: {result['start_time']}, End Time: {result['end_time']}")

Image

We’ll use the openai-clip-vit-base-patch32 model to embed images as 512-dimension vectors.

You’ll need to create a Supabase table called images with appropriate columns for storing image data and embeddings.
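
A minimal schema sketch, again assuming pgvector is enabled (run once in the SQL editor; the column names match the code below):

create table images (
  id text primary key,
  embedding vector(512),
  image_url text
);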

Ingest

from supabase import create_client, Client
from mixpeek import Mixpeek

# Initialize the Mixpeek client with your API key
mixpeek = Mixpeek("YOUR_MIXPEEK_API_KEY")

# Initialize Supabase client
supabase: Client = create_client("YOUR_SUPABASE_URL", "YOUR_SUPABASE_KEY")

# The images table should already exist (created with the SQL above);
# the Supabase Python client does not create tables.

# List of image URLs to process
image_urls = [
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/image1.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/image2.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/image3.jpg",
    "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/image4.jpg"
]

for url in image_urls:
    print(f"Processing image: {url}")

    # Embed each image
    embedding = mixpeek.embed.image(
        model_id="openai-clip-vit-base-patch32",
        input=url,
        input_type="url"
    )["embedding"]

    # Insert into Supabase
    supabase.table("images").insert({
        "id": f"image_{url.split('/')[-1]}",
        "embedding": embed_response,
        "image_url": url
    }).execute()

# Create an index for fast search performance (run once in the Supabase SQL editor):
#   CREATE INDEX ON images USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

Text Query

query = "a cat sitting on a windowsill"

embed_response = mixpeek.embed.image(
    model_id="openai-clip-vit-base-patch32",
    input=query,
    input_type="text"
)

results = supabase.rpc(
    'match_images',
    {
        'query_embedding': embed_response['embedding'],
        'match_threshold': 0.8,
        'match_count': 5
    }
).execute()

for result in results.data:
    print(f"Image ID: {result['id']}, Similarity: {result['similarity']}, Image URL: {result['image_url']}")

Image Query

# Use an image from the same bucket as a query
query_image = "https://mixpeek-public-demo.s3.us-east-2.amazonaws.com/supabase/query_image.jpg"

embed_response = mixpeek.embed.image(
    model_id="openai-clip-vit-base-patch32",
    input=query_image,
    input_type="url"
)

results = supabase.rpc(
    'match_images',
    {
        'query_embedding': embed_response['embedding'],
        'match_threshold': 0.8,
        'match_count': 5
    }
).execute()

for result in results.data:
    print(f"Image ID: {result['id']}, Similarity: {result['similarity']}, Image URL: {result['image_url']}")