Clone a collection with optional modifications.
Purpose: Creates a NEW collection (with new ID) based on an existing one. This is the recommended way to iterate on collection designs when you need to modify core configuration that PATCH doesn’t allow (source, feature_extractor, field_passthrough).
Example request:
curl --request POST \
--url https://api.mixpeek.com/v1/collections/{collection_identifier}/clone \
--header 'Content-Type: application/json' \
--data '
{
"collection_name": "<string>",
"description": "Cloned from product_embeddings with CLIP v2",
"source": {
"bucket_ids": [
"bkt_marketing_videos"
],
"description": "Single bucket source",
"type": "bucket"
},
"feature_extractor": {
"description": "Text extractor with field passthrough",
"feature_extractor_name": "text_extractor",
"field_passthrough": [
{
"required": true,
"source_path": "title"
},
{
"source_path": "author"
}
],
"input_mappings": {
"text": "content"
},
"parameters": {
"model": "text-embedding-3-small"
},
"version": "v1"
},
"enabled": true,
"metadata": {},
"taxonomy_applications": [
{
"taxonomy_id": "<string>",
"execution_mode": "on_demand",
"target_collection_id": "<string>",
"scroll_filters": {
"AND": [
{
"field": "name",
"operator": "eq",
"value": "John"
},
{
"field": "age",
"operator": "gte",
"value": 30
}
],
"OR": [
{
"field": "status",
"operator": "eq",
"value": "active"
},
{
"field": "role",
"operator": "eq",
"value": "admin"
}
],
"NOT": [
{
"field": "department",
"operator": "eq",
"value": "HR"
},
{
"field": "location",
"operator": "eq",
"value": "remote"
}
],
"case_sensitive": true
}
}
]
}
'
Example response:
{
"collection": {
"collection_name": "<string>",
"input_schema": {
"properties": {}
},
"output_schema": {
"properties": {}
},
"feature_extractor": {
"feature_extractor_name": "<string>",
"version": "<string>",
"feature_extractor_id": "<string>",
"parameters": {},
"input_mappings": {
"image": "product_image",
"text": "title"
},
"field_passthrough": [
{
"source_path": "<string>",
"target_path": "title",
"default": "Unknown",
"required": false
}
],
"include_all_source_fields": false
},
"source": {
"type": "bucket",
"bucket_ids": [
"bkt_marketing_videos"
],
"collection_id": "col_video_frames",
"collection_ids": [
"col_us_products",
"col_eu_products"
],
"inherited_bucket_ids": [
"bkt_marketing_videos"
],
"source_filters": {
"description": "Filter only video content",
"filters": {
"AND": [
{
"field": "blobs.type",
"operator": "eq",
"value": "video"
}
]
}
}
},
"collection_id": "<string>",
"description": "Video frames extracted at 1 FPS with CLIP embeddings",
"source_bucket_schemas": null,
"source_lineage": [
{
"source_config": {
"type": "bucket",
"bucket_ids": [
"bkt_marketing_videos"
],
"collection_id": "col_video_frames",
"collection_ids": [
"col_us_products",
"col_eu_products"
],
"inherited_bucket_ids": [
"bkt_marketing_videos"
],
"source_filters": {
"description": "Filter only video content",
"filters": {
"AND": [
{
"field": "blobs.type",
"operator": "eq",
"value": "video"
}
]
}
}
},
"feature_extractor": {
"feature_extractor_name": "<string>",
"version": "<string>",
"feature_extractor_id": "<string>",
"parameters": {},
"input_mappings": {
"image": "product_image",
"text": "title"
},
"field_passthrough": [
{
"source_path": "<string>",
"target_path": "title",
"default": "Unknown",
"required": false
}
],
"include_all_source_fields": false
},
"output_schema": {
"properties": {}
}
}
],
"vector_indexes": [
"<unknown>"
],
"payload_indexes": [
"<unknown>"
],
"enabled": true,
"metadata": {
"environment": "production",
"project": "Q4_campaign",
"team": "data-science"
},
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"document_count": 0,
"schema_version": 1,
"last_schema_sync": "2023-11-07T05:31:56Z",
"schema_sync_enabled": true,
"taxonomy_applications": null,
"cluster_applications": null
},
"source_collection_id": "<string>"
}
Authorizations
REQUIRED: Bearer token authentication using your API key. Format: 'Bearer sk_xxxxxxxxxxxxx'. You can create API keys in the Mixpeek dashboard under Organization Settings.
Headers
REQUIRED: Namespace identifier for scoping this request. All resources (collections, buckets, taxonomies, etc.) are scoped to a namespace. You can provide either the namespace name or namespace ID. Format: ns_xxxxxxxxxxxxx (ID) or a custom name like 'my-namespace'
Path Parameters
collection_identifier
Source collection ID or name to clone.
Body
Request to clone a collection with optional modifications.
Purpose: Cloning creates a NEW collection (with new ID) based on an existing one, allowing you to make changes that aren't allowed via PATCH (source, feature_extractor, field_passthrough). This is the recommended way to iterate on collection designs.
All body fields are optional unless noted; any field you omit is copied from the source collection.
REQUIRED. Name for the cloned collection. Must be unique and different from the source collection.
OPTIONAL. Description override. If omitted, copies from source collection.
"Cloned from product_embeddings with CLIP v2"
OPTIONAL. Override source configuration. If omitted, copies from source collection. Allows switching between buckets or collections.
REQUIRED. Type of source for this collection. 'bucket': Process objects from one or more buckets (first-stage processing). 'collection': Process documents from another collection (downstream processing). Use 'bucket' for initial data ingestion, 'collection' for decomposition trees.
Available options: bucket, collection, taxonomy, cluster
bucket_ids
List of bucket IDs when type='bucket'. REQUIRED when type='bucket'. NOT ALLOWED when type='collection'. Can specify one or more buckets to process. Single bucket: Use array with one element ['bkt_id']. Multiple buckets: All buckets MUST have compatible schemas. Schema compatibility validated at collection creation. Compatible schemas have: 1) Same field names, 2) Same field types, 3) Same required status. Documents will include root_bucket_id to track which bucket they came from. Use cases: multi-region data, multi-team consolidation, environment aggregation.
Example: ["bkt_marketing_videos"]
collection_id
Collection ID when type='collection' (single collection). Use this OR collection_ids (not both). REQUIRED when type='collection' and processing single collection. NOT ALLOWED when type='bucket'. The collection will process documents from this upstream collection. The upstream collection's output_schema becomes this collection's input_schema. This enables decomposition trees (multi-stage pipelines). Example: Process frames collection → create scenes collection.
"col_video_frames"
List of collection IDs when type='collection' (multiple collections). Use this OR collection_id (not both). REQUIRED when type='collection' and processing multiple collections. NOT ALLOWED when type='bucket'. Used for operations that consolidate multiple upstream collections. Example: Clustering across multiple collections → cluster output collection. All collections must have compatible schemas for consolidation operations.
1["col_us_products", "col_eu_products"]List of original bucket IDs that source collections originated from. OPTIONAL. Only used when type='collection'. Tracks the complete lineage chain: buckets → collections → derived collections. Extracted from upstream collection metadata at collection creation time. Enables tracing derived collections (like cluster outputs) back to original data sources. Example: Cluster output collection inherits bucket IDs from its source collections. Format: List of bucket IDs with 'bkt_' prefix.
["bkt_marketing_videos"]Optional filters to apply to source data. When specified, only objects/documents matching these filters will be processed by this collection. Filters are evaluated at batch creation time. Uses same LogicalOperator model as list APIs for consistency.
Optional logical filters to apply to source data. Uses LogicalOperator model with AND/OR/NOT support. When specified, only objects/documents matching these filters will be processed by this collection. When null, all source data is processed (no filtering). Filters are consistent across all batch runs for this collection.
Logical AND operation - all conditions must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "name",
"operator": "eq",
"value": "John"
},
{
"field": "age",
"operator": "gte",
"value": 30
}
]
Logical OR operation - at least one condition must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "status",
"operator": "eq",
"value": "active"
},
{
"field": "role",
"operator": "eq",
"value": "admin"
}
]
Logical NOT operation - all conditions must be false
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "department",
"operator": "eq",
"value": "HR"
},
{
"field": "location",
"operator": "eq",
"value": "remote"
}
]
Whether to perform case-sensitive matching
true
{
"AND": [
{
"field": "blobs.type",
"operator": "eq",
"value": "video"
}
]
}
{
"description": "Filter only video content",
"filters": {
"AND": [
{
"field": "blobs.type",
"operator": "eq",
"value": "video"
}
]
}
}
{
"bucket_ids": ["bkt_marketing_videos"],
"description": "Single bucket source",
"type": "bucket"
}
OPTIONAL. Override feature extractor configuration. If omitted, copies from source collection. This is where you'd change models, parameters, or field_passthrough.
Name of the feature extractor
Version of the feature extractor
Parameters for the feature extractor (model name, thresholds, etc.). See extractor-specific documentation for available parameters.
Mapping from extractor input names to source field paths. Tells the extractor which source fields to process. Example: {'image': 'thumbnail_url', 'text': 'description'}
{ "image": "product_image", "text": "title" }NOT REQUIRED. List of specific fields to pass through from source to output documents. These fields are included alongside extractor-computed features (embeddings, detections, etc.). Empty list = only extractor outputs in documents (default behavior). With entries = specified fields + extractor outputs in documents.
How It Works:
Common Use Cases:
Behavior:
Output Schema: output_schema = field_passthrough fields + extractor output fields Example: ['title', 'category', 'text_extractor_v1_embedding']
Show child attributes
REQUIRED. Path to the source field to copy. Simple fields: Use field name directly (e.g., 'title', 'campaign_id'). Nested fields: Use dot notation (e.g., 'metadata.author', 'config.model.version'). The field must exist in the source bucket schema or upstream collection schema. Without target_path, nested fields are flattened: 'metadata.author' becomes 'author' in output.
OPTIONAL. Target field name in output document. If NOT PROVIDED: Uses source_path name (or last component for nested paths). - 'title' → 'title' - 'metadata.author' → 'author' If PROVIDED: Uses this exact name in output. - source_path='doc_title', target_path='title' → 'title' - source_path='metadata.author', target_path='contributor' → 'contributor' Use cases: - Rename fields for cleaner API schemas - Avoid name conflicts with extractor outputs - Standardize field names across different sources Constraints: - Must not conflict with system fields (document_id, collection_id, etc.) - Must not conflict with extractor output fields - Must be a valid field name (alphanumeric, underscores, hyphens)
"title"
OPTIONAL. Default value if source field doesn't exist or is None. If NOT PROVIDED and field missing: Field is omitted from output document. If PROVIDED and field missing: Field is included with this default value. Type should match expected field type (string, int, list, dict, etc.).
"Unknown"
OPTIONAL. Whether this field MUST exist in source. If True and field missing: Raises validation error, processing fails. If False and field missing: Field omitted (or default used if provided). Use True for: Critical identifiers, required business fields. Use False for: Optional metadata, nice-to-have fields. Default: False (field is optional).
NOT REQUIRED. Whether to include ALL fields from source object/document in output. Default: False (only field_passthrough fields included).
Examples: False: source={a,b,c,d} + passthrough=[a,b] → output={a,b,embedding} True: source={a,b,c,d} + passthrough=[a→x] → output={x,b,c,d,embedding}
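To make the passthrough behavior concrete, here is a minimal sketch. The source object and the embedding field name are illustrative (the actual output field name depends on the extractor and version):
Source object: { "title": "Red Sneakers", "author": "jane", "internal_note": "draft" }
With field_passthrough = [{ "source_path": "title" }] and include_all_source_fields = false, the output document would contain only:
{ "title": "Red Sneakers", "text_extractor_v1_embedding": [0.12, -0.04] }
With include_all_source_fields = true, "author" and "internal_note" would also be carried into the output alongside the extractor fields.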
{
"description": "Text extractor with field passthrough",
"feature_extractor_name": "text_extractor",
"field_passthrough": [
{ "required": true, "source_path": "title" },
{ "source_path": "author" }
],
"input_mappings": { "text": "content" },
"parameters": { "model": "text-embedding-3-small" },
"version": "v1"
}
OPTIONAL. Override enabled status. If omitted, copies from source collection.
OPTIONAL. Override metadata. If omitted, copies from source collection.
OPTIONAL. Override taxonomy applications. If omitted, copies from source collection.
ID of the TaxonomyModel to execute.
'on_demand' executes at query time; 'materialize' materializes during ingestion.
Available options: on_demand, materialize
Optional collection to persist results into when execution_mode is 'materialize'. If omitted, the source collection is updated in-place.
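A minimal sketch of one taxonomy application entry; the taxonomy and collection IDs are hypothetical:
{
  "taxonomy_id": "tax_product_categories",
  "execution_mode": "materialize",
  "target_collection_id": "col_enriched_products"
}
Omitting target_collection_id with execution_mode='materialize' would enrich the source collection in-place, per the behavior described above.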
Additional filters applied when scrolling the source collection before enrichment.
Logical AND operation - all conditions must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "name",
"operator": "eq",
"value": "John"
},
{
"field": "age",
"operator": "gte",
"value": 30
}
]
Logical OR operation - at least one condition must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "status",
"operator": "eq",
"value": "active"
},
{
"field": "role",
"operator": "eq",
"value": "admin"
}
]
Logical NOT operation - all conditions must be false
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "department",
"operator": "eq",
"value": "HR"
},
{
"field": "location",
"operator": "eq",
"value": "remote"
}
]
Whether to perform case-sensitive matching
true
Successful Response
Response after cloning a collection.
Cloned collection configuration with new collection_id.
REQUIRED. Human-readable name for the collection. Must be unique within the namespace. Used for: Display, lookups (can query by name or ID), organization. Format: Alphanumeric with underscores/hyphens, 3-100 characters. Examples: 'product_embeddings', 'video_frames', 'customer_documents'.
Required string length: 3 - 100
input_schema
REQUIRED (auto-computed from source). Input schema defining fields available to the feature extractor. Source: bucket.bucket_schema (if source.type='bucket') OR upstream_collection.output_schema (if source.type='collection'). Determines: Which fields can be used in input_mappings and field_passthrough. This is the 'left side' of the transformation - what data goes IN. Format: BucketSchema with properties dict. Use for: Validating input_mappings, configuring field_passthrough.
Each entry in properties is a schema field definition for a bucket object field.
Supported data types for bucket schema fields. Types fall into two categories: explicit types (type-safe) and the automatic type (flexible).
Available options: string, number, integer, float, boolean, object, array, date, datetime, json, file, text, image, audio, video, pdf, document, spreadsheet, presentation, dense_vector, sparse_vector, int8_vector, automatic
OPTIONAL. List of example values for this field. Used by Apps to show example inputs in the UI. Provide multiple diverse examples when possible.
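As an illustration, a schema's properties map field names to typed definitions. The exact wire format below is a plausible sketch, not a verbatim API response; the "type" and "examples" keys are assumed from the field descriptions above:
{
  "properties": {
    "title": { "type": "text", "examples": ["Red Sneakers", "Blue Jacket"] },
    "product_image": { "type": "image" },
    "price": { "type": "float" }
  }
}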
REQUIRED (auto-computed at creation). Output schema defining fields in final documents. Computed as: field_passthrough fields + extractor output fields (deterministic). Known IMMEDIATELY when collection is created - no waiting for documents! This is the 'right side' of the transformation - what data comes OUT. Use for: Understanding document structure, building queries, schema validation. Example: {title (passthrough), embedding (extractor output)} = output_schema.
Each entry in properties is a schema field definition for a bucket object field.
Supported data types for bucket schema fields. Types fall into two categories: explicit types (type-safe) and the automatic type (flexible).
Available options: string, number, integer, float, boolean, object, array, date, datetime, json, file, text, image, audio, video, pdf, document, spreadsheet, presentation, dense_vector, sparse_vector, int8_vector, automatic
OPTIONAL. List of example values for this field. Used by Apps to show example inputs in the UI. Provide multiple diverse examples when possible.
REQUIRED. Single feature extractor configuration for this collection. Defines: extractor name/version, input_mappings, field_passthrough, parameters. Task 9 change: ONE extractor per collection (previously supported multiple). For multiple extractors: Create multiple collections and use collection-to-collection pipelines. Use field_passthrough to include additional source fields beyond extractor outputs.
Name of the feature extractor
Version of the feature extractor
Unique identifier for the feature extractor instance, constructed from name + version.
Parameters for the feature extractor (model name, thresholds, etc.). See extractor-specific documentation for available parameters.
A typed value for a feature extractor parameter.
Mapping from extractor input names to source field paths. Tells the extractor which source fields to process. Example: {'image': 'thumbnail_url', 'text': 'description'}
Example: { "image": "product_image", "text": "title" }
field_passthrough
NOT REQUIRED. List of specific fields to pass through from source to output documents. These fields are included alongside extractor-computed features (embeddings, detections, etc.). Empty list = only extractor outputs in documents (default behavior). With entries = specified fields + extractor outputs in documents.
Output schema: output_schema = field_passthrough fields + extractor output fields. Example: ['title', 'category', 'text_extractor_v1_embedding']
REQUIRED. Path to the source field to copy. Simple fields: Use field name directly (e.g., 'title', 'campaign_id'). Nested fields: Use dot notation (e.g., 'metadata.author', 'config.model.version'). The field must exist in the source bucket schema or upstream collection schema. Without target_path, nested fields are flattened: 'metadata.author' becomes 'author' in output.
OPTIONAL. Target field name in output document. If NOT PROVIDED: Uses source_path name (or last component for nested paths). - 'title' → 'title' - 'metadata.author' → 'author' If PROVIDED: Uses this exact name in output. - source_path='doc_title', target_path='title' → 'title' - source_path='metadata.author', target_path='contributor' → 'contributor' Use cases: - Rename fields for cleaner API schemas - Avoid name conflicts with extractor outputs - Standardize field names across different sources Constraints: - Must not conflict with system fields (document_id, collection_id, etc.) - Must not conflict with extractor output fields - Must be a valid field name (alphanumeric, underscores, hyphens)
"title"
OPTIONAL. Default value if source field doesn't exist or is None. If NOT PROVIDED and field missing: Field is omitted from output document. If PROVIDED and field missing: Field is included with this default value. Type should match expected field type (string, int, list, dict, etc.).
"Unknown"
OPTIONAL. Whether this field MUST exist in source. If True and field missing: Raises validation error, processing fails. If False and field missing: Field omitted (or default used if provided). Use True for: Critical identifiers, required business fields. Use False for: Optional metadata, nice-to-have fields. Default: False (field is optional).
NOT REQUIRED. Whether to include ALL fields from source object/document in output. Default: False (only field_passthrough fields included).
Examples: False: source={a,b,c,d} + passthrough=[a,b] → output={a,b,embedding} True: source={a,b,c,d} + passthrough=[a→x] → output={x,b,c,d,embedding}
REQUIRED. Source configuration defining where data comes from. Type 'bucket': Process objects from one or more buckets (tier 1). Type 'collection': Process documents from upstream collection(s) (tier 2+). For multi-bucket sources, all buckets must have compatible schemas. For multi-collection sources, all collections must have compatible schemas. Determines input_schema and enables decomposition trees.
REQUIRED. Type of source for this collection. 'bucket': Process objects from one or more buckets (first-stage processing). 'collection': Process documents from another collection (downstream processing). Use 'bucket' for initial data ingestion, 'collection' for decomposition trees.
Available options: bucket, collection, taxonomy, cluster
bucket_ids
List of bucket IDs when type='bucket'. REQUIRED when type='bucket'. NOT ALLOWED when type='collection'. Can specify one or more buckets to process. Single bucket: Use array with one element ['bkt_id']. Multiple buckets: All buckets MUST have compatible schemas. Schema compatibility validated at collection creation. Compatible schemas have: 1) Same field names, 2) Same field types, 3) Same required status. Documents will include root_bucket_id to track which bucket they came from. Use cases: multi-region data, multi-team consolidation, environment aggregation.
Example: ["bkt_marketing_videos"]
collection_id
Collection ID when type='collection' (single collection). Use this OR collection_ids (not both). REQUIRED when type='collection' and processing single collection. NOT ALLOWED when type='bucket'. The collection will process documents from this upstream collection. The upstream collection's output_schema becomes this collection's input_schema. This enables decomposition trees (multi-stage pipelines). Example: Process frames collection → create scenes collection.
"col_video_frames"
List of collection IDs when type='collection' (multiple collections). Use this OR collection_id (not both). REQUIRED when type='collection' and processing multiple collections. NOT ALLOWED when type='bucket'. Used for operations that consolidate multiple upstream collections. Example: Clustering across multiple collections → cluster output collection. All collections must have compatible schemas for consolidation operations.
1["col_us_products", "col_eu_products"]List of original bucket IDs that source collections originated from. OPTIONAL. Only used when type='collection'. Tracks the complete lineage chain: buckets → collections → derived collections. Extracted from upstream collection metadata at collection creation time. Enables tracing derived collections (like cluster outputs) back to original data sources. Example: Cluster output collection inherits bucket IDs from its source collections. Format: List of bucket IDs with 'bkt_' prefix.
["bkt_marketing_videos"]Optional filters to apply to source data. When specified, only objects/documents matching these filters will be processed by this collection. Filters are evaluated at batch creation time. Uses same LogicalOperator model as list APIs for consistency.
Optional logical filters to apply to source data. Uses LogicalOperator model with AND/OR/NOT support. When specified, only objects/documents matching these filters will be processed by this collection. When null, all source data is processed (no filtering). Filters are consistent across all batch runs for this collection.
Logical AND operation - all conditions must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "name",
"operator": "eq",
"value": "John"
},
{
"field": "age",
"operator": "gte",
"value": 30
}
]
Logical OR operation - at least one condition must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "status",
"operator": "eq",
"value": "active"
},
{
"field": "role",
"operator": "eq",
"value": "admin"
}
]
Logical NOT operation - all conditions must be false
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "department",
"operator": "eq",
"value": "HR"
},
{
"field": "location",
"operator": "eq",
"value": "remote"
}
]
Whether to perform case-sensitive matching
true
{
"AND": [
{
"field": "blobs.type",
"operator": "eq",
"value": "video"
}
]
}
{
"description": "Filter only video content",
"filters": {
"AND": [
{
"field": "blobs.type",
"operator": "eq",
"value": "video"
}
]
}
}
NOT REQUIRED (auto-generated). Unique identifier for this collection. Used for: API paths, document queries, pipeline references. Format: 'col_' prefix + 10 random alphanumeric characters. Stable after creation - use for all collection references.
NOT REQUIRED. Human-readable description of the collection's purpose. Use for: Documentation, team communication, UI display. Common pattern: Describe what the collection contains and what processing is applied.
"Video frames extracted at 1 FPS with CLIP embeddings"
NOT REQUIRED (auto-computed). Snapshot of bucket schemas at collection creation. Only populated for multi-bucket collections (source.type='bucket' with multiple bucket_ids). Key: bucket_id, Value: BucketSchema at time of collection creation. Used for: Schema compatibility validation, document lineage, debugging. Schema snapshot is immutable - bucket schema changes after collection creation do not affect this. Single-bucket collections may omit this field (schema in input_schema is sufficient).
Schema definition for bucket objects.
IMPORTANT: The bucket schema defines what fields your bucket objects will have; those fields are what a collection's input_mappings and field_passthrough reference.
Without a bucket_schema, collections cannot use input_mappings.
Each entry in properties is a schema field definition for a bucket object field.
Supported data types for bucket schema fields. Types fall into two categories: explicit types (type-safe) and the automatic type (flexible).
Available options: string, number, integer, float, boolean, object, array, date, datetime, json, file, text, image, audio, video, pdf, document, spreadsheet, presentation, dense_vector, sparse_vector, int8_vector, automatic
OPTIONAL. List of example values for this field. Used by Apps to show example inputs in the UI. Provide multiple diverse examples when possible.
null
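A sketch of what source_bucket_schemas might look like for a two-bucket collection; the bucket IDs and fields are hypothetical, and the schema shape follows the sketch shown earlier:
{
  "bkt_us_videos": { "properties": { "title": { "type": "text" } } },
  "bkt_eu_videos": { "properties": { "title": { "type": "text" } } }
}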
NOT REQUIRED (auto-computed). Lineage chain showing complete processing history. Each entry contains: source_config, feature_extractor, output_schema for one tier. Length indicates processing depth (1 = tier 1, 2 = tier 2, etc.). Use for: Understanding multi-tier pipelines, visualizing decomposition trees.
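For example, a two-tier pipeline (bucket → frames collection → scenes collection) would produce two lineage entries; the extractor names below are hypothetical:
[
  {
    "source_config": { "type": "bucket", "bucket_ids": ["bkt_marketing_videos"] },
    "feature_extractor": { "feature_extractor_name": "video_frame_extractor", "version": "v1" },
    "output_schema": { "properties": {} }
  },
  {
    "source_config": { "type": "collection", "collection_id": "col_video_frames" },
    "feature_extractor": { "feature_extractor_name": "scene_extractor", "version": "v1" },
    "output_schema": { "properties": {} }
  }
]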
Configuration of the source for this lineage entry
REQUIRED. Type of source for this collection. 'bucket': Process objects from one or more buckets (first-stage processing). 'collection': Process documents from another collection (downstream processing). Use 'bucket' for initial data ingestion, 'collection' for decomposition trees.
Available options: bucket, collection, taxonomy, cluster
bucket_ids
List of bucket IDs when type='bucket'. REQUIRED when type='bucket'. NOT ALLOWED when type='collection'. Can specify one or more buckets to process. Single bucket: Use array with one element ['bkt_id']. Multiple buckets: All buckets MUST have compatible schemas. Schema compatibility validated at collection creation. Compatible schemas have: 1) Same field names, 2) Same field types, 3) Same required status. Documents will include root_bucket_id to track which bucket they came from. Use cases: multi-region data, multi-team consolidation, environment aggregation.
Example: ["bkt_marketing_videos"]
collection_id
Collection ID when type='collection' (single collection). Use this OR collection_ids (not both). REQUIRED when type='collection' and processing single collection. NOT ALLOWED when type='bucket'. The collection will process documents from this upstream collection. The upstream collection's output_schema becomes this collection's input_schema. This enables decomposition trees (multi-stage pipelines). Example: Process frames collection → create scenes collection.
"col_video_frames"
List of collection IDs when type='collection' (multiple collections). Use this OR collection_id (not both). REQUIRED when type='collection' and processing multiple collections. NOT ALLOWED when type='bucket'. Used for operations that consolidate multiple upstream collections. Example: Clustering across multiple collections → cluster output collection. All collections must have compatible schemas for consolidation operations.
1["col_us_products", "col_eu_products"]List of original bucket IDs that source collections originated from. OPTIONAL. Only used when type='collection'. Tracks the complete lineage chain: buckets → collections → derived collections. Extracted from upstream collection metadata at collection creation time. Enables tracing derived collections (like cluster outputs) back to original data sources. Example: Cluster output collection inherits bucket IDs from its source collections. Format: List of bucket IDs with 'bkt_' prefix.
["bkt_marketing_videos"]Optional filters to apply to source data. When specified, only objects/documents matching these filters will be processed by this collection. Filters are evaluated at batch creation time. Uses same LogicalOperator model as list APIs for consistency.
Optional logical filters to apply to source data. Uses LogicalOperator model with AND/OR/NOT support. When specified, only objects/documents matching these filters will be processed by this collection. When null, all source data is processed (no filtering). Filters are consistent across all batch runs for this collection.
Logical AND operation - all conditions must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against; may be a literal or a dynamic reference. For dynamic values, provide the dot-notation path to the value in the runtime query request, e.g., 'inputs.user_id'. Example: "dynamic"
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "name",
"operator": "eq",
"value": "John"
},
{
"field": "age",
"operator": "gte",
"value": 30
}
]
Logical OR operation - at least one condition must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against; may be a literal or a dynamic reference. For dynamic values, provide the dot-notation path to the value in the runtime query request, e.g., 'inputs.user_id'. Example: "dynamic"
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "status",
"operator": "eq",
"value": "active"
},
{
"field": "role",
"operator": "eq",
"value": "admin"
}
]
Logical NOT operation - all conditions must be false
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against; may be a literal or a dynamic reference. For dynamic values, provide the dot-notation path to the value in the runtime query request, e.g., 'inputs.user_id'. Example: "dynamic"
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "department",
"operator": "eq",
"value": "HR"
},
{
"field": "location",
"operator": "eq",
"value": "remote"
}
]
Whether to perform case-sensitive matching
true
{
"AND": [
{
"field": "blobs.type",
"operator": "eq",
"value": "video"
}
]
}
{
"description": "Filter only video content",
"filters": {
"AND": [
{
"field": "blobs.type",
"operator": "eq",
"value": "video"
}
]
}
}
Single feature extractor applied at this stage
Name of the feature extractor
Version of the feature extractor
Unique identifier for the feature extractor instance, constructed from name + version.
Parameters for the feature extractor (model name, thresholds, etc.). See extractor-specific documentation for available parameters.
A typed value for a feature extractor parameter.
Mapping from extractor input names to source field paths. Tells the extractor which source fields to process. Example: {'image': 'thumbnail_url', 'text': 'description'}
Example: { "image": "product_image", "text": "title" }
field_passthrough
NOT REQUIRED. List of specific fields to pass through from source to output documents. These fields are included alongside extractor-computed features (embeddings, detections, etc.). Empty list = only extractor outputs in documents (default behavior). With entries = specified fields + extractor outputs in documents.
Output Schema: output_schema = field_passthrough fields + extractor output fields Example: ['title', 'category', 'text_extractor_v1_embedding']
REQUIRED. Path to the source field to copy. Simple fields: Use field name directly (e.g., 'title', 'campaign_id'). Nested fields: Use dot notation (e.g., 'metadata.author', 'config.model.version'). The field must exist in the source bucket schema or upstream collection schema. Without target_path, nested fields are flattened: 'metadata.author' becomes 'author' in output.
OPTIONAL. Target field name in output document. If NOT PROVIDED: Uses source_path name (or last component for nested paths). - 'title' → 'title' - 'metadata.author' → 'author' If PROVIDED: Uses this exact name in output. - source_path='doc_title', target_path='title' → 'title' - source_path='metadata.author', target_path='contributor' → 'contributor' Use cases: - Rename fields for cleaner API schemas - Avoid name conflicts with extractor outputs - Standardize field names across different sources Constraints: - Must not conflict with system fields (document_id, collection_id, etc.) - Must not conflict with extractor output fields - Must be a valid field name (alphanumeric, underscores, hyphens)
"title"
OPTIONAL. Default value if source field doesn't exist or is None. If NOT PROVIDED and field missing: Field is omitted from output document. If PROVIDED and field missing: Field is included with this default value. Type should match expected field type (string, int, list, dict, etc.).
"Unknown"
OPTIONAL. Whether this field MUST exist in source. If True and field missing: Raises validation error, processing fails. If False and field missing: Field omitted (or default used if provided). Use True for: Critical identifiers, required business fields. Use False for: Optional metadata, nice-to-have fields. Default: False (field is optional).
NOT REQUIRED. Whether to include ALL fields from source object/document in output. Default: False (only field_passthrough fields included).
Examples: False: source={a,b,c,d} + passthrough=[a,b] → output={a,b,embedding} True: source={a,b,c,d} + passthrough=[a→x] → output={x,b,c,d,embedding}
Output schema produced by this processing stage
Each entry in properties is a schema field definition for a bucket object field.
Supported data types for bucket schema fields. Types fall into two categories: explicit types (type-safe) and the automatic type (flexible).
Available options: string, number, integer, float, boolean, object, array, date, datetime, json, file, text, image, audio, video, pdf, document, spreadsheet, presentation, dense_vector, sparse_vector, int8_vector, automatic
OPTIONAL. List of example values for this field. Used by Apps to show example inputs in the UI. Provide multiple diverse examples when possible.
NOT REQUIRED (auto-computed from extractor). Vector indexes for semantic search. Populated from feature_extractor.required_vector_indexes. Defines: Which embeddings are indexed, dimensions, distance metrics. Use for: Understanding search capabilities, debugging vector queries.
NOT REQUIRED (auto-computed from extractor + namespace). Payload indexes for filtering. Enables efficient filtering on metadata fields, timestamps, IDs. Populated from: extractor requirements + namespace defaults. Use for: Understanding which fields support fast filtering.
NOT REQUIRED (defaults to True). Whether the collection accepts new documents. False: Collection exists but won't process new objects. True: Active and processing. Use for: Temporarily disabling collections without deletion.
NOT REQUIRED. Additional user-defined metadata for the collection. Arbitrary key-value pairs for custom organization, tracking, configuration. Not used by the platform - purely for user purposes. Common uses: team ownership, project tags, deployment environment.
{
"environment": "production",
"project": "Q4_campaign",
"team": "data-science"
}
Timestamp when the collection was created. Automatically set by the system when the collection is first saved to the database.
Timestamp when the collection was last updated. Automatically updated by the system whenever the collection is modified.
Number of documents currently in the collection. Automatically maintained by the system as documents are added or removed. Used for: Sorting, filtering, and pagination in list endpoints. Updated during batch processing and document operations.
0
Version number for the output_schema. Increments automatically when schema is updated via document sampling. Used to track schema evolution and trigger downstream collection schema updates.
Timestamp of last automatic schema sync from document sampling. Used to debounce schema updates (prevents thrashing).
Whether automatic schema discovery and sync is enabled for this collection. When True, schema is periodically updated by sampling documents. When False, schema remains fixed at creation time.
NOT REQUIRED. List of taxonomies to apply to documents in this collection. Each entry specifies: taxonomy_id, execution_mode (materialize/on_demand), optional filters. Materialized: Enrichments persisted to documents during ingestion. On-demand: Enrichments computed during retrieval (via taxonomy_join stages). Empty/null if no taxonomies attached. Use for: Categorization, hierarchical classification.
ID of the TaxonomyModel to execute.
'on_demand' executes at query time; 'materialize' materializes during ingestion.
Available options: on_demand, materialize
Optional collection to persist results into when execution_mode is 'materialize'. If omitted, the source collection is updated in-place.
Additional filters applied when scrolling the source collection before enrichment.
Logical AND operation - all conditions must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "name",
"operator": "eq",
"value": "John"
},
{
"field": "age",
"operator": "gte",
"value": 30
}
]
Logical OR operation - at least one condition must be true
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "status",
"operator": "eq",
"value": "active"
},
{
"field": "role",
"operator": "eq",
"value": "admin"
}
]
Logical NOT operation - all conditions must be false
Represents a single filter condition, with a field, a comparison operator, and a value to compare against.
field: Field name to filter on
value: Value to compare against
operator: Comparison operator. Available options: eq, ne, gt, lt, gte, lte, in, nin, contains, starts_with, ends_with, regex, exists, is_null, text
Example:
[
{
"field": "department",
"operator": "eq",
"value": "HR"
},
{
"field": "location",
"operator": "eq",
"value": "remote"
}
]
Whether to perform case-sensitive matching
true
null
NOT REQUIRED. List of clusters to automatically execute when batch processing completes. Each entry specifies: cluster_id, auto_execute_on_batch, min_document_threshold, cooldown_seconds. Clusters enrich source documents with cluster assignments (cluster_id, cluster_label, etc.). Empty/null if no clusters attached. Use for: Segmentation, grouping, pattern discovery.
ID of the cluster to execute (must exist and use this collection as input)
Automatically execute cluster when batch processing completes for this collection. If False, cluster must be executed manually via API.
Minimum number of documents required before executing cluster. If document_count < threshold, clustering is skipped. Useful to avoid clustering on small datasets.
Minimum time (in seconds) between automatic cluster executions. Prevents excessive re-clustering on frequent batch completions. Default: 3600 seconds (1 hour).
null
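A sketch of one cluster application entry, using the fields described above; the cluster ID is hypothetical:
[
  {
    "cluster_id": "clu_product_segments",
    "auto_execute_on_batch": true,
    "min_document_threshold": 1000,
    "cooldown_seconds": 3600
  }
]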
ID of the source collection that was cloned.