Skip to main content
POST /v1/collections/{collection_identifier}/clone
Clone Collection
curl --request POST \
  --url https://api.mixpeek.com/v1/collections/{collection_identifier}/clone \
  --header 'Authorization: Bearer YOUR_API_KEY' \
  --header 'X-Namespace: my-namespace' \
  --header 'Content-Type: application/json' \
  --data '
{
  "collection_name": "<string>",
  "description": "Cloned from product_embeddings with CLIP v2",
  "source": {
    "bucket_ids": [
      "bkt_marketing_videos"
    ],
    "description": "Single bucket source",
    "type": "bucket"
  },
  "feature_extractor": {
    "description": "Text extractor with field passthrough",
    "feature_extractor_name": "text_extractor",
    "field_passthrough": [
      {
        "required": true,
        "source_path": "title"
      },
      {
        "source_path": "author"
      }
    ],
    "input_mappings": {
      "text": "content"
    },
    "parameters": {
      "model": "text-embedding-3-small"
    },
    "version": "v1"
  },
  "enabled": true,
  "metadata": {},
  "taxonomy_applications": [
    {
      "taxonomy_id": "<string>",
      "execution_mode": "materialize",
      "target_collection_id": "<string>",
      "scroll_filters": {
        "AND": [
          {
            "field": "name",
            "operator": "eq",
            "value": "John"
          },
          {
            "field": "age",
            "operator": "gte",
            "value": 30
          }
        ],
        "OR": [
          {
            "field": "status",
            "operator": "eq",
            "value": "active"
          },
          {
            "field": "role",
            "operator": "eq",
            "value": "admin"
          }
        ],
        "NOT": [
          {
            "field": "department",
            "operator": "eq",
            "value": "HR"
          },
          {
            "field": "location",
            "operator": "eq",
            "value": "remote"
          }
        ],
        "case_sensitive": true
      }
    }
  ]
}
'
{
  "collection": {
    "collection_name": "<string>",
    "input_schema": {
      "properties": {}
    },
    "output_schema": {
      "properties": {}
    },
    "feature_extractor": {
      "feature_extractor_name": "<string>",
      "version": "<string>",
      "feature_extractor_id": "<string>",
      "params": {},
      "parameters": {
        "chunk_overlap": 0,
        "chunk_size": 1000,
        "description": "E-commerce product search (no chunking)",
        "expected_behavior": "Find semantically similar products even if they use different wording",
        "input_example": "wireless bluetooth headphones with noise cancelling",
        "split_by": "none",
        "use_case": "Search 1M products by natural language descriptions",
        "split_method": "time",
        "time_split_interval": 10
      },
      "input_mappings": {
        "image": "product_image",
        "text": "title"
      },
      "field_passthrough": [
        {
          "source_path": "<string>",
          "target_path": "title",
          "default": "Unknown",
          "required": false
        }
      ],
      "include_all_source_fields": false
    },
    "source": {
      "type": "bucket",
      "bucket_ids": [
        "bkt_marketing_videos"
      ],
      "collection_id": "col_video_frames",
      "collection_ids": [
        "col_us_products",
        "col_eu_products"
      ],
      "inherited_bucket_ids": [
        "bkt_marketing_videos"
      ],
      "source_filters": {
        "description": "Filter only video content",
        "filters": {
          "AND": [
            {
              "field": "blobs.type",
              "operator": "eq",
              "value": "video"
            }
          ]
        }
      }
    },
    "collection_id": "<string>",
    "description": "Video frames extracted at 1 FPS with CLIP embeddings",
    "source_bucket_schemas": null,
    "source_lineage": [
      {
        "source_config": {
          "type": "bucket",
          "bucket_ids": [
            "bkt_marketing_videos"
          ],
          "collection_id": "col_video_frames",
          "collection_ids": [
            "col_us_products",
            "col_eu_products"
          ],
          "inherited_bucket_ids": [
            "bkt_marketing_videos"
          ],
          "source_filters": {
            "description": "Filter only video content",
            "filters": {
              "AND": [
                {
                  "field": "blobs.type",
                  "operator": "eq",
                  "value": "video"
                }
              ]
            }
          }
        },
        "feature_extractor": {
          "feature_extractor_name": "<string>",
          "version": "<string>",
          "feature_extractor_id": "<string>",
          "params": {},
          "parameters": {
            "chunk_overlap": 0,
            "chunk_size": 1000,
            "description": "E-commerce product search (no chunking)",
            "expected_behavior": "Find semantically similar products even if they use different wording",
            "input_example": "wireless bluetooth headphones with noise cancelling",
            "split_by": "none",
            "use_case": "Search 1M products by natural language descriptions",
            "split_method": "time",
            "time_split_interval": 10
          },
          "input_mappings": {
            "image": "product_image",
            "text": "title"
          },
          "field_passthrough": [
            {
              "source_path": "<string>",
              "target_path": "title",
              "default": "Unknown",
              "required": false
            }
          ],
          "include_all_source_fields": false
        },
        "output_schema": {
          "properties": {}
        }
      }
    ],
    "vector_indexes": [
      "<unknown>"
    ],
    "payload_indexes": [
      "<unknown>"
    ],
    "enabled": true,
    "metadata": {
      "environment": "production",
      "project": "Q4_campaign",
      "team": "data-science"
    },
    "created_at": "2023-11-07T05:31:56Z",
    "updated_at": "2023-11-07T05:31:56Z",
    "document_count": 0,
    "schema_version": 1,
    "last_schema_sync": "2023-11-07T05:31:56Z",
    "schema_sync_enabled": true,
    "taxonomy_applications": null,
    "cluster_applications": null
  },
  "source_collection_id": "<string>"
}

Headers

Authorization
string

REQUIRED: Bearer token authentication using your API key. Format: 'Bearer sk_xxxxxxxxxxxxx'. You can create API keys in the Mixpeek dashboard under Organization Settings.

Examples:

"Bearer YOUR_API_KEY"

"Bearer sk_xxxxxxxxxxxxx"

X-Namespace
string

REQUIRED: Namespace identifier for scoping this request. All resources (collections, buckets, taxonomies, etc.) are scoped to a namespace. You can provide either the namespace name or namespace ID. Format: ns_xxxxxxxxxxxxx (ID) or a custom name like 'my-namespace'.

Examples:

"ns_abc123def456"

"production"

"my-namespace"

Path Parameters

collection_identifier
string
required

Source collection ID or name to clone.

Body

application/json

Request to clone a collection with optional modifications.

Purpose: Cloning creates a NEW collection (with new ID) based on an existing one, allowing you to make changes that aren't allowed via PATCH (source, feature_extractor, field_passthrough). This is the recommended way to iterate on collection designs.

Clone vs Template vs Version:

  • Clone: Copy THIS collection and modify it (for iteration/fixes)
  • Template: Create collection from a reusable pattern (for new projects)
  • Version: (Not implemented) - Use clone instead

Use Cases:

  • Change feature extractor configuration without breaking production
  • Modify field_passthrough to include/exclude fields
  • Switch to a different source (bucket or collection)
  • Test modifications before replacing production collection
  • Create variants (e.g., different embedding models)

All fields are OPTIONAL except collection_name:

  • Omit a field to keep the original value
  • Provide a field to override the original value
  • collection_name is REQUIRED (clones must have unique names)
collection_name
string
required

REQUIRED. Name for the cloned collection. Must be unique and different from the source collection.

Minimum string length: 1
Examples:

"product_embeddings_v2"

"video_frames_clip_v2"

description
string | null

OPTIONAL. Description override. If omitted, copies from source collection.

Example:

"Cloned from product_embeddings with CLIP v2"

source
SourceConfig · object

OPTIONAL. Override source configuration. If omitted, copies from source collection. Allows switching between buckets or collections.

Example:
{
"bucket_ids": ["bkt_marketing_videos"],
"description": "Single bucket source",
"type": "bucket"
}
feature_extractor
FeatureExtractorConfig · object

OPTIONAL. Override feature extractor configuration. If omitted, copies from source collection. This is where you'd change models, parameters, or field_passthrough.

Example:
{
"description": "Text extractor with field passthrough",
"feature_extractor_name": "text_extractor",
"field_passthrough": [
{ "required": true, "source_path": "title" },
{ "source_path": "author" }
],
"input_mappings": { "text": "content" },
"parameters": { "model": "text-embedding-3-small" },
"version": "v1"
}
enabled
boolean | null

OPTIONAL. Override enabled status. If omitted, copies from source collection.

metadata
Metadata · object

OPTIONAL. Override metadata. If omitted, copies from source collection.

taxonomy_applications
TaxonomyApplicationConfig · object[] | null

OPTIONAL. Override taxonomy applications. If omitted, copies from source collection.

Response

Successful Response

Response after cloning a collection.

collection
CollectionModel · object
required

Cloned collection configuration with new collection_id.

Examples:
{
"collection_id": "col_a1b2c3d4e5",
"collection_name": "article_embeddings",
"description": "Simple text collection: News articles with text embeddings from bucket source",
"enabled": true,
"feature_extractor": {
"feature_extractor_name": "text_extractor",
"field_passthrough": [{ "source_path": "title" }],
"input_mappings": { "text": "content" },
"version": "v1"
},
"input_schema": {
"properties": {
"title": { "type": "string" },
"content": { "type": "text" }
}
},
"output_schema": {
"properties": {
"title": { "type": "string" },
"text_extractor_v1_embedding": { "type": "array" }
}
},
"source": {
"bucket_ids": ["bkt_articles"],
"type": "bucket"
}
}
{
"collection_id": "col_xyz789abc",
"collection_name": "video_frames",
"description": "Video frames: Extracted at 1 FPS with CLIP embeddings and campaign_id passthrough",
"enabled": true,
"feature_extractor": {
"feature_extractor_name": "multimodal_extractor",
"field_passthrough": [{ "source_path": "campaign_id" }],
"input_mappings": { "video": "video" },
"parameters": { "fps": 1 },
"version": "v1"
},
"input_schema": {
"properties": {
"video": { "type": "video" },
"campaign_id": { "type": "string" }
}
},
"output_schema": {
"properties": {
"campaign_id": { "type": "string" },
"multimodal_extractor_v1_embedding": { "type": "array" }
}
},
"source": {
"bucket_ids": ["bkt_marketing_videos"],
"type": "bucket"
}
}
{
"collection_id": "col_scenes_def",
"collection_name": "detected_scenes",
"description": "Tier 2 decomposition: Scenes detected from frame embeddings (collection source)",
"enabled": true,
"feature_extractor": {
"feature_extractor_name": "scene_extractor",
"field_passthrough": [{ "source_path": "campaign_id" }],
"input_mappings": {
"embedding": "multimodal_extractor_v1_embedding"
},
"version": "v1"
},
"input_schema": {
"properties": {
"campaign_id": { "type": "string" },
"multimodal_extractor_v1_embedding": { "type": "array" }
}
},
"output_schema": {
"properties": {
"campaign_id": { "type": "string" },
"scene_extractor_v1_embedding": { "type": "array" }
}
},
"source": {
"collection_id": "col_video_frames",
"type": "collection"
}
}
source_collection_id
string
required

ID of the source collection that was cloned.