Skip to content

How to Validate Complex Data Structures

This guide shows you how to validate complex, nested data structures with pyvider.cty.

๐Ÿค– AI-Generated Content

This documentation was generated with AI assistance and is still being audited. Some, or potentially a lot, of this information may be inaccurate. Learn more.

Basic Validation Pattern

The fundamental pattern for validation is:

  1. Define a type schema
  2. Call .validate() with raw Python data
  3. Handle validation errors
  4. Work with the validated CtyValue
from pyvider.cty import CtyObject, CtyString, CtyNumber

# 1. Define schema
user_type = CtyObject(
    attribute_types={
        "name": CtyString(),
        "age": CtyNumber()
    }
)

# 2. Validate
try:
    user_value = user_type.validate({"name": "Alice", "age": 30})
    # 3. Success - work with the value
    print(f"Valid user: {user_value['name'].raw_value}")
except Exception as e:
    # 3. Handle errors
    print(f"Validation failed: {e}")

Validating Nested Structures

For complex nested data, build your schema from the inside out:

from pyvider.cty import CtyObject, CtyString, CtyNumber, CtyList, CtyBool

# Define nested types
address_type = CtyObject(
    attribute_types={
        "street": CtyString(),
        "city": CtyString(),
        "zip": CtyString()
    }
)

contact_type = CtyObject(
    attribute_types={
        "email": CtyString(),
        "phone": CtyString()
    }
)

# Use nested types in main schema
person_type = CtyObject(
    attribute_types={
        "name": CtyString(),
        "age": CtyNumber(),
        "address": address_type,
        "contact": contact_type,
        "active": CtyBool()
    }
)

# Validate nested data
person_data = {
    "name": "Bob",
    "age": 25,
    "address": {
        "street": "123 Main St",
        "city": "Boston",
        "zip": "02101"
    },
    "contact": {
        "email": "[email protected]",
        "phone": "555-0100"
    },
    "active": True
}

person_value = person_type.validate(person_data)

Optional Fields

Mark fields as optional when they may be missing:

user_type = CtyObject(
    attribute_types={
        "name": CtyString(),
        "email": CtyString(),
        "phone": CtyString(),
        "bio": CtyString()
    },
    optional_attributes={"phone", "bio"}  # These can be omitted
)

# Valid: missing optional fields
minimal_user = user_type.validate({
    "name": "Alice",
    "email": "[email protected]"
})

# Optional fields are null
print(minimal_user['phone'].is_null)  # True
print(minimal_user['bio'].is_null)    # True

Validating Collections

Lists with Consistent Types

from pyvider.cty import CtyList, CtyString

# All elements must be strings
tags_type = CtyList(element_type=CtyString())

tags_value = tags_type.validate(["python", "types", "validation"])

# Access elements
for tag in tags_value:
    print(tag.raw_value)

Lists of Objects

# List of user objects
users_type = CtyList(
    element_type=CtyObject(
        attribute_types={
            "name": CtyString(),
            "email": CtyString()
        }
    )
)

users_data = [
    {"name": "Alice", "email": "[email protected]"},
    {"name": "Bob", "email": "[email protected]"}
]

users_value = users_type.validate(users_data)

# Access nested data
for user in users_value:
    print(f"{user['name'].raw_value}: {user['email'].raw_value}")

Maps

from pyvider.cty import CtyMap, CtyNumber

# Map of string keys to number values
scores_type = CtyMap(element_type=CtyNumber())

scores_value = scores_type.validate({
    "alice": 95,
    "bob": 87,
    "charlie": 92
})

# Access map values
print(scores_value["alice"].raw_value)  # 95

Handling Validation Errors

Validation errors provide detailed information about what went wrong:

from pyvider.cty.exceptions import CtyValidationError

try:
    user_value = user_type.validate({
        "name": "Alice",
        "age": "thirty"  # Wrong type!
    })
except CtyValidationError as e:
    print(f"Validation error: {e}")
    # Output: Attribute 'age': expected CtyNumber, got str

Validate Multiple Items

def validate_many(data_list, schema):
    """Validate multiple items and collect errors."""
    results = []
    errors = []

    for i, data in enumerate(data_list):
        try:
            value = schema.validate(data)
            results.append(value)
        except CtyValidationError as e:
            errors.append((i, str(e)))

    return results, errors

# Use it
users_data = [
    {"name": "Alice", "age": 30},
    {"name": "Bob", "age": "invalid"},  # Error
    {"name": "Charlie", "age": 25}
]

valid_users, validation_errors = validate_many(users_data, user_type)

print(f"Valid: {len(valid_users)}, Errors: {len(validation_errors)}")
for index, error in validation_errors:
    print(f"Item {index}: {error}")

Dynamic Validation

Use CtyDynamic when you don't know the type ahead of time:

from pyvider.cty import CtyDynamic, CtyObject

flexible_type = CtyObject(
    attribute_types={
        "name": CtyString(),
        "data": CtyDynamic()  # Can be any type
    }
})

# The 'data' field can hold any type
example1 = flexible_type.validate({
    "name": "Example 1",
    "data": "some text"
})

example2 = flexible_type.validate({
    "name": "Example 2",
    "data": {"nested": "object"}
})

example3 = flexible_type.validate({
    "name": "Example 3",
    "data": [1, 2, 3]
})

Validating Against Multiple Schemas

Sometimes you need to try different schemas:

def try_validate(data, schemas):
    """Try validating against multiple schemas."""
    for i, schema in enumerate(schemas):
        try:
            return schema.validate(data)
        except CtyValidationError:
            continue
    raise ValueError("Data doesn't match any schema")

# Define alternative schemas
schema_v1 = CtyObject(
    attribute_types={"name": CtyString(), "value": CtyNumber()}
)
schema_v2 = CtyObject(
    attribute_types={"name": CtyString(), "count": CtyNumber()}
)

schemas = [schema_v1, schema_v2]

# Try validation
data = {"name": "Test", "count": 42}
result = try_validate(data, schemas)  # Matches schema_v2

Best Practices

  1. Build schemas incrementally: Start with simple types and compose them
  2. Use descriptive variable names: user_type, address_type are clear
  3. Validate early: Validate at system boundaries (API inputs, file loads)
  4. Handle errors gracefully: Provide clear error messages to users
  5. Reuse schemas: Define common schemas once and import them
  6. Document optional fields: Make it clear which fields can be omitted

Common Patterns

Configuration Validation

config_type = CtyObject(
    attribute_types={
        "api_key": CtyString(),
        "api_url": CtyString(),
        "timeout": CtyNumber(),
        "retries": CtyNumber(),
        "debug": CtyBool()
    },
    optional_attributes={"timeout", "retries", "debug"}
)

def load_config(config_dict):
    """Load and validate configuration."""
    try:
        return config_type.validate(config_dict)
    except CtyValidationError as e:
        raise ValueError(f"Invalid configuration: {e}")

API Response Validation

api_response_type = CtyObject(
    attribute_types={
        "status": CtyString(),
        "data": CtyDynamic(),
        "error": CtyString()
    },
    optional_attributes={"error"}
)

def validate_api_response(response_data):
    """Validate API response structure."""
    response = api_response_type.validate(response_data)

    if response['status'].raw_value == "error":
        error_msg = response['error'].raw_value
        raise Exception(f"API error: {error_msg}")

    return response['data']

Next Steps