# Import Hijacking via sys.modules Poisoning

Language: Python
Severity: Critical
CWE: CWE-494

## Source
3, 16

## Flow
3-5-6-7-8-9, 16-17-3-5-6-7-8-9

## Sink
8, 9

## Vulnerable Code
```python
import sys
import importlib

def load_cloud_provider_sdk(provider_name, custom_module_path=None):  # VULNERABLE: both arguments are used without validation
    sdk_module = f"cloud_{provider_name}_sdk"  # module name is built from the raw provider_name
    if custom_module_path:
        spec = importlib.util.spec_from_file_location(sdk_module, custom_module_path)  # spec for whatever file the caller named
        module = importlib.util.module_from_spec(spec)
        sys.modules[sdk_module] = module  # registers the module globally, so later imports of this name receive it
        spec.loader.exec_module(module)  # runs the file's top-level code
    else:
        module = importlib.import_module(sdk_module)
    return module.get_credentials()  # returns the result of get_credentials(), not the module itself

def deploy_infrastructure(cloud_config):  # VULNERABLE: forwards config values without any validation
    provider_sdk = load_cloud_provider_sdk(cloud_config['provider'], cloud_config.get('sdk_override'))  # 'sdk_override' becomes custom_module_path
    return provider_sdk.deploy(cloud_config['resources'])  # deploy() is called on the value load_cloud_provider_sdk returned
```

## Explanation

The function accepts untrusted `provider_name` and `custom_module_path` parameters that flow, without validation, into `importlib.util.spec_from_file_location()`, the `sys.modules` cache, and `exec_module()`. An attacker can supply an arbitrary file path to load and execute a malicious module, and because that module is registered in `sys.modules` under a name derived from `provider_name`, subsequent legitimate imports of that name receive the attacker-controlled code.

## Remediation

The fix applies multiple layers of defense: (1) provider names must match a lowercase-letters-only pattern and appear in a strict allowlist of known providers, so the derived module name cannot contain dots, path separators, or other attacker-chosen characters; (2) custom module paths are resolved to their real absolute path and must be an existing `.py` file inside an allowlisted directory, which prevents path traversal, symlink escapes, and arbitrary file loading; and (3) the loaded module is no longer injected into `sys.modules`, which prevents cache poisoning that could affect other parts of the application.

## Secure Code
```python
import sys
import importlib
import os
import re

# Cloud providers whose SDK may be loaded; any other name is rejected.
ALLOWED_PROVIDERS = frozenset({'aws', 'azure', 'gcp'})

# Directories from which an SDK override file may be loaded (e.g., for testing).
ALLOWED_SDK_DIRECTORIES = [
    '/opt/cloud-platform/approved-sdks',
    '/usr/lib/cloud-sdks',
]

def _validate_provider_name(provider_name):
    """Validate that provider name is in the allowlist and contains only safe characters."""
    if not provider_name:
        raise ValueError("Provider name cannot be empty")
    if not re.match(r'^[a-z]+$', provider_name):
        raise ValueError(f"Invalid provider name format: {provider_name}")
    if provider_name not in ALLOWED_PROVIDERS:
        raise ValueError(f"Unsupported cloud provider: {provider_name}. Allowed: {', '.join(ALLOWED_PROVIDERS)}")
    return provider_name

def _validate_custom_module_path(custom_module_path):
    """Validate that custom module path is within allowed directories and is a real .py file."""
    if not custom_module_path:
        return None
    
    # Resolve to absolute real path to prevent path traversal
    real_path = os.path.realpath(os.path.abspath(custom_module_path))
    
    # Ensure the file has a .py extension
    if not real_path.endswith('.py'):
        raise ValueError("Custom SDK path must point to a .py file")
    
    # Ensure the resolved path is within an allowed directory
    path_allowed = False
    for allowed_dir in ALLOWED_SDK_DIRECTORIES:
        allowed_real = os.path.realpath(os.path.abspath(allowed_dir))
        if real_path.startswith(allowed_real + os.sep):
            path_allowed = True
            break
    
    if not path_allowed:
        raise ValueError(
            f"Custom SDK path '{real_path}' is not within allowed directories. "
            f"Allowed: {ALLOWED_SDK_DIRECTORIES}"
        )
    
    # Ensure the file actually exists
    if not os.path.isfile(real_path):
        raise FileNotFoundError(f"Custom SDK file not found: {real_path}")
    
    return real_path

def load_cloud_provider_sdk(provider_name, custom_module_path=None):
    """Load a cloud provider SDK module and return its credentials.

    Args:
        provider_name: Provider identifier. It is validated by
            ``_validate_provider_name`` before being used to build the
            module name ``cloud_<provider>_sdk``.
        custom_module_path: Optional path to a ``.py`` file to load in place
            of the installed SDK. It is validated by
            ``_validate_custom_module_path``.

    Returns:
        Whatever the loaded module's ``get_credentials()`` returns.

    Raises:
        ValueError: If the provider name or the override path is rejected.
        FileNotFoundError: If the override path is not an existing file.
        ImportError: If no loadable spec can be built for the override file,
            or the installed SDK module cannot be imported.
    """
    # Import the submodule explicitly: a bare "import importlib" does not
    # guarantee that the ``importlib.util`` attribute is available.
    import importlib.util

    # Validate provider name against allowlist
    validated_provider = _validate_provider_name(provider_name)
    sdk_module = f"cloud_{validated_provider}_sdk"

    if custom_module_path:
        # Validate the custom module path
        validated_path = _validate_custom_module_path(custom_module_path)

        spec = importlib.util.spec_from_file_location(sdk_module, validated_path)
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot create module spec from path: {validated_path}")
        module = importlib.util.module_from_spec(spec)
        # Do NOT register the module in sys.modules; it is used locally only,
        # so other imports of this name are unaffected.
        spec.loader.exec_module(module)
    else:
        module = importlib.import_module(sdk_module)

    return module.get_credentials()

def deploy_infrastructure(cloud_config):
    """Check the shape of *cloud_config*, load the provider SDK, and deploy.

    *cloud_config* must be a dict containing the 'provider' and 'resources'
    keys; the optional 'sdk_override' value is passed on as the custom
    module path.

    Raises:
        TypeError: If *cloud_config* is not a dict.
        ValueError: If the 'provider' or 'resources' key is missing.
    """
    if not isinstance(cloud_config, dict):
        raise TypeError("cloud_config must be a dictionary")

    # Checked in this order so a config missing both keys reports 'provider'.
    missing_key_errors = (
        ('provider', "cloud_config must include 'provider' key"),
        ('resources', "cloud_config must include 'resources' key"),
    )
    for required_key, message in missing_key_errors:
        if required_key not in cloud_config:
            raise ValueError(message)

    provider = cloud_config['provider']
    sdk_override = cloud_config.get('sdk_override')
    provider_sdk = load_cloud_provider_sdk(provider, sdk_override)
    return provider_sdk.deploy(cloud_config['resources'])
```
