{"title":"Import Hijacking via sys.modules Poisoning","language":"Python","severity":"Critical","cwe":"CWE-494","source_lines":[3,16],"flow_lines":[3,5,6,7,8,9,16,17,3,5,6,7,8,9],"sink_lines":[8,9],"vulnerable_code":"import sys\nimport importlib\n\ndef load_cloud_provider_sdk(provider_name, custom_module_path=None):\n    sdk_module = f\"cloud_{provider_name}_sdk\"\n    if custom_module_path:\n        spec = importlib.util.spec_from_file_location(sdk_module, custom_module_path)\n        module = importlib.util.module_from_spec(spec)\n        sys.modules[sdk_module] = module\n        spec.loader.exec_module(module)\n    else:\n        module = importlib.import_module(sdk_module)\n    return module.get_credentials()\n\ndef deploy_infrastructure(cloud_config):\n    provider_sdk = load_cloud_provider_sdk(cloud_config['provider'], cloud_config.get('sdk_override'))\n    return provider_sdk.deploy(cloud_config['resources'])","explanation":"The function accepts untrusted `provider_name` and `custom_module_path` parameters, which flow unvalidated into `importlib.util.spec_from_file_location()`, into the `sys.modules` cache under an attacker-influenced key, and into `exec_module()`, which executes the file at the attacker-chosen path. 
An attacker can supply arbitrary file paths to load malicious modules or poison the module cache with attacker-controlled code that will be imported by subsequent legitimate imports.","remediation":"The fix applies multiple layers of defense: (1) provider names are validated against a strict allowlist of known providers with regex format checking to prevent path traversal, (2) custom module paths are resolved to their real absolute path and validated against an allowlist of approved directories to prevent arbitrary file loading, and (3) the loaded module is no longer injected into sys.modules to prevent cache poisoning that could affect other parts of the application.","secure_code":"import sys\nimport importlib\nimport os\nimport re\n\n# Allowlist of valid cloud providers\nALLOWED_PROVIDERS = frozenset(['aws', 'azure', 'gcp'])\n\n# Allowlist of permitted SDK override directories (e.g., for testing)\nALLOWED_SDK_DIRECTORIES = [\n    '/opt/cloud-platform/approved-sdks',\n    '/usr/lib/cloud-sdks'\n]\n\ndef _validate_provider_name(provider_name):\n    \"\"\"Validate that provider name is in the allowlist and contains only safe characters.\"\"\"\n    if not provider_name:\n        raise ValueError(\"Provider name cannot be empty\")\n    if not re.match(r'^[a-z]+$', provider_name):\n        raise ValueError(f\"Invalid provider name format: {provider_name}\")\n    if provider_name not in ALLOWED_PROVIDERS:\n        raise ValueError(f\"Unsupported cloud provider: {provider_name}. 
Allowed: {', '.join(ALLOWED_PROVIDERS)}\")\n    return provider_name\n\ndef _validate_custom_module_path(custom_module_path):\n    \"\"\"Validate that custom module path is within allowed directories and is a real .py file.\"\"\"\n    if not custom_module_path:\n        return None\n    \n    # Resolve to absolute real path to prevent path traversal\n    real_path = os.path.realpath(os.path.abspath(custom_module_path))\n    \n    # Ensure the file has a .py extension\n    if not real_path.endswith('.py'):\n        raise ValueError(\"Custom SDK path must point to a .py file\")\n    \n    # Ensure the resolved path is within an allowed directory\n    path_allowed = False\n    for allowed_dir in ALLOWED_SDK_DIRECTORIES:\n        allowed_real = os.path.realpath(os.path.abspath(allowed_dir))\n        if real_path.startswith(allowed_real + os.sep):\n            path_allowed = True\n            break\n    \n    if not path_allowed:\n        raise ValueError(\n            f\"Custom SDK path '{real_path}' is not within allowed directories. 
\"\n            f\"Allowed: {ALLOWED_SDK_DIRECTORIES}\"\n        )\n    \n    # Ensure the file actually exists\n    if not os.path.isfile(real_path):\n        raise FileNotFoundError(f\"Custom SDK file not found: {real_path}\")\n    \n    return real_path\n\ndef load_cloud_provider_sdk(provider_name, custom_module_path=None):\n    \"\"\"Load a cloud provider SDK with strict validation.\"\"\"\n    # Validate provider name against allowlist\n    validated_provider = _validate_provider_name(provider_name)\n    sdk_module = f\"cloud_{validated_provider}_sdk\"\n    \n    if custom_module_path:\n        # Validate the custom module path\n        validated_path = _validate_custom_module_path(custom_module_path)\n        \n        spec = importlib.util.spec_from_file_location(sdk_module, validated_path)\n        if spec is None or spec.loader is None:\n            raise ImportError(f\"Cannot create module spec from path: {validated_path}\")\n        module = importlib.util.module_from_spec(spec)\n        # Do NOT poison sys.modules globally; use the module locally only\n        spec.loader.exec_module(module)\n    else:\n        module = importlib.import_module(sdk_module)\n    \n    return module.get_credentials()\n\ndef deploy_infrastructure(cloud_config):\n    \"\"\"Deploy infrastructure with validated cloud configuration.\"\"\"\n    if not isinstance(cloud_config, dict):\n        raise TypeError(\"cloud_config must be a dictionary\")\n    if 'provider' not in cloud_config:\n        raise ValueError(\"cloud_config must include 'provider' key\")\n    if 'resources' not in cloud_config:\n        raise ValueError(\"cloud_config must include 'resources' key\")\n    \n    provider_sdk = load_cloud_provider_sdk(\n        cloud_config['provider'],\n        cloud_config.get('sdk_override')\n    )\n    return provider_sdk.deploy(cloud_config['resources'])"}