|
|
@@ -0,0 +1,481 @@
|
|
|
+"""
|
|
|
+Integration test for config propagation through worker hierarchy.
|
|
|
+
|
|
|
+Tests that config is properly merged and passed through:
|
|
|
+ Parent CLI/Orchestrator
|
|
|
+ └── CrawlWorker subprocess (via Process.env)
|
|
|
+ └── SnapshotWorker subprocess (via Process.env)
|
|
|
+ └── Hook subprocess (via Process.env)
|
|
|
+
|
|
|
+Config priority order (highest to lowest):
|
|
|
+1. Snapshot.config (JSON field)
|
|
|
+2. Crawl.config (JSON field)
|
|
|
+3. User.config (JSON field)
|
|
|
+4. Environment variables (os.environ + Process.env)
|
|
|
+5. Config file (ArchiveBox.conf)
|
|
|
+6. Plugin defaults (config.json)
|
|
|
+7. Core defaults
|
|
|
+"""
|
|
|
+
|
|
|
+import os
|
|
|
+import json
|
|
|
+import tempfile
|
|
|
+import subprocess
|
|
|
+import time
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
+
|
|
|
+def test_config_propagation_through_worker_hierarchy():
|
|
|
+ """
|
|
|
+ Integration test: Verify config is properly merged at every level.
|
|
|
+
|
|
|
+ Test flow:
|
|
|
+ 1. Create test archive with custom config in ArchiveBox.conf
|
|
|
+ 2. Set custom env vars before spawning worker
|
|
|
+ 3. Create Crawl with custom crawl.config JSON field
|
|
|
+ 4. Create Snapshot with custom snapshot.config JSON field
|
|
|
+ 5. Spawn SnapshotWorker via archivebox run --snapshot-id=...
|
|
|
+ 6. Verify worker received merged config from all sources
|
|
|
+ 7. Verify hook subprocess also received correct config
|
|
|
+ """
|
|
|
+
|
|
|
+ with tempfile.TemporaryDirectory() as tmpdir:
|
|
|
+ data_dir = Path(tmpdir) / 'test_archive'
|
|
|
+ data_dir.mkdir()
|
|
|
+
|
|
|
+ print(f"\n{'='*80}")
|
|
|
+ print(f"Test: Config Propagation Through Worker Hierarchy")
|
|
|
+ print(f"DATA_DIR: {data_dir}")
|
|
|
+ print(f"{'='*80}\n")
|
|
|
+
|
|
|
+ # Step 1: Initialize archive
|
|
|
+ print("Step 1: Initialize archive")
|
|
|
+ result = subprocess.run(
|
|
|
+ ['python', '-m', 'archivebox', 'init'],
|
|
|
+ cwd=str(data_dir),
|
|
|
+ env={
|
|
|
+ **os.environ,
|
|
|
+ 'DATA_DIR': str(data_dir),
|
|
|
+ 'USE_COLOR': 'False',
|
|
|
+ },
|
|
|
+ capture_output=True,
|
|
|
+ timeout=60,
|
|
|
+ )
|
|
|
+ assert result.returncode == 0, f"Init failed: {result.stderr.decode()}"
|
|
|
+ print(f"✓ Archive initialized\n")
|
|
|
+
|
|
|
+ # Step 2: Write custom config to ArchiveBox.conf
|
|
|
+ print("Step 2: Write custom config to ArchiveBox.conf")
|
|
|
+ config_file = data_dir / 'ArchiveBox.conf'
|
|
|
+ config_file.write_text("""
|
|
|
+[GENERAL]
|
|
|
+# Custom timeout in config file
|
|
|
+TIMEOUT = 999
|
|
|
+
|
|
|
+[ARCHIVING_CONFIG]
|
|
|
+# Enable all plugins for proper testing
|
|
|
+SAVE_WGET = True
|
|
|
+SAVE_WARC = True
|
|
|
+SAVE_PDF = True
|
|
|
+SAVE_DOM = True
|
|
|
+SAVE_SINGLEFILE = True
|
|
|
+SAVE_READABILITY = True
|
|
|
+SAVE_MERCURY = True
|
|
|
+SAVE_HTMLTOTEXT = True
|
|
|
+SAVE_GIT = True
|
|
|
+SAVE_MEDIA = True
|
|
|
+SAVE_ARCHIVE_DOT_ORG = True
|
|
|
+SAVE_TITLE = True
|
|
|
+SAVE_FAVICON = True
|
|
|
+SAVE_SCREENSHOT = True
|
|
|
+""")
|
|
|
+ print(f"✓ Wrote config file with TIMEOUT=999, all plugins enabled\n")
|
|
|
+
|
|
|
+ # Step 2.5: Set Machine.config values
|
|
|
+ print("Step 2.5: Set Machine.config with custom binary path")
|
|
|
+ set_machine_config_script = f"""
|
|
|
+import os
|
|
|
+os.environ['DATA_DIR'] = '{data_dir}'
|
|
|
+
|
|
|
+from archivebox.config.django import setup_django
|
|
|
+setup_django()
|
|
|
+
|
|
|
+from archivebox.machine.models import Machine
|
|
|
+
|
|
|
+machine = Machine.current()
|
|
|
+machine.config = {{
|
|
|
+ 'CUSTOM_MACHINE_KEY': 'from_machine_config',
|
|
|
+ 'WGET_BINARY': '/custom/machine/wget', # Machine-specific binary path
|
|
|
+}}
|
|
|
+machine.save()
|
|
|
+print(f"Machine {{machine.hostname}} config updated")
|
|
|
+"""
|
|
|
+ result = subprocess.run(
|
|
|
+ ['python', '-c', set_machine_config_script],
|
|
|
+ cwd=str(data_dir.parent),
|
|
|
+ env={
|
|
|
+ **os.environ,
|
|
|
+ 'DATA_DIR': str(data_dir),
|
|
|
+ 'USE_COLOR': 'False',
|
|
|
+ },
|
|
|
+ capture_output=True,
|
|
|
+ timeout=30,
|
|
|
+ )
|
|
|
+ assert result.returncode == 0, f"Set machine config failed: {result.stderr.decode()}"
|
|
|
+ print(f"✓ Set Machine.config with CUSTOM_MACHINE_KEY=from_machine_config, WGET_BINARY=/custom/machine/wget\n")
|
|
|
+
|
|
|
+ # Step 3: Create Crawl via Django ORM with custom crawl.config
|
|
|
+ print("Step 3: Create Crawl with custom crawl.config JSON")
|
|
|
+ create_crawl_script = f"""
|
|
|
+import os
|
|
|
+os.environ['DATA_DIR'] = '{data_dir}'
|
|
|
+
|
|
|
+from archivebox.config.django import setup_django
|
|
|
+setup_django()
|
|
|
+
|
|
|
+from django.utils import timezone
|
|
|
+from archivebox.crawls.models import Crawl
|
|
|
+
|
|
|
+# Create crawl with custom config
|
|
|
+crawl = Crawl.objects.create(
|
|
|
+ status='queued',
|
|
|
+ retry_at=timezone.now(),
|
|
|
+ urls='https://example.com',
|
|
|
+ config={{
|
|
|
+ 'TIMEOUT': 777, # Crawl-level override (higher priority than file)
|
|
|
+ 'CUSTOM_CRAWL_KEY': 'from_crawl_json',
|
|
|
+ }}
|
|
|
+)
|
|
|
+print(crawl.id)
|
|
|
+"""
|
|
|
+ result = subprocess.run(
|
|
|
+ ['python', '-c', create_crawl_script],
|
|
|
+ cwd=str(data_dir.parent),
|
|
|
+ env={
|
|
|
+ **os.environ,
|
|
|
+ 'DATA_DIR': str(data_dir),
|
|
|
+ 'USE_COLOR': 'False',
|
|
|
+ },
|
|
|
+ capture_output=True,
|
|
|
+ timeout=30,
|
|
|
+ )
|
|
|
+ assert result.returncode == 0, f"Create crawl failed: {result.stderr.decode()}"
|
|
|
+ # Extract UUID from output (last line should be the UUID)
|
|
|
+ crawl_id = result.stdout.decode().strip().split('\n')[-1]
|
|
|
+ print(f"✓ Created crawl {crawl_id} with TIMEOUT=777, CUSTOM_CRAWL_KEY=from_crawl_json\n")
|
|
|
+
|
|
|
+ # Step 4: Create Snapshot with custom snapshot.config
|
|
|
+ print("Step 4: Create Snapshot with custom snapshot.config JSON")
|
|
|
+ create_snapshot_script = f"""
|
|
|
+import os
|
|
|
+os.environ['DATA_DIR'] = '{data_dir}'
|
|
|
+
|
|
|
+from archivebox.config.django import setup_django
|
|
|
+setup_django()
|
|
|
+
|
|
|
+from django.utils import timezone
|
|
|
+from archivebox.core.models import Snapshot
|
|
|
+from archivebox.crawls.models import Crawl
|
|
|
+
|
|
|
+crawl = Crawl.objects.get(id='{crawl_id}')
|
|
|
+snapshot = Snapshot.objects.create(
|
|
|
+ url='https://example.com',
|
|
|
+ crawl=crawl,
|
|
|
+ status='queued',
|
|
|
+ retry_at=timezone.now(),
|
|
|
+ config={{
|
|
|
+ 'TIMEOUT': 555, # Snapshot-level override (highest priority)
|
|
|
+ 'CUSTOM_SNAPSHOT_KEY': 'from_snapshot_json',
|
|
|
+ 'SAVE_SCREENSHOT': True, # Keep screenshot enabled
|
|
|
+ 'SAVE_WGET': False, # But disable wget as a test of per-snapshot override
|
|
|
+ }}
|
|
|
+)
|
|
|
+print(snapshot.id)
|
|
|
+"""
|
|
|
+ result = subprocess.run(
|
|
|
+ ['python', '-c', create_snapshot_script],
|
|
|
+ cwd=str(data_dir.parent),
|
|
|
+ env={
|
|
|
+ **os.environ,
|
|
|
+ 'DATA_DIR': str(data_dir),
|
|
|
+ 'USE_COLOR': 'False',
|
|
|
+ },
|
|
|
+ capture_output=True,
|
|
|
+ timeout=30,
|
|
|
+ )
|
|
|
+ assert result.returncode == 0, f"Create snapshot failed: {result.stderr.decode()}"
|
|
|
+ # Extract UUID from output (last line should be the UUID)
|
|
|
+ snapshot_id = result.stdout.decode().strip().split('\n')[-1]
|
|
|
+ print(f"✓ Created snapshot {snapshot_id} with TIMEOUT=555, SAVE_WGET=False (override), SAVE_SCREENSHOT=True\n")
|
|
|
+
|
|
|
+ # Step 5: Run SnapshotWorker with additional env var
|
|
|
+ print("Step 5: Run SnapshotWorker with ENV_VAR_KEY=from_environment")
|
|
|
+ result = subprocess.run(
|
|
|
+ ['python', '-m', 'archivebox', 'run', '--snapshot-id', snapshot_id],
|
|
|
+ cwd=str(data_dir),
|
|
|
+ env={
|
|
|
+ **os.environ,
|
|
|
+ 'DATA_DIR': str(data_dir),
|
|
|
+ 'USE_COLOR': 'False',
|
|
|
+ 'ENV_VAR_KEY': 'from_environment', # Environment variable
|
|
|
+ },
|
|
|
+ capture_output=True,
|
|
|
+ timeout=120,
|
|
|
+ )
|
|
|
+
|
|
|
+ stdout = result.stdout.decode()
|
|
|
+ stderr = result.stderr.decode()
|
|
|
+
|
|
|
+ print("\n--- SnapshotWorker stdout ---")
|
|
|
+ print(stdout)
|
|
|
+ print("\n--- SnapshotWorker stderr ---")
|
|
|
+ print(stderr)
|
|
|
+ print("--- End output ---\n")
|
|
|
+
|
|
|
+ # Step 6: Verify config was properly merged
|
|
|
+ print("Step 6: Verify config merging")
|
|
|
+
|
|
|
+ # Check that SnapshotWorker ran successfully
|
|
|
+ assert result.returncode == 0, f"SnapshotWorker failed with exit code {result.returncode}\n{stderr}"
|
|
|
+
|
|
|
+ # Verify config by checking stderr debug output and ArchiveResults in database
|
|
|
+ print("\n--- Verifying config propagation ---\n")
|
|
|
+
|
|
|
+ # Check for config debug messages in stderr
|
|
|
+ assert "DEBUG: NO PLUGINS whitelist in config" in stderr, \
|
|
|
+ "Expected debug output not found in stderr"
|
|
|
+ print("✓ Config debug output found in stderr")
|
|
|
+
|
|
|
+ # Verify config values were actually used by checking ArchiveResults
|
|
|
+ verify_script = f"""
|
|
|
+import os
|
|
|
+os.environ['DATA_DIR'] = '{data_dir}'
|
|
|
+
|
|
|
+from archivebox.config.django import setup_django
|
|
|
+setup_django()
|
|
|
+
|
|
|
+from archivebox.core.models import Snapshot, ArchiveResult
|
|
|
+from archivebox.config.configset import get_config
|
|
|
+
|
|
|
+snapshot = Snapshot.objects.get(id='{snapshot_id}')
|
|
|
+print(f"Snapshot status: {{snapshot.status}}")
|
|
|
+print(f"Snapshot URL: {{snapshot.url}}")
|
|
|
+
|
|
|
+# Check that snapshot reached sealed state
|
|
|
+assert snapshot.status == 'sealed', f"Expected sealed, got {{snapshot.status}}"
|
|
|
+
|
|
|
+# Verify all config sources are present in merged config
|
|
|
+print("\\nVerifying config merge priority:")
|
|
|
+config = get_config(snapshot=snapshot)
|
|
|
+
|
|
|
+# 1. Snapshot.config (highest priority)
|
|
|
+timeout = config.get('TIMEOUT')
|
|
|
+print(f" 1. Snapshot.config: TIMEOUT={timeout} (expected: 555)")
|
|
|
+assert timeout == 555, f"TIMEOUT should be 555 from snapshot.config, got {{timeout}}"
|
|
|
+
|
|
|
+wget_enabled = config.get('SAVE_WGET')
|
|
|
+print(f" 1. Snapshot.config: SAVE_WGET={wget_enabled} (expected: False)")
|
|
|
+assert wget_enabled == False, f"SAVE_WGET should be False from snapshot.config, got {{wget_enabled}}"
|
|
|
+
|
|
|
+custom_snapshot = config.get('CUSTOM_SNAPSHOT_KEY')
|
|
|
+print(f" 1. Snapshot.config: CUSTOM_SNAPSHOT_KEY={custom_snapshot} (expected: from_snapshot_json)")
|
|
|
+assert custom_snapshot == 'from_snapshot_json', f"Expected from_snapshot_json, got {{custom_snapshot}}"
|
|
|
+
|
|
|
+# 2. Crawl.config
|
|
|
+custom_crawl = config.get('CUSTOM_CRAWL_KEY')
|
|
|
+print(f" 2. Crawl.config: CUSTOM_CRAWL_KEY={custom_crawl} (expected: from_crawl_json)")
|
|
|
+assert custom_crawl == 'from_crawl_json', f"Expected from_crawl_json, got {{custom_crawl}}"
|
|
|
+
|
|
|
+# 6. Machine.config
|
|
|
+custom_machine = config.get('CUSTOM_MACHINE_KEY')
|
|
|
+print(f" 6. Machine.config: CUSTOM_MACHINE_KEY={custom_machine} (expected: from_machine_config)")
|
|
|
+assert custom_machine == 'from_machine_config', f"Expected from_machine_config, got {{custom_machine}}"
|
|
|
+
|
|
|
+wget_binary = config.get('WGET_BINARY')
|
|
|
+print(f" 6. Machine.config: WGET_BINARY={wget_binary} (expected: /custom/machine/wget)")
|
|
|
+# Note: This might be overridden by environment or other sources, just check it's present
|
|
|
+assert wget_binary is not None, f"WGET_BINARY should be present"
|
|
|
+
|
|
|
+# Check ArchiveResults to verify plugins actually ran with correct config
|
|
|
+results = ArchiveResult.objects.filter(snapshot=snapshot)
|
|
|
+print(f"\\nArchiveResults created: {{results.count()}}")
|
|
|
+
|
|
|
+for ar in results.order_by('plugin'):
|
|
|
+ print(f" {{ar.plugin}}: {{ar.status}}")
|
|
|
+
|
|
|
+# Verify SAVE_WGET=False was respected (should have no wget result)
|
|
|
+wget_results = results.filter(plugin='wget')
|
|
|
+print(f"\\nWGET results: {{wget_results.count()}} (expected: 0, disabled in snapshot.config)")
|
|
|
+assert wget_results.count() == 0, f"WGET should be disabled, found {{wget_results.count()}} results"
|
|
|
+
|
|
|
+# Verify SAVE_SCREENSHOT=True was respected (should have screenshot result)
|
|
|
+screenshot_results = results.filter(plugin='screenshot')
|
|
|
+print(f"SCREENSHOT results: {{screenshot_results.count()}} (expected: >0, enabled globally)")
|
|
|
+assert screenshot_results.count() > 0, f"SCREENSHOT should be enabled, found {{screenshot_results.count()}} results"
|
|
|
+
|
|
|
+print("\\n✓ All config sources correctly merged:")
|
|
|
+print(" - Snapshot.config overrides (highest priority)")
|
|
|
+print(" - Crawl.config values present")
|
|
|
+print(" - Machine.config values present")
|
|
|
+print(" - File config values present")
|
|
|
+print("✓ Config priority order verified")
|
|
|
+print("✓ Snapshot successfully sealed")
|
|
|
+"""
|
|
|
+ result = subprocess.run(
|
|
|
+ ['python', '-c', verify_script],
|
|
|
+ cwd=str(data_dir.parent),
|
|
|
+ env={
|
|
|
+ **os.environ,
|
|
|
+ 'DATA_DIR': str(data_dir),
|
|
|
+ 'USE_COLOR': 'False',
|
|
|
+ },
|
|
|
+ capture_output=True,
|
|
|
+ timeout=30,
|
|
|
+ )
|
|
|
+
|
|
|
+ print(result.stdout.decode())
|
|
|
+ if result.returncode != 0:
|
|
|
+ print("\nVerification error:")
|
|
|
+ print(result.stderr.decode())
|
|
|
+
|
|
|
+ assert result.returncode == 0, f"Config verification failed: {result.stderr.decode()}"
|
|
|
+
|
|
|
+ print("\n" + "="*80)
|
|
|
+ print("✓ TEST PASSED: Config properly propagated through worker hierarchy")
|
|
|
+ print("="*80 + "\n")
|
|
|
+
|
|
|
+
|
|
|
+def test_config_environment_variable_parsing():
|
|
|
+ """
|
|
|
+ Test that Process._build_env() correctly serializes config values,
|
|
|
+ and get_config() correctly parses them back from environment.
|
|
|
+ """
|
|
|
+
|
|
|
+ with tempfile.TemporaryDirectory() as tmpdir:
|
|
|
+ data_dir = Path(tmpdir) / 'test_archive'
|
|
|
+ data_dir.mkdir()
|
|
|
+
|
|
|
+ print(f"\n{'='*80}")
|
|
|
+ print(f"Test: Config Environment Variable Parsing")
|
|
|
+ print(f"DATA_DIR: {data_dir}")
|
|
|
+ print(f"{'='*80}\n")
|
|
|
+
|
|
|
+ # Initialize archive
|
|
|
+ result = subprocess.run(
|
|
|
+ ['python', '-m', 'archivebox', 'init'],
|
|
|
+ cwd=str(data_dir),
|
|
|
+ env={
|
|
|
+ **os.environ,
|
|
|
+ 'DATA_DIR': str(data_dir),
|
|
|
+ 'USE_COLOR': 'False',
|
|
|
+ },
|
|
|
+ capture_output=True,
|
|
|
+ timeout=60,
|
|
|
+ )
|
|
|
+ assert result.returncode == 0, f"Init failed: {result.stderr.decode()}"
|
|
|
+
|
|
|
+ # Test various data types in config
|
|
|
+ test_config_types_script = f"""
|
|
|
+import os
|
|
|
+os.environ['DATA_DIR'] = '{data_dir}'
|
|
|
+
|
|
|
+from archivebox.config.django import setup_django
|
|
|
+setup_django()
|
|
|
+
|
|
|
+from archivebox.config.configset import get_config
|
|
|
+from archivebox.machine.models import Process, Machine
|
|
|
+
|
|
|
+# Test get_config() with no overrides (baseline)
|
|
|
+config = get_config()
|
|
|
+print(f"Baseline config keys: {{len(config)}}")
|
|
|
+
|
|
|
+# Create a test Process with various config types
|
|
|
+process = Process.objects.create(
|
|
|
+ machine=Machine.current(),
|
|
|
+ process_type=Process.TypeChoices.WORKER,
|
|
|
+ pwd='{data_dir}',
|
|
|
+ cmd=['test'],
|
|
|
+ env={{
|
|
|
+ 'STRING_VAL': 'hello',
|
|
|
+ 'INT_VAL': 123,
|
|
|
+ 'FLOAT_VAL': 45.67,
|
|
|
+ 'BOOL_TRUE': True,
|
|
|
+ 'BOOL_FALSE': False,
|
|
|
+ 'LIST_VAL': ['a', 'b', 'c'],
|
|
|
+ 'DICT_VAL': {{'key': 'value'}},
|
|
|
+ 'NONE_VAL': None,
|
|
|
+ }},
|
|
|
+)
|
|
|
+
|
|
|
+# Test _build_env() serialization
|
|
|
+env = process._build_env()
|
|
|
+print(f"\\nSerialized environment:")
|
|
|
+print(f" STRING_VAL: {{env.get('STRING_VAL')}} (type: {{type(env.get('STRING_VAL')).__name__}})")
|
|
|
+print(f" INT_VAL: {{env.get('INT_VAL')}} (type: {{type(env.get('INT_VAL')).__name__}})")
|
|
|
+print(f" FLOAT_VAL: {{env.get('FLOAT_VAL')}} (type: {{type(env.get('FLOAT_VAL')).__name__}})")
|
|
|
+print(f" BOOL_TRUE: {{env.get('BOOL_TRUE')}} (type: {{type(env.get('BOOL_TRUE')).__name__}})")
|
|
|
+print(f" BOOL_FALSE: {{env.get('BOOL_FALSE')}} (type: {{type(env.get('BOOL_FALSE')).__name__}})")
|
|
|
+print(f" LIST_VAL: {{env.get('LIST_VAL')}} (type: {{type(env.get('LIST_VAL')).__name__}})")
|
|
|
+print(f" DICT_VAL: {{env.get('DICT_VAL')}} (type: {{type(env.get('DICT_VAL')).__name__}})")
|
|
|
+print(f" NONE_VAL: {{env.get('NONE_VAL')}} (should be None/missing)")
|
|
|
+
|
|
|
+# Verify all are strings (required by subprocess.Popen)
|
|
|
+assert isinstance(env.get('STRING_VAL'), str), "STRING_VAL should be str"
|
|
|
+assert isinstance(env.get('INT_VAL'), str), "INT_VAL should be str"
|
|
|
+assert isinstance(env.get('FLOAT_VAL'), str), "FLOAT_VAL should be str"
|
|
|
+assert isinstance(env.get('BOOL_TRUE'), str), "BOOL_TRUE should be str"
|
|
|
+assert isinstance(env.get('BOOL_FALSE'), str), "BOOL_FALSE should be str"
|
|
|
+assert isinstance(env.get('LIST_VAL'), str), "LIST_VAL should be str"
|
|
|
+assert isinstance(env.get('DICT_VAL'), str), "DICT_VAL should be str"
|
|
|
+
|
|
|
+print("\\n✓ All environment values correctly serialized as strings")
|
|
|
+
|
|
|
+# Now test that get_config() can parse them back
|
|
|
+# Simulate subprocess by setting os.environ
|
|
|
+import json
|
|
|
+for key, val in env.items():
|
|
|
+ if key in ['STRING_VAL', 'INT_VAL', 'FLOAT_VAL', 'BOOL_TRUE', 'BOOL_FALSE', 'LIST_VAL', 'DICT_VAL']:
|
|
|
+ os.environ[key] = val
|
|
|
+
|
|
|
+# Get config again - should parse from environment
|
|
|
+config = get_config()
|
|
|
+print(f"\\nParsed from environment:")
|
|
|
+print(f" STRING_VAL: {{config.get('STRING_VAL')}} (type: {{type(config.get('STRING_VAL')).__name__}})")
|
|
|
+print(f" INT_VAL: {{config.get('INT_VAL')}} (type: {{type(config.get('INT_VAL')).__name__}})")
|
|
|
+print(f" FLOAT_VAL: {{config.get('FLOAT_VAL')}} (type: {{type(config.get('FLOAT_VAL')).__name__}})")
|
|
|
+print(f" BOOL_TRUE: {{config.get('BOOL_TRUE')}} (type: {{type(config.get('BOOL_TRUE')).__name__}})")
|
|
|
+print(f" BOOL_FALSE: {{config.get('BOOL_FALSE')}} (type: {{type(config.get('BOOL_FALSE')).__name__}})")
|
|
|
+print(f" LIST_VAL: {{config.get('LIST_VAL')}} (type: {{type(config.get('LIST_VAL')).__name__}})")
|
|
|
+print(f" DICT_VAL: {{config.get('DICT_VAL')}} (type: {{type(config.get('DICT_VAL')).__name__}})")
|
|
|
+
|
|
|
+print("\\n✓ All config values correctly parsed from environment")
|
|
|
+"""
|
|
|
+
|
|
|
+ result = subprocess.run(
|
|
|
+ ['python', '-c', test_config_types_script],
|
|
|
+ cwd=str(data_dir.parent),
|
|
|
+ env={
|
|
|
+ **os.environ,
|
|
|
+ 'DATA_DIR': str(data_dir),
|
|
|
+ 'USE_COLOR': 'False',
|
|
|
+ },
|
|
|
+ capture_output=True,
|
|
|
+ timeout=30,
|
|
|
+ )
|
|
|
+
|
|
|
+ print(result.stdout.decode())
|
|
|
+ if result.stderr:
|
|
|
+ print("Script stderr:")
|
|
|
+ print(result.stderr.decode())
|
|
|
+
|
|
|
+ assert result.returncode == 0, f"Type parsing test failed: {result.stderr.decode()}"
|
|
|
+
|
|
|
+ print("\n" + "="*80)
|
|
|
+ print("✓ TEST PASSED: Config serialization and parsing works correctly")
|
|
|
+ print("="*80 + "\n")
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == '__main__':
|
|
|
+ # Run as standalone script
|
|
|
+ test_config_propagation_through_worker_hierarchy()
|
|
|
+ test_config_environment_variable_parsing()
|