# Research Site Configuration
# Generated: 2026-02-19
# This file intentionally has no extension to test crawler handling
# of extensionless files.
#
# Layout: one "key = value" pair per line, full-line "#" comments only.
# List-valued keys (e.g. logging.fields) use comma-separated values.

# Listener and storage location for the site.
[server]
host = 0.0.0.0
port = 443
protocol = https
region = us-east-1
bucket = site4726837462198733423

# Access logging for requests to the site.
[logging]
enabled = true
target_bucket = site4726837462198733423-logs
target_prefix = access-logs/
format = s3_server_access
# Comma-separated list of log fields.
fields = timestamp,remote_ip,operation,key,status,bytes,referrer,user_agent

# Static content defaults.
[content]
index_document = index.html
error_document = error.html
# NOTE(review): this value itself contains "="; it relies on the parser
# splitting only on the first delimiter -- confirm for the consuming parser.
default_cache_control = max-age=3600
# NOTE(review): CORS is disabled here while [security] sets
# cors_allowed_origins = * -- confirm which setting is authoritative.
enable_cors = false

[crawlers]
# Known crawler User-Agent patterns for identification
gptbot = GPTBot
claudebot = ClaudeBot
amazonbot = Amazonbot
bingbot = bingbot
googlebot = Googlebot
applebot = Applebot

# Crawler-behaviour experiments and their size limits.
[experiments]
redirect_chain_enabled = true
redirect_chain_max_depth = 100
large_file_test_enabled = true
pagination_enabled = true
pagination_max_pages = 10000
link_discovery_enabled = true
link_discovery_count = 100000

[rate_limiting]
# NOTE(review): the limits below are presumably ignored while
# enabled = false -- confirm against the consumer.
enabled = false
max_requests_per_minute = 60
burst_size = 10

[monitoring]
health_check_path = /health
metrics_enabled = true
# NOTE(review): unit/window for this threshold is not stated (count? percent?
# per what interval?) -- TODO confirm.
alert_threshold_5xx = 10
# Gigabytes, per the key name; the measurement window is not stated.
alert_threshold_bandwidth_gb = 50

[security]
# NOTE(review): public access is allowed and versioning/encryption are off.
# Presumably intentional for a public crawler-research site -- confirm.
block_public_access = false
enable_versioning = false
enable_encryption = false
cors_allowed_origins = *