{
  "metadata": {
    "project": "bot-research",
    "version": "1.0.0",
    "generated": "2026-02-19T12:00:00Z",
    "description": "Sample dataset for crawler behavior research"
  },
  "crawlers": [
    {
      "id": 0,
      "name": "GPTBot",
      "operator": "OpenAI",
      "user_agent_pattern": "Mozilla/5.0 AppleWebKit/537.36 (compatible; GPTBot/1.2)",
      "observed_behavior": {
        "avg_requests_per_day": 167721,
        "respects_robots_txt": true,
        "follows_redirects": true,
        "max_redirect_depth": 6,
        "downloads_images": false,
        "downloads_large_files": true,
        "parses_javascript": true,
        "follows_sitemap": true,
        "revisit_interval_hours": 27
      },
      "ip_ranges": [
        "161.216.16.0/24"
      ],
      "first_seen": "2026-01-01T02:00:00Z",
      "notes": "Crawler GPTBot shows moderate crawling patterns"
    },
    {
      "id": 1,
      "name": "ClaudeBot",
      "operator": "Anthropic",
      "user_agent_pattern": "ClaudeBot/1.0",
      "observed_behavior": {
        "avg_requests_per_day": 132575,
        "respects_robots_txt": true,
        "follows_redirects": true,
        "max_redirect_depth": 40,
        "downloads_images": true,
        "downloads_large_files": false,
        "parses_javascript": true,
        "follows_sitemap": true,
        "revisit_interval_hours": 151
      },
      "ip_ranges": [
        "217.3.81.0/24",
        "188.216.174.0/24",
        "81.79.110.0/24"
      ],
      "first_seen": "2026-01-31T10:00:00Z",
      "notes": "Crawler ClaudeBot shows moderate crawling patterns"
    },
    {
      "id": 2,
      "name": "Amazonbot",
      "operator": "Amazon",
      "user_agent_pattern": "Mozilla/5.0 (compatible; Amazonbot/0.1)",
      "observed_behavior": {
        "avg_requests_per_day": 99695,
        "respects_robots_txt": true,
        "follows_redirects": true,
        "max_redirect_depth": 27,
        "downloads_images": false,
        "downloads_large_files": false,
        "parses_javascript": true,
        "follows_sitemap": false,
        "revisit_interval_hours": 118
      },
      "ip_ranges": [
        "106.40.150.0/24"
      ],
      "first_seen": "2026-01-27T20:00:00Z",
      "notes": "Crawler Amazonbot shows aggressive crawling patterns"
    },
    {
      "id": 3,
      "name": "bingbot",
      "operator": "Microsoft",
      "user_agent_pattern": "Mozilla/5.0 (compatible; bingbot/2.0)",
      "observed_behavior": {
        "avg_requests_per_day": 94900,
        "respects_robots_txt": true,
        "follows_redirects": true,
        "max_redirect_depth": 50,
        "downloads_images": true,
        "downloads_large_files": true,
        "parses_javascript": true,
        "follows_sitemap": true,
        "revisit_interval_hours": 21
      },
      "ip_ranges": [
        "35.194.142.0/24",
        "126.186.83.0/24"
      ],
      "first_seen": "2026-01-12T11:00:00Z",
      "notes": "Crawler bingbot shows moderate crawling patterns"
    },
    {
      "id": 4,
      "name": "Googlebot",
      "operator": "Google",
      "user_agent_pattern": "Mozilla/5.0 (compatible; Googlebot/2.1)",
      "observed_behavior": {
        "avg_requests_per_day": 70086,
        "respects_robots_txt": true,
        "follows_redirects": true,
        "max_redirect_depth": 43,
        "downloads_images": true,
        "downloads_large_files": true,
        "parses_javascript": true,
        "follows_sitemap": true,
        "revisit_interval_hours": 98
      },
      "ip_ranges": [
        "173.112.166.0/24",
        "206.28.117.0/24",
        "220.16.161.0/24"
      ],
      "first_seen": "2026-01-13T08:00:00Z",
      "notes": "Crawler Googlebot shows moderate crawling patterns"
    },
    {
      "id": 5,
      "name": "Applebot",
      "operator": "Apple",
      "user_agent_pattern": "Mozilla/5.0 (Applebot/0.1)",
      "observed_behavior": {
        "avg_requests_per_day": 148782,
        "respects_robots_txt": true,
        "follows_redirects": true,
        "max_redirect_depth": 18,
        "downloads_images": false,
        "downloads_large_files": false,
        "parses_javascript": false,
        "follows_sitemap": true,
        "revisit_interval_hours": 68
      },
      "ip_ranges": [
        "73.134.219.0/24",
        "159.204.185.0/24"
      ],
      "first_seen": "2026-01-08T04:00:00Z",
      "notes": "Crawler Applebot shows aggressive crawling patterns"
    },
    {
      "id": 6,
      "name": "MJ12bot",
      "operator": "Majestic",
      "user_agent_pattern": "Mozilla/5.0 (compatible; MJ12bot/v1.4.8)",
      "observed_behavior": {
        "avg_requests_per_day": 23931,
        "respects_robots_txt": true,
        "follows_redirects": true,
        "max_redirect_depth": 12,
        "downloads_images": true,
        "downloads_large_files": true,
        "parses_javascript": false,
        "follows_sitemap": false,
        "revisit_interval_hours": 17
      },
      "ip_ranges": [
        "107.239.128.0/24",
        "151.5.58.0/24",
        "184.136.174.0/24",
        "38.150.222.0/24"
      ],
      "first_seen": "2026-01-06T14:00:00Z",
      "notes": "Crawler MJ12bot shows moderate crawling patterns"
    },
    {
      "id": 7,
      "name": "PetalBot",
      "operator": "Huawei",
      "user_agent_pattern": "Mozilla/5.0 (compatible; PetalBot/2.0)",
      "observed_behavior": {
        "avg_requests_per_day": 189392,
        "respects_robots_txt": true,
        "follows_redirects": true,
        "max_redirect_depth": 37,
        "downloads_images": true,
        "downloads_large_files": true,
        "parses_javascript": false,
        "follows_sitemap": false,
        "revisit_interval_hours": 130
      },
      "ip_ranges": [
        "49.191.82.0/24",
        "148.0.165.0/24"
      ],
      "first_seen": "2026-01-16T00:00:00Z",
      "notes": "Crawler PetalBot shows moderate crawling patterns"
    }
  ],
  "experiments": [
    {
      "id": "exp-000",
      "name": "redirect_chain_depth",
      "status": "in_progress",
      "start_date": "2026-02-10",
      "parameters": {
        "key": "value_31"
      }
    },
    {
      "id": "exp-001",
      "name": "large_file_download_limits",
      "status": "planned",
      "start_date": "2026-02-03",
      "parameters": {
        "key": "value_63"
      }
    },
    {
      "id": "exp-002",
      "name": "iframe_following",
      "status": "completed",
      "start_date": "2026-02-18",
      "parameters": {
        "key": "value_17"
      }
    },
    {
      "id": "exp-003",
      "name": "sitemap_parsing",
      "status": "planned",
      "start_date": "2026-02-16",
      "parameters": {
        "key": "value_34"
      }
    },
    {
      "id": "exp-004",
      "name": "robots_txt_compliance",
      "status": "planned",
      "start_date": "2026-02-14",
      "parameters": {
        "key": "value_97"
      }
    },
    {
      "id": "exp-005",
      "name": "pagination_depth",
      "status": "planned",
      "start_date": "2026-02-07",
      "parameters": {
        "key": "value_52"
      }
    },
    {
      "id": "exp-006",
      "name": "link_discovery_rate",
      "status": "planned",
      "start_date": "2026-02-12",
      "parameters": {
        "key": "value_58"
      }
    },
    {
      "id": "exp-007",
      "name": "content_type_handling",
      "status": "completed",
      "start_date": "2026-02-08",
      "parameters": {
        "key": "value_44"
      }
    },
    {
      "id": "exp-008",
      "name": "rate_limiting_response",
      "status": "completed",
      "start_date": "2026-02-19",
      "parameters": {
        "key": "value_76"
      }
    },
    {
      "id": "exp-009",
      "name": "cloaking_detection",
      "status": "completed",
      "start_date": "2026-02-01",
      "parameters": {
        "key": "value_81"
      }
    }
  ],
  "summary_stats": {
    "total_requests_observed": 2847293,
    "unique_crawlers": 8,
    "date_range": {
      "start": "2026-01-15",
      "end": "2026-02-19"
    },
    "top_requested_paths": [
      "/",
      "/index.html",
      "/about.html",
      "/robots.txt",
      "/sitemap.xml",
      "/style.css",
      "/data.json"
    ],
    "status_code_distribution": {
      "200": 2654821,
      "301": 12045,
      "302": 89432,
      "304": 45123,
      "404": 34562,
      "429": 11310
    }
  }
}