{
  "generatedAt": "2026-06-10T21:52:29Z",
  "prompt": "icp-classifier",
  "description": "The icp-classifier prompt that the lead-scoring pipeline runs in production, tested across versions against the same real YC Spring '26 companies.",
  "caseCount": 12,
  "checkTypes": [
    "deterministic assertions",
    "exact-match vs gold tier",
    "LLM-as-judge grounding"
  ],
  "model": "claude -p headless",
  "versions": [
    {
      "version": "v1",
      "note": "Loose baseline: 'return JSON', no format spec, no thresholds, no grounding rule.",
      "rate": 76,
      "passed": 66,
      "total": 87,
      "checks": {
        "grounded_judge": {
          "passed": 5,
          "total": 9,
          "rate": 56
        },
        "no_banned_phrases": {
          "passed": 9,
          "total": 9,
          "rate": 100
        },
        "rationale_present": {
          "passed": 9,
          "total": 9,
          "rate": 100
        },
        "rationale_under_30w": {
          "passed": 0,
          "total": 9,
          "rate": 0
        },
        "score_in_range": {
          "passed": 9,
          "total": 9,
          "rate": 100
        },
        "tier_exact_match": {
          "passed": 7,
          "total": 12,
          "rate": 58
        },
        "tier_in_set": {
          "passed": 9,
          "total": 9,
          "rate": 100
        },
        "tier_matches_threshold": {
          "passed": 9,
          "total": 9,
          "rate": 100
        },
        "valid_json": {
          "passed": 9,
          "total": 12,
          "rate": 75
        }
      },
      "cases": [
        {
          "company": "Replicas",
          "goldTier": "A",
          "score": 92,
          "tier": "A",
          "rationale": "Direct hit on the ICP. Replicas is a B2B vendor (sells to engineering teams) whose core product *is* LLM agents running in production — background coding agents aren't a side feature or an internal experiment, they're the deliverable. That implies production-grade agent infrastructure, eval/reliability pain, and budget for tooling around it. Held back from a higher score only because the brief gives no signal on company size, funding stage, or buying authority.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "Dayjob",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Dayjob builds AI agents for industrial logistics, so LLM agents in production are their core product, not an experiment — a direct hit on 'B2B companies deploying LLM agents to production.' Industrial logistics customers make them clearly B2B. Docked points because the one-liner doesn't confirm the agents are LLM-based (vs. other AI/automation) or that they're actually live in production with customers rather than pre-launch.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "AgentPhone",
          "goldTier": "A",
          "score": null,
          "tier": "",
          "rationale": "",
          "signal": "",
          "play": "",
          "parsed": false,
          "ok": false,
          "checks": {
            "valid_json": false,
            "tier_exact_match": false
          }
        },
        {
          "company": "Asendia AI",
          "goldTier": "A",
          "score": 88,
          "tier": "A",
          "rationale": "Direct ICP hit: B2B (sells to staffing agencies and enterprise) and their core product is an LLM agent — AI recruiters performing autonomous candidate screening/engagement — shipped to paying customers, i.e. agents in production as the business, not a side feature. Docked ~12 points because the one-line description doesn't let us verify production maturity (live customers vs. pre-launch) or confirm the 'AI Recruiter' is agentic LLM tech rather than classical matching.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "Callab",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Direct ICP hit: Callab is a B2B vendor whose core product is LLM-powered voice agents running in production against customers' legacy telephony systems. Agent deployment isn't an experiment or internal pilot — it's the product itself, which implies live production traffic, real reliability/eval pain, and ongoing prompt iteration. Docked from a higher score only because 'voice agents for legacy telephony' is inferred to be LLM-based rather than confirmed (some voice automation is still rules/IVR-based), and company size/stage is unknown.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": false
          }
        },
        {
          "company": "RentAHuman",
          "goldTier": "B",
          "score": null,
          "tier": "",
          "rationale": "",
          "signal": "",
          "play": "",
          "parsed": false,
          "ok": false,
          "checks": {
            "valid_json": false,
            "tier_exact_match": false
          }
        },
        {
          "company": "Cohesion",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Strong ICP fit: the product is itself an LLM agent ('public equities agentic teammate'), making agent deployment core to the business rather than incidental, and the team-oriented positioning indicates a B2B sale into investment firms. Held back from a higher score only because production status is implied but unverified from the single-line description.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "Tenet Industries",
          "goldTier": "C",
          "score": 5,
          "tier": "C",
          "rationale": "Hardware manufacturer (mass-produced strike drones), not a software company deploying LLM agents to production. Nothing in the description indicates LLM or agent usage at all — the core ICP criterion is absent. The small non-zero score only reflects that they are B2B and that defense-hardware firms occasionally run internal AI tooling, but there is no evidence of that here.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Arlo Industries",
          "goldTier": "C",
          "score": 12,
          "tier": "C",
          "rationale": "Arlo is a defense-tech hardware/sensing company (aerial sensing mesh for drone and missile tracking). Its buyers are likely defense primes and government agencies (B2G more than classic B2B), and nothing in the description indicates it deploys LLM agents to production — its core product is sensor fusion/perception, which may use ML but is not LLM-agent work. It misses the ICP's defining criterion. The non-zero score reflects only that it's a technical company that could plausibly adopt LLM agents internally (e.g., analyst tooling), but there is no evidence of that.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "AICE Power",
          "goldTier": "C",
          "score": 5,
          "tier": "C",
          "rationale": "Clear non-fit. The ICP is B2B companies deploying LLM agents to production; AICE Power builds defense hardware (submarine drones). There is no signal of LLM agent deployment, and defense hardware manufacturing is a different buyer, stack, and procurement motion entirely. The few residual points reflect only that a defense-tech company could plausibly have internal AI initiatives — but nothing stated supports that.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Lattice Health",
          "goldTier": "C",
          "score": null,
          "tier": "",
          "rationale": "",
          "signal": "",
          "play": "",
          "parsed": false,
          "ok": false,
          "checks": {
            "valid_json": false,
            "tier_exact_match": false
          }
        },
        {
          "company": "Adialante",
          "goldTier": "C",
          "score": 10,
          "tier": "C",
          "rationale": "No ICP fit on either axis. Adialante is a healthcare company focused on patient access to cancer screening — a consumer/patient-facing mission, not a B2B motion. There is no evidence of LLM agents in production; any AI involved is more likely diagnostic/imaging ML than agentic LLM systems. Small residual score reflects the one-line description's ambiguity, not positive fit signal.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        }
      ]
    },
    {
      "version": "v2",
      "note": "Added the full ICP and a strict single-line-JSON contract.",
      "rate": 90,
      "passed": 97,
      "total": 108,
      "checks": {
        "grounded_judge": {
          "passed": 6,
          "total": 12,
          "rate": 50
        },
        "no_banned_phrases": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "rationale_present": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "rationale_under_30w": {
          "passed": 10,
          "total": 12,
          "rate": 83
        },
        "score_in_range": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "tier_exact_match": {
          "passed": 9,
          "total": 12,
          "rate": 75
        },
        "tier_in_set": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "tier_matches_threshold": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "valid_json": {
          "passed": 12,
          "total": 12,
          "rate": 100
        }
      },
      "cases": [
        {
          "company": "Replicas",
          "goldTier": "A",
          "score": 92,
          "tier": "A",
          "rationale": "Replicas builds background coding agents as its core product, meaning production LLM agents are central to the business and observability/evaluation tooling is a direct operational need.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Dayjob",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Dayjob builds and ships LLM agents for industrial logistics customers, making production agent reliability core to its product and a strong fit for observability and evaluation tooling, with modest uncertainty only around its stage.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "AgentPhone",
          "goldTier": "A",
          "score": 75,
          "tier": "B",
          "rationale": "AgentPhone is an early-stage B2B company squarely in the AI-agent infrastructure ecosystem, but as a telephony provider for agents it enables others' deployments rather than clearly running its own production LLM agents, making it a strong-adjacent rather than core fit.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": false
          }
        },
        {
          "company": "Asendia AI",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Asendia AI is an early-stage B2B company shipping production LLM agents (AI recruiters) to external staffing and enterprise customers, making agent observability and evaluation a core operational need.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "Callab",
          "goldTier": "B",
          "score": 88,
          "tier": "A",
          "rationale": "Callab ships production LLM voice agents for B2B telephony customers, exactly the early-stage agent-deploying profile that needs observability and evaluation tooling.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": false
          }
        },
        {
          "company": "RentAHuman",
          "goldTier": "B",
          "score": 72,
          "tier": "B",
          "rationale": "An early-stage agent-economy marketplace whose product depends on production LLM-agent interactions, making observability relevant, though the agents it serves are largely operated by its customers rather than deployed in-house.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "Cohesion",
          "goldTier": "B",
          "score": 88,
          "tier": "A",
          "rationale": "Cohesion ships an LLM-agent 'teammate' for public equities as its core B2B product, so production agent reliability and evaluation are mission-critical needs.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": false
          }
        },
        {
          "company": "Tenet Industries",
          "goldTier": "C",
          "score": 4,
          "tier": "C",
          "rationale": "Tenet Industries is a hardware drone manufacturer, not a software company deploying LLM agents to production, making it a poor fit for an AI agent observability platform.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Arlo Industries",
          "goldTier": "C",
          "score": 8,
          "tier": "C",
          "rationale": "Arlo Industries is a defense hardware/sensing company, not a B2B software company deploying LLM agents to production, so it falls squarely into the hardware poor-fit category.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "AICE Power",
          "goldTier": "C",
          "score": 12,
          "tier": "C",
          "rationale": "AICE Power builds defense hardware (submarine drones), not a B2B software company deploying LLM agents to production, making it a poor fit for an agent observability and evaluation platform.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Lattice Health",
          "goldTier": "C",
          "score": 30,
          "tier": "C",
          "rationale": "Lattice Health is a clinical imaging AI platform — regulated diagnostics-style ML rather than LLM agents in production, placing it outside the core ICP despite being an AI-native company.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Adialante",
          "goldTier": "C",
          "score": 12,
          "tier": "C",
          "rationale": "Adialante is a healthcare diagnostics company focused on cancer screening, matching the explicitly excluded pure-diagnostics ML profile rather than a B2B company shipping LLM agents to production.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        }
      ]
    },
    {
      "version": "v3",
      "note": "Derive tier from score by fixed thresholds; cap the rationale at 30 words.",
      "rate": 79,
      "passed": 63,
      "total": 80,
      "checks": {
        "grounded_judge": {
          "passed": 2,
          "total": 8,
          "rate": 25
        },
        "no_banned_phrases": {
          "passed": 8,
          "total": 8,
          "rate": 100
        },
        "rationale_present": {
          "passed": 8,
          "total": 8,
          "rate": 100
        },
        "rationale_under_30w": {
          "passed": 8,
          "total": 8,
          "rate": 100
        },
        "score_in_range": {
          "passed": 8,
          "total": 8,
          "rate": 100
        },
        "tier_exact_match": {
          "passed": 5,
          "total": 12,
          "rate": 42
        },
        "tier_in_set": {
          "passed": 8,
          "total": 8,
          "rate": 100
        },
        "tier_matches_threshold": {
          "passed": 8,
          "total": 8,
          "rate": 100
        },
        "valid_json": {
          "passed": 8,
          "total": 12,
          "rate": 67
        }
      },
      "cases": [
        {
          "company": "Replicas",
          "goldTier": "A",
          "score": 92,
          "tier": "A",
          "rationale": "Replicas builds production LLM coding agents, the exact early-stage B2B profile needing agent observability and evaluation tooling.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "Dayjob",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Dayjob builds production AI agents for industrial logistics customers, a core fit for agent observability and evaluation tooling, assuming seed or early stage.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "AgentPhone",
          "goldTier": "A",
          "score": 65,
          "tier": "B",
          "rationale": "Early-stage B2B agent infrastructure adjacent to production LLM agents, but provides telephony plumbing rather than clearly deploying its own agents, making fit moderate.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": false
          }
        },
        {
          "company": "Asendia AI",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Asendia ships AI recruiter agents to production for B2B customers, a strong match for agent observability and evaluation despite some enterprise rather than purely early-stage focus.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "Callab",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Callab ships LLM voice agents to production for B2B telephony customers, a core buyer profile for agent observability and evaluation tooling.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": false
          }
        },
        {
          "company": "RentAHuman",
          "goldTier": "B",
          "score": 62,
          "tier": "B",
          "rationale": "Early-stage agent-economy marketplace operating in production, but its agents are external customers rather than LLM agents it deploys, weakening direct need for observability tooling.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": false
          }
        },
        {
          "company": "Cohesion",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Cohesion ships an LLM agent product (equities agentic teammate) to external users, a strong fit for agent observability and evaluation tooling.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "Tenet Industries",
          "goldTier": "C",
          "score": 4,
          "tier": "C",
          "rationale": "Hardware manufacturer of strike drones, not a B2B software company deploying LLM agents to production, so a poor fit for agent observability and evaluation tooling.",
          "signal": "",
          "play": "",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Arlo Industries",
          "goldTier": "C",
          "score": null,
          "tier": "",
          "rationale": "",
          "signal": "",
          "play": "",
          "parsed": false,
          "ok": false,
          "checks": {
            "valid_json": false,
            "tier_exact_match": false
          }
        },
        {
          "company": "AICE Power",
          "goldTier": "C",
          "score": null,
          "tier": "",
          "rationale": "",
          "signal": "",
          "play": "",
          "parsed": false,
          "ok": false,
          "checks": {
            "valid_json": false,
            "tier_exact_match": false
          }
        },
        {
          "company": "Lattice Health",
          "goldTier": "C",
          "score": null,
          "tier": "",
          "rationale": "",
          "signal": "",
          "play": "",
          "parsed": false,
          "ok": false,
          "checks": {
            "valid_json": false,
            "tier_exact_match": false
          }
        },
        {
          "company": "Adialante",
          "goldTier": "C",
          "score": null,
          "tier": "",
          "rationale": "",
          "signal": "",
          "play": "",
          "parsed": false,
          "ok": false,
          "checks": {
            "valid_json": false,
            "tier_exact_match": false
          }
        }
      ]
    },
    {
      "version": "v4",
      "note": "Require the rationale grounded only in the description; add signal + play.",
      "rate": 92,
      "passed": 99,
      "total": 108,
      "checks": {
        "grounded_judge": {
          "passed": 11,
          "total": 12,
          "rate": 92
        },
        "no_banned_phrases": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "rationale_present": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "rationale_under_30w": {
          "passed": 7,
          "total": 12,
          "rate": 58
        },
        "score_in_range": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "tier_exact_match": {
          "passed": 9,
          "total": 12,
          "rate": 75
        },
        "tier_in_set": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "tier_matches_threshold": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "valid_json": {
          "passed": 12,
          "total": 12,
          "rate": 100
        }
      },
      "cases": [
        {
          "company": "Replicas",
          "goldTier": "A",
          "score": 88,
          "tier": "A",
          "rationale": "Replicas ships background coding agents to engineering teams, meaning LLM agents running in production are its core product and observability/evaluation is a direct operational need.",
          "signal": "agents are product",
          "play": "Direct outreach with agent-eval demo",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Dayjob",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Dayjob builds AI agents for industrial logistics, a B2B product where LLM agents are the core offering and would need production observability and evaluation, though its funding stage is not stated.",
          "signal": "agents are product",
          "play": "Outbound with agent-eval use case demo",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "AgentPhone",
          "goldTier": "A",
          "score": 68,
          "tier": "B",
          "rationale": "AgentPhone sells phone-number infrastructure for AI agents, placing it squarely in the agent ecosystem as a likely early-stage B2B vendor, but the description does not state that it deploys LLM agents to production itself rather than enabling others'.",
          "signal": "agent infra adjacent",
          "play": "Qualify whether they run production voice agents in-house",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "Asendia AI",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Asendia builds AI recruiter agents for staffing agencies and enterprise, a B2B product whose core offering is deploying AI agents into production workflows — a strong match for an agent observability and evaluation platform.",
          "signal": "AI agents in production",
          "play": "Outbound with agent-eval use case for recruiting workflows",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Callab",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Callab builds AI voice agents for legacy telephony systems, meaning it ships LLM agents into production customer-facing environments where reliability monitoring and evaluation are critical.",
          "signal": "production voice agents",
          "play": "outbound with voice-agent eval demo",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": false
          }
        },
        {
          "company": "RentAHuman",
          "goldTier": "B",
          "score": 62,
          "tier": "B",
          "rationale": "RentAHuman operates a marketplace built around production AI agents as its core users, making agent reliability tooling plausibly relevant, but it is not stated that the company itself deploys LLM agents to production.",
          "signal": "agent-adjacent marketplace",
          "play": "qualify whether they run their own agents",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Cohesion",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Cohesion builds an agentic AI teammate for public equities, meaning it deploys LLM agents as its core product and would need observability and evaluation for them.",
          "signal": "agentic product core",
          "play": "outbound with agent-eval demo",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "Tenet Industries",
          "goldTier": "C",
          "score": 5,
          "tier": "C",
          "rationale": "Tenet Industries builds low-cost, mass-produced strike drones, making it a hardware manufacturer with no stated LLM agent deployment — an explicit poor fit for the ICP.",
          "signal": "hardware, not software",
          "play": "Disqualify; do not pursue",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Arlo Industries",
          "goldTier": "C",
          "score": 5,
          "tier": "C",
          "rationale": "Arlo Industries builds a hardware-based passive aerial sensing mesh for tracking drones and missiles, with no stated use of LLM agents in production, placing it squarely in the hardware poor-fit category for an AI agent observability platform.",
          "signal": "defense sensing hardware",
          "play": "Disqualify; do not pursue.",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "AICE Power",
          "goldTier": "C",
          "score": 5,
          "tier": "C",
          "rationale": "AICE Power builds submarine drone hardware for defense, not a B2B software company deploying LLM agents to production, making it a poor fit for an agent observability platform.",
          "signal": "defense hardware vendor",
          "play": "disqualify",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Lattice Health",
          "goldTier": "C",
          "score": 30,
          "tier": "C",
          "rationale": "An operating system for clinical AI in imaging points to diagnostics-focused medical imaging ML rather than LLM agents deployed to production, which the ICP flags as a poor fit.",
          "signal": "imaging diagnostics ML",
          "play": "deprioritize; revisit if LLM agent use emerges",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Adialante",
          "goldTier": "C",
          "score": 8,
          "tier": "C",
          "rationale": "Adialante does cancer screening, which is a pure-diagnostics healthcare offering with no stated B2B LLM agent deployment, making it a poor fit for an agent observability and evaluation platform.",
          "signal": "diagnostics, not agents",
          "play": "disqualify; no outreach",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        }
      ]
    },
    {
      "version": "v5",
      "note": "Two-part rationale (restate, then conclude about the ICP); hedge anything the description doesn't state. Judge calibrated the same run: hedged negatives and category paraphrase are grounded, asserted specifics are not.",
      "rate": 87,
      "passed": 94,
      "total": 108,
      "checks": {
        "grounded_judge": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "no_banned_phrases": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "rationale_present": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "rationale_under_30w": {
          "passed": 0,
          "total": 12,
          "rate": 0
        },
        "score_in_range": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "tier_exact_match": {
          "passed": 10,
          "total": 12,
          "rate": 83
        },
        "tier_in_set": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "tier_matches_threshold": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "valid_json": {
          "passed": 12,
          "total": 12,
          "rate": 100
        }
      },
      "cases": [
        {
          "company": "Replicas",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Replicas builds background coding agents for engineering teams, which fits the ICP because agent products like this typically run LLM agents in production and would likely need observability and evaluation, though there is no stated indication of company stage.",
          "signal": "production agent product",
          "play": "Outbound with agent-eval angle",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Dayjob",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Dayjob builds AI agents for industrial logistics, which fits the ICP because an agent product like this would likely run LLM agents in production for customers, though there is no stated indication of company stage.",
          "signal": "agent product company",
          "play": "Prioritize outreach with agent-reliability angle",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "AgentPhone",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "AgentPhone provides phone numbers for AI agents, which fits the ICP because companies building infrastructure for AI agents like this would likely run LLM agents in production themselves, though there is no stated indication of company stage.",
          "signal": "agent infra product",
          "play": "outbound with agent-eval angle",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Asendia AI",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Asendia AI builds AI recruiters for staffing agencies and enterprise, which fits the ICP because an AI-recruiter product like this would likely run LLM agents in production, though there is no stated indication of company stage.",
          "signal": "agent product live",
          "play": "outbound with agent-eval angle",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Callab",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Callab builds AI voice agents for legacy telephony systems, which fits the ICP because voice-agent products like this typically run LLM agents in production for customers, though there is no stated indication of company stage.",
          "signal": "production voice agents",
          "play": "outbound with voice-agent eval case study",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "RentAHuman",
          "goldTier": "B",
          "score": 68,
          "tier": "B",
          "rationale": "RentAHuman operates a marketplace where AI agents hire humans, which suggests a fit with the ICP because an agent-facing platform like this would likely run LLM agents in production, though the description gives no stated indication that the company deploys its own agents.",
          "signal": "agent-native marketplace",
          "play": "Discovery call on agent stack",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Cohesion",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Cohesion builds an agentic teammate for public equities, which fits the ICP because an agent product like this would likely run LLM agents in production for customer-facing workflows, though the description gives no stated indication of company stage.",
          "signal": "agentic product core",
          "play": "Outbound with agent-eval angle",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "Tenet Industries",
          "goldTier": "C",
          "score": 5,
          "tier": "C",
          "rationale": "Tenet Industries builds low-cost, mass-produced strike drones, which is a poor fit for the ICP because it is a hardware product with no stated indication of LLM agents deployed to production.",
          "signal": "hardware, no LLM",
          "play": "Disqualify; no outreach",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Arlo Industries",
          "goldTier": "C",
          "score": 8,
          "tier": "C",
          "rationale": "Arlo Industries builds a passive aerial sensing mesh for tracking drones and missiles, which is a poor fit for the ICP because hardware sensing products like this give no stated indication of LLM agents deployed to production.",
          "signal": "defense sensing hardware",
          "play": "disqualify; no outreach",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "AICE Power",
          "goldTier": "C",
          "score": 8,
          "tier": "C",
          "rationale": "AICE Power builds submarine drones for defense, which is a poor fit for the ICP because hardware products are an explicitly poor fit and there is no stated indication of LLM agents deployed to production.",
          "signal": "defense hardware",
          "play": "Disqualify; no outreach",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Lattice Health",
          "goldTier": "C",
          "score": 25,
          "tier": "C",
          "rationale": "Lattice Health builds an operating system for clinical AI in imaging, which fits the ICP poorly because imaging-focused clinical AI suggests diagnostics-style ML rather than production LLM agents, with no stated indication of agent deployments.",
          "signal": "clinical imaging AI",
          "play": "deprioritize; revisit if agent use emerges",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Adialante",
          "goldTier": "C",
          "score": 8,
          "tier": "C",
          "rationale": "Adialante works on making cancer screening accessible without barriers, which is a poor fit for the ICP because diagnostics-focused healthcare like this typically relies on screening/diagnostic ML rather than production LLM agents, with no stated indication of agent deployment.",
          "signal": "pure diagnostics healthcare",
          "play": "disqualify; no outreach",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": false,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        }
      ]
    },
    {
      "version": "v6",
      "note": "v5's grounding discipline + a hard 30-word cap with a compressed example. Ships only if it holds v4's rate.",
      "rate": 97,
      "passed": 105,
      "total": 108,
      "checks": {
        "grounded_judge": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "no_banned_phrases": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "rationale_present": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "rationale_under_30w": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "score_in_range": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "tier_exact_match": {
          "passed": 9,
          "total": 12,
          "rate": 75
        },
        "tier_in_set": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "tier_matches_threshold": {
          "passed": 12,
          "total": 12,
          "rate": 100
        },
        "valid_json": {
          "passed": 12,
          "total": 12,
          "rate": 100
        }
      },
      "cases": [
        {
          "company": "Replicas",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Builds background coding agents for engineering teams; such agent products would likely run LLM agents in production, strongly matching the ICP, though stage is unstated.",
          "signal": "agents are the product",
          "play": "Prioritize outbound; lead with eval/observability pain",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Dayjob",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Builds AI agents for industrial logistics; an agent product like this would likely run LLM agents in production for B2B customers, a strong ICP fit.",
          "signal": "production agent vendor",
          "play": "outbound with agent-eval angle",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "AgentPhone",
          "goldTier": "A",
          "score": 72,
          "tier": "B",
          "rationale": "Provides phone numbers for AI agents; agent-infrastructure vendors like this likely operate LLM agents in production themselves, suggesting ICP fit, though the description doesn't confirm it.",
          "signal": "agent infra adjacent",
          "play": "Discovery call on agent usage",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "Asendia AI",
          "goldTier": "A",
          "score": 85,
          "tier": "A",
          "rationale": "Sells AI recruiter agents to staffing agencies and enterprises; recruiter products like this would likely run LLM agents in production, a strong ICP fit.",
          "signal": "production agent product",
          "play": "Fast-track outbound with agent-eval angle",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Callab",
          "goldTier": "B",
          "score": 85,
          "tier": "A",
          "rationale": "Builds AI voice agents for legacy telephony; voice agents like this typically run LLM agents in production, a strong ICP fit despite no stated company stage.",
          "signal": "production voice agents",
          "play": "Prioritize for direct outreach",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "RentAHuman",
          "goldTier": "B",
          "score": 68,
          "tier": "B",
          "rationale": "Marketplace where AI agents hire humans; serving agent customers suggests production agent infrastructure, a likely ICP fit, though no stated indication it deploys its own agents.",
          "signal": "agent-native marketplace",
          "play": "Qualify whether they run their own agents before outreach",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Cohesion",
          "goldTier": "B",
          "score": 82,
          "tier": "A",
          "rationale": "Offers an agentic teammate for public equities; an agent product like this would likely run LLM agents in production, a strong ICP fit.",
          "signal": "production agent product",
          "play": "Prioritize outbound; lead with agent eval/observability pain.",
          "parsed": true,
          "ok": false,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": false,
            "grounded_judge": true
          }
        },
        {
          "company": "Tenet Industries",
          "goldTier": "C",
          "score": 5,
          "tier": "C",
          "rationale": "Mass-produces low-cost strike drones; hardware manufacturing like this gives no stated indication of production LLM agents, a poor ICP fit.",
          "signal": "defense hardware",
          "play": "Disqualify; no outreach",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Arlo Industries",
          "goldTier": "C",
          "score": 8,
          "tier": "C",
          "rationale": "Builds a passive aerial sensing mesh to track drones and missiles; hardware sensing with no stated LLM agent deployment, a poor ICP fit.",
          "signal": "defense sensing hardware",
          "play": "disqualify; no outreach",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "AICE Power",
          "goldTier": "C",
          "score": 8,
          "tier": "C",
          "rationale": "Builds submarine drones for defense; hardware-focused product with no stated indication of LLM agents in production, a poor ICP fit.",
          "signal": "defense hardware",
          "play": "Disqualify; no outreach",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Lattice Health",
          "goldTier": "C",
          "score": 20,
          "tier": "C",
          "rationale": "Operating system for clinical AI in medical imaging; imaging platforms like this typically center diagnostic vision models, not production LLM agents, a poor ICP fit.",
          "signal": "clinical imaging AI",
          "play": "Deprioritize; revisit if LLM agents ship",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        },
        {
          "company": "Adialante",
          "goldTier": "C",
          "score": 5,
          "tier": "C",
          "rationale": "Provides barrier-free cancer screening; healthcare diagnostics with no stated indication of production LLM agents falls outside the ICP.",
          "signal": "diagnostics, not agents",
          "play": "Disqualify; no outreach",
          "parsed": true,
          "ok": true,
          "checks": {
            "valid_json": true,
            "score_in_range": true,
            "tier_in_set": true,
            "tier_matches_threshold": true,
            "rationale_present": true,
            "rationale_under_30w": true,
            "no_banned_phrases": true,
            "tier_exact_match": true,
            "grounded_judge": true
          }
        }
      ]
    }
  ],
  "runNote": "Single logical run resumed across session-quota interruptions: v1-v3 2026-06-10T16:46:41Z, v4-v5 2026-06-10T21:42:27Z, v6 2026-06-10T21:52:29Z. Same gold cases and same calibrated grounding judge in all three sessions."
}