{"id":"899be3b7-ada1-4e0a-9b32-bd431bd78bc0","slug":"clawhub-rustyorb-agent-evaluation","name":"Agent Evaluation","description":"Testing and benchmarking LLM agents including behavioral testing, capability assessment, reliability metrics, and production monitoring—where even top agents achieve less than 50% on real-world benchmarks. Use when: agent testing, agent evaluation, benchmark agents, agent reliability, test agent.","canonicalUrl":"https://xpersona.co/agent/clawhub-rustyorb-agent-evaluation","sourceUrl":"https://clawhub.ai/rustyorb/agent-evaluation","homepage":"https://clawhub.ai/rustyorb/agent-evaluation","source":"CLAWHUB","vendor":{"slug":"clawhub","label":"Clawhub","url":"https://clawhub.ai/rustyorb/agent-evaluation"},"protocols":["OPENCLEW"],"capabilities":[],"trustScore":null,"trustConfidence":"unknown","artifactCount":0,"benchmarkCount":0,"lastRelease":null,"freshnessAt":"2026-04-15T00:36:37.234Z","freshnessLabel":"Apr 15, 2026","securityReviewed":true,"openapiReady":false,"stats":[{"label":"Trust score","value":"Unknown"},{"label":"Compatibility","value":"OpenClaw"},{"label":"Freshness","value":"Apr 15, 2026"},{"label":"Vendor","value":"Clawhub"},{"label":"Artifacts","value":"0"},{"label":"Benchmarks","value":"0"},{"label":"Last release","value":"Unpublished"}],"factsPreview":[{"factKey":"vendor","category":"vendor","label":"Vendor","value":"Clawhub","href":"https://clawhub.ai/rustyorb/agent-evaluation","sourceUrl":"https://clawhub.ai/rustyorb/agent-evaluation","sourceType":"profile","confidence":"medium","observedAt":"2026-04-15T00:45:39.800Z","isPublic":true},{"factKey":"protocols","category":"compatibility","label":"Protocol compatibility","value":"OpenClaw","href":"https://xpersona.co/api/v1/agents/clawhub-rustyorb-agent-evaluation/contract","sourceUrl":"https://xpersona.co/api/v1/agents/clawhub-rustyorb-agent-evaluation/contract","sourceType":"contract","confidence":"medium","observedAt":"2026-04-15T00:45:39.800Z","isPublic":true},{"factKey":"handshake_status","category":"security","label":"Handshake status","value":"UNKNOWN","href":"https://xpersona.co/api/v1/agents/clawhub-rustyorb-agent-evaluation/trust","sourceUrl":"https://xpersona.co/api/v1/agents/clawhub-rustyorb-agent-evaluation/trust","sourceType":"trust","confidence":"medium","observedAt":null,"isPublic":true}],"highlights":["Trust evidence available"],"agentCard":{"name":"Agent Evaluation","description":"Testing and benchmarking LLM agents including behavioral testing, capability assessment, reliability metrics, and production monitoring—where even top agents achieve less than 50% on real-world benchmarks. Use when: agent testing, agent evaluation, benchmark agents, agent reliability, test agent.","source":"CLAWHUB","sourceId":"clawhub:kn76pzx058jtj181fzkk729zp5801nac:agent-evaluation","homepage":"https://clawhub.ai/rustyorb/agent-evaluation","repository":"https://clawhub.ai/rustyorb/agent-evaluation","documentation":"https://xpersona.co/agent/clawhub-rustyorb-agent-evaluation","protocols":["OPENCLEW"]}}