[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"global-settings":3,"blog":155,"blog-posts":182},{"data":4},{"id":5,"documentId":6,"siteName":7,"address":8,"contactEmail":9,"copyrightText":10,"hubspotPortalId":11,"hubspotNewsletterFormId":12,"createdAt":13,"updatedAt":14,"publishedAt":15,"locale":12,"hubspotJobApplicationFormId":12,"hubspotDemoRequestFormId":12,"hubspotContactFormId":16,"defaultOgImageAlt":7,"logo":17,"logoDark":31,"defaultOgImage":39,"headerCtas":84,"legalLinks":95,"socialLinks":105,"headerNav":111,"footerColumns":142},4,"t4ysbj7xqjxe9vwpng6wvs9y","Hyground","Versmannstraße 2, 20457 Hamburg, Germany","contact@hyground.ai","© 2026 Hyground. All rights reserved.","145901011",null,"2026-04-22T08:43:29.438Z","2026-05-13T11:43:50.588Z","2026-05-13T11:43:50.471Z","496682dd-c91c-463f-b8f5-30e9679f415e",{"id":18,"documentId":19,"name":20,"alternativeText":12,"caption":12,"focalPoint":12,"width":21,"height":22,"formats":12,"hash":23,"ext":24,"mime":25,"size":26,"url":27,"previewUrl":12,"provider":28,"provider_metadata":12,"folderPath":29,"createdAt":30,"updatedAt":30,"publishedAt":30,"locale":12},69,"l1zkkv2k4anah2ethspw4kk4","logo.svg",508,103,"logo_230b5ea131",".svg","image\u002Fsvg+xml",9.8,"\u002Fuploads\u002Flogo_230b5ea131.svg","local","\u002F","2026-05-04T13:52:45.374Z",{"id":32,"documentId":33,"name":34,"alternativeText":12,"caption":12,"focalPoint":12,"width":21,"height":22,"formats":12,"hash":35,"ext":24,"mime":25,"size":36,"url":37,"previewUrl":12,"provider":28,"provider_metadata":12,"folderPath":29,"createdAt":30,"updatedAt":30,"publishedAt":38,"locale":12},70,"n75laf0qlq0cjo1ksgxafb4i","logo-dark.svg","logo_dark_793a0f8cb9",9.81,"\u002Fuploads\u002Flogo_dark_793a0f8cb9.svg","2026-05-04T13:52:45.375Z",{"id":40,"documentId":41,"name":42,"alternativeText":12,"caption":12,"focalPoint":12,"width":43,"height":44,"formats":45,"hash":80,"ext":47,"mime":50,"size":81,"url":82,"previewUrl":12,"provider":28,"provider_metadata":12,"folderPath":29,"cre
atedAt":83,"updatedAt":83,"publishedAt":83,"locale":12},162,"wo4i957bepocr78jgqrcswz6","hyground-og-img.webp",1200,630,{"large":46,"small":56,"medium":64,"thumbnail":72},{"ext":47,"url":48,"hash":49,"mime":50,"name":51,"path":12,"size":52,"width":53,"height":54,"sizeInBytes":55},".webp","\u002Fuploads\u002Flarge_hyground_og_img_f5c8198dfa.webp","large_hyground_og_img_f5c8198dfa","image\u002Fwebp","large_hyground-og-img.webp",9.23,1000,525,9226,{"ext":47,"url":57,"hash":58,"mime":50,"name":59,"path":12,"size":60,"width":61,"height":62,"sizeInBytes":63},"\u002Fuploads\u002Fsmall_hyground_og_img_f5c8198dfa.webp","small_hyground_og_img_f5c8198dfa","small_hyground-og-img.webp",4.71,500,262,4710,{"ext":47,"url":65,"hash":66,"mime":50,"name":67,"path":12,"size":68,"width":69,"height":70,"sizeInBytes":71},"\u002Fuploads\u002Fmedium_hyground_og_img_f5c8198dfa.webp","medium_hyground_og_img_f5c8198dfa","medium_hyground-og-img.webp",6.84,750,394,6844,{"ext":47,"url":73,"hash":74,"mime":50,"name":75,"path":12,"size":76,"width":77,"height":78,"sizeInBytes":79},"\u002Fuploads\u002Fthumbnail_hyground_og_img_f5c8198dfa.webp","thumbnail_hyground_og_img_f5c8198dfa","thumbnail_hyground-og-img.webp",2.29,245,129,2288,"hyground_og_img_f5c8198dfa",11.92,"\u002Fuploads\u002Fhyground_og_img_f5c8198dfa.webp","2026-05-12T07:30:09.091Z",[85,90],{"id":86,"label":87,"url":88,"variant":89},56,"Try our Sandbox","\u002Ftry-hyground-sandbox","ghost",{"id":91,"label":92,"url":93,"variant":94},55,"Book a demo","\u002Fbook-demo","primary",[96,101],{"id":97,"label":98,"url":99,"external":100},46,"Privacy 
Policy","\u002Fprivacy-policy",false,{"id":102,"label":103,"url":104,"external":100},47,"Imprint","\u002Fimprint",[106],{"id":107,"label":108,"url":109,"external":110},48,"LinkedIn","https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Fhyground",true,[112,116,128,133,138],{"id":113,"label":114,"url":29,"external":100,"children":115},1,"Home",[],{"id":117,"label":118,"url":12,"external":100,"children":119},5,"Product",[120,124],{"id":121,"label":122,"url":123,"external":100},49,"Skills","https:\u002F\u002Fhyground.ai\u002Fproduct-skills",{"id":125,"label":126,"url":127,"external":100},50,"Scheduling & Triggers","https:\u002F\u002Fhyground.ai\u002Fproduct-scheduling",{"id":129,"label":130,"url":131,"external":100,"children":132},2," Blog","\u002Fblog",[],{"id":134,"label":135,"url":136,"external":100,"children":137},3,"Company","\u002Fcompany",[],{"id":5,"label":139,"url":140,"external":100,"children":141},"Contact","\u002Fcontact",[],[143],{"id":5,"heading":144,"links":145},"Site",[146,148,151,153],{"id":147,"label":114,"url":29,"external":100},41,{"id":149,"label":150,"url":131,"external":100},42,"Blog",{"id":152,"label":139,"url":140,"external":100},44,{"id":154,"label":92,"url":93,"external":100},45,{"data":156},{"id":157,"documentId":158,"allLabel":159,"createdAt":160,"updatedAt":161,"publishedAt":12,"locale":12,"newsletter":12,"seo":162,"hero":166},6,"u6p6xe0i6mgyp5wl5uli1t0v","All","2026-05-04T13:36:28.992Z","2026-05-11T21:37:11.523Z",{"id":163,"title":164,"description":165,"ogImage":12},30,"Blog — Hyground","Thoughts on sovereign AI, autonomous operations and building resilient systems.",{"id":163,"eyebrow":150,"headline":167,"subheadline":165,"imageFit":168,"imageMaxWidth":12,"customHeightVh":169,"backgroundImage":170,"ctas":181},"AI in SRE 
Insights","cover",40,{"id":171,"documentId":172,"name":173,"alternativeText":12,"caption":12,"focalPoint":12,"width":69,"height":174,"formats":12,"hash":175,"ext":24,"mime":25,"size":176,"url":177,"previewUrl":12,"provider":28,"provider_metadata":12,"folderPath":29,"createdAt":178,"updatedAt":179,"publishedAt":180,"locale":12},68,"wtgtb597cio77so5h32n5x4r","hyground-eagle-svg.svg",653,"hyground_eagle_svg_894f156eb5",1.01,"\u002Fuploads\u002Fhyground_eagle_svg_894f156eb5.svg","2026-05-04T13:52:45.343Z","2026-05-12T12:28:04.139Z","2026-05-04T13:52:45.344Z",[],{"data":183,"meta":385},[184,214,238,253,268,283,299,321,335,350,364],{"id":185,"documentId":186,"title":187,"slug":188,"excerpt":189,"body":190,"coverImageUrl":191,"readingMinutes":157,"tags":192,"publishedDate":195,"createdAt":196,"updatedAt":197,"publishedAt":198,"coverImage":12,"categories":199},62,"c1vdulikaj1rpmp0pau33xed","What an SRE Agent Can Do For Testers","what-an-sre-agent-can-do-for-testers","Testers lose hours chasing bugs that turn out to be mismatched deploys, conflicting integrations or broken infrastructure. Hyground's SRE Agent gives you the environmental clarity to know whether your next test session will actually produce trustworthy findings, before you start.","I've wasted countless hours reporting bugs, getting frustrated, pinging the developers, having them investigate,... only to find out the test environment was broken. Or the wrong branch was deployed. Or the test data was outdated...\n\nAm I even looking at something stable enough to trust my own findings? 
The automated checks can help out with a lot, but often still fall short when it comes to infrastructure.\n\nThe job of a tester may have evolved over the past decades, but nothing has sufficiently replaced the clever, investigative, evidence-based interaction with the software by a skilled human.\n\nNot \"clicking around,\" as some still like to caricature it, but a deliberate, skilled investigation where learning, test design, and execution happen simultaneously. And when you combine that with risk-based thinking, you get something powerful: a focused session where every minute counts because you know _why_ you're looking where you're looking.\n\nAt Hyground, we build an SRE Agent that lives and breathes production environments, but also development and test environments can be hooked on.\n\nSeeing the amazing capabilities and insights it has access to, it makes me think about my previous experiences as a tester.\n\nA few years ago I was brought in on a project that was, to put it kindly, under siege. The development team had doubled in size. Two additional projects had launched alongside the main rebuild. And the software, a full rewrite of a legacy system using modern technology, was being deployed faster than anyone could verify what was actually in it.\n\nBugs multiplied faster than fix-releases could contain them. At some point, I even printed them out, pasted them against the wall so management couldn't ignore them.\n\nWhat made it truly painful: the bugs we found weren't always bugs. The wrong build was deployed. Other times, data had gone stale or been removed between test cycles. Features that had been reworked since we last looked at them. We were spending enormous energy investigating things that weren't product defects at all. What made it worse is that our questions confused the development teams and wasted their time as well.\n\n... we did eventually make the project successful but it was painful. 
If only we had a Hyground-like system then.\n\nI think about that project a lot these days. Because the situation it represented: fast-moving changes, too many PRs to track, an environment where you can't trust the stability of what you're testing,... Well that's an everyday thing right now.\n\n**With AI-generated code accelerating development velocity, it's becoming the default.**\n\nHyground, an AI SRE Agent, is plugged into your infrastructure, observability and beyond. It would've been an absolute hero in my previous project and is a lifesaver for testers now.\n\n## What's Changed?\n\nWhether you're testing an AI agent on a platform, or a non-AI product or anything in between, chances are your product is changing at an incredible rate since at least last December.\n\nConstant and drastic change. Between one test session and the next, the landscape could shift completely. New code merged, data migrated, infrastructure tweaked, features reworked. And nobody told you exactly what changed. You'd start a session with assumptions from last week and discover, sometimes after logging several bugs, that they no longer hold true.\n\nFor any tester approaching an environment under heavy development, one of the first thoughts should be about the environment itself. What changed since I last looked? Are the changes I'm seeing intentional or accidental? Is this data fresh or stale?\n\nAn SRE Agent that monitors the environment continuously could answer this before you even start your session. It can draw a real picture: these services were redeployed, this data pipeline ran (or didn't), these infrastructure components changed configuration. 
The kind of situational awareness that took us hours of Slack messages and guesswork to piece together.\n\nAt the very best of times, we'd have to \"ask that one person\", and they were usually quite busy.\n\n## Am I Set Up for Success?\n\nOn that project, I watched skilled testers waste entire half-day sessions investigating what turned out to be environment inconsistencies. Not bugs. Not design issues. Just the debris of a codebase moving faster than its integration process could handle. Those sessions were lost and resulted in a lot of wasted time and added frustrations.\n\nBefore I start any exploratory session now, I want to know:\n\n- Is this environment in a healthy state?\n- Which PRs are deployed?\n- Any third-party processes running?\n- Are the downstream services reachable and healthy?\n- Are there any open Jira issues flagging known problems with this environment?\n- What's the current error rate in the logs?\n\n... or I could just send an email to Hyground with \"Hey Hyground, I want to do a test session on X on environment Y. Anything out of the ordinary I should know?\"\n\nThis is where something like Hyground changes the game. An SRE Agent that can tell you \"your Kubernetes cluster is healthy, but this service was redeployed 20 minutes ago and hasn't stabilized\" saves you from chasing phantoms. It turns a guessing game into a briefing.\n\n## Why This Matters Now\n\nThe project I described at the start wasn't unusual for its time. A team moving that fast, with that much change, and that little environmental stability, it felt extreme, but probably more normal than we'd like to admit. We coped with colored stickers on a wall and the goodwill of twenty driven team members.\n\nToday, that velocity is normal. AI-assisted development means more PRs, more frequent deployments, more change. 
The tester's challenge of \"_can I trust what I'm looking at?_\" is intensifying, not receding.\n\nRisk-based exploratory testing gives you a framework for focusing your limited time on what matters most. However, it only works if the environment cooperates, or if you have the tooling to understand when it doesn't. That's the piece that was missing on my project years ago. It's the piece I'm now helping to build.","https:\u002F\u002Fcdn.prod.website-files.com\u002F69b9da099afc8bd6bb5eaf1f\u002F69dcf8ef3a2568e4c3949f5c_fc06ba48-1f1a-47fa-9733-831d1ccab4a8.jpeg",[193,194],"SRE","AI","2026-04-29","2026-05-03T13:47:55.530Z","2026-05-04T12:52:05.817Z","2026-05-04T12:52:05.828Z",[200,207],{"id":201,"documentId":202,"name":193,"slug":203,"description":12,"createdAt":204,"updatedAt":205,"publishedAt":206},21,"esmbw6j5qnwo3ejuy41sch11","sre","2026-05-03T18:55:16.748Z","2026-05-04T12:52:05.755Z","2026-05-04T12:52:05.762Z",{"id":208,"documentId":209,"name":194,"slug":210,"description":12,"createdAt":211,"updatedAt":212,"publishedAt":213},22,"eqxvy9rdz02zgs1omov67xxo","ai","2026-05-03T19:00:36.648Z","2026-05-04T12:52:05.774Z","2026-05-04T12:52:05.778Z",{"id":121,"documentId":215,"title":216,"slug":217,"excerpt":218,"body":219,"coverImageUrl":220,"readingMinutes":221,"tags":222,"publishedDate":224,"createdAt":225,"updatedAt":226,"publishedAt":227,"coverImage":12,"categories":228},"mjsspze7k9843s31004oy2m5","Claude Code Is Not an SRE Agent","claude-code-is-not-an-sre-agent","AI is great at observing production systems but can't replace SREs because root cause analysis requires system history, institutional knowledge, and human judgment that models lack.","**Claude Code Is Not an SRE Agent**\n\n_Why reading logs at I\u002FO speed is still not the same as understanding production._\n\nEveryone wants the same story right now: if AI can write code, surely the next step is that it can run the systems that code creates. 
Anthropic's own reliability team just offered a much more useful reality check. At QCon London, Alex Palcuie described Claude as genuinely helpful during incidents, but still a poor substitute for an SRE. Anthropic's own Site Reliability Agent cookbook quietly says the same thing in architectural form: once you move from demo to production, you do not just need a model. You need scoped tools, safety boundaries, runbooks, knowledge capture, and human approval around the model.\n\nThat distinction matters because the market is drifting toward the wrong abstraction. We keep treating \"AI that can code\" as if it naturally extends to \"AI that can run production.\" It does not. Palcuie described incident response as a loop of observe, orient, decide, and act, while explicitly saying AI is fantastic at the observation part. That framing is the whole story. Production incident response is not a log-reading contest. The hardest part is not seeing more data. The hardest part is deciding what the data means.\n\n## Observation Is Not Diagnosis\n\nThis is where the difference becomes painfully obvious.\n\nClaude is extremely good at observation. In Palcuie's examples, it could move through evidence at machine speed, query data quickly, and surface patterns that would have taken a human much longer to find. In one incident, it helped distinguish a cluster of HTTP 500s from what turned out to be an abuse or fraud pattern. That is not trivial. At production scale, the ability to read logs \"at the speed of I\u002FO\" is a real advantage.\n\nBut the more important example is the negative one. During KV cache incidents, Claude repeatedly saw rising request volume and concluded that the system needed more servers. The visible symptom was real. The conclusion was wrong. The actual issue was the broken cache, not a simple capacity shortfall. 
That is the exact failure mode that makes many AI-for-ops demos look more capable than they are: they confuse correlation with causation, then wrap the mistake in a confident, readable explanation. Palcuie made the point even more directly on postmortems: the model can produce a persuasive story while still being bad at identifying the true root causes.\n\nAt Hyground, we think this is the line the industry needs to draw much more clearly. AI is already very good at the observation layer. It is good at searching, correlating, summarizing, and narrowing the search space. Root cause analysis is something else. It is hypothesis management under uncertainty. It is deciding which signals are upstream, which ones are downstream, and which ones are just noise. That is why the right product goal is guided investigation, not autonomous certainty. Anthropic's own cookbook reflects that same logic: the agent is most effective when it can synthesize across metrics, logs, alerts, and configuration, while keeping remediation inside a structured human-in-the-loop workflow.\n\n## Root Cause Lives in History\n\nPalcuie's most important point may be even simpler: models do not know the history of your system.\n\nAnd in real production environments, history is half the diagnosis. The alert threshold that was relaxed three years ago because of a migration. The service that still depends on an old ownership model no one fully untangled. The config workaround that became permanent. The incident that everyone remembers, but nobody documented properly. None of that lives in today's logs. None of that is obvious from the current dashboard. Yet all of it shapes the meaning of the current failure. Palcuie said this directly: Claude does not know the history of your system, especially when that system has been around for ten years.\n\nThis is why knowledge preservation is not a nice-to-have layer on top of incident AI. It is part of the core system. 
Anthropic's own SRE cookbook ends up in exactly that place. The generic agent becomes materially more useful once it can follow runbooks, encode institutional procedures as skills, search prior postmortems, and write new postmortem pages into Confluence. That is not \"extra context.\" That is the missing operational memory that turns a plausible story into a useful investigation.\n\nIt is also why we designed Hyground the way we did. Our platform is built to run inside the customer's environment, connect to operational systems like Prometheus, Loki, and Kubernetes, and work with the existing toolchain and knowledge base rather than behave like an isolated chatbot. Hyground's product language already reflects this: natural-language access to infra data, living documentation that reads and writes team knowledge, and a guided assistant that investigates multi-signal events inside the environment where the incident actually happens.\n\n## Jevons Does Not Care About Your Demo\n\nThe strategic takeaway from Palcuie's talk is not just that models still struggle with causality. It is that the demand side of operations is likely to grow, not shrink.\n\nHe explicitly invokes Jevons Paradox: when technology makes something cheaper, we often end up doing more of it, not less. In software, that means AI makes it easier to write code, so organizations write more code, create more services, increase complexity, and end up with more interesting failures. The result is not a world with less on-call. It is a world where the surface area for incidents expands faster than teams can manually reason about it.\n\nThis is the part many AI narratives still miss. AI-assisted development is not just a productivity story. It is also a complexity story. Every gain in generation speed can translate into more services, more dependencies, more deploys, more hidden coupling, and more chances to discover that a system was only \"working\" because nobody had stressed it in exactly this way before. 
That is why the market for operational intelligence is getting bigger at the same time AI coding tools are getting better. The tools that accelerate change are also increasing the need for tools that can safely understand change in production.\n\n## Do Not Let Scar Tissue Evaporate\n\nPalcuie also raised a concern that every engineering leader should take seriously: skill atrophy.\n\nHe said good SREs carry scar tissue. That is exactly the right phrase. Great incident responders are not just people who know where the logs are. They are people who have seen which dashboards lie, which symptoms repeat, which rollbacks are safe, which \"obvious fixes\" create a second incident twenty minutes later, and which systems only look independent on the architecture diagram. That scar tissue is expensive to build and easy to lose.\n\nThe right role for AI is not to replace that scar tissue. It is to preserve it, spread it, and make it more accessible to the rest of the organization. Let the model do the boring but high-volume work: sift logs, compare deploy history, correlate alerts, summarize state, draft the first postmortem. Let humans own judgment, escalation, tradeoffs, and action under risk. Even Anthropic's own setup separates investigation from remediation and treats the boundary between read-only analysis and write access as a first-class design decision. That is teammate design, not replacement design.\n\n## Anthropic's Architecture Quietly Validates the Right Design\n\nThere is a second signal buried in Anthropic's own materials, and it is probably the most useful one for builders in this space.\n\nTheir official Site Reliability Agent is not \"just Claude Code.\" It is an architecture. It uses MCP-connected tools for metrics, logs, configs, alerts, and deployment history. It scopes access with restricted directories, command allowlists, and validation hooks. It separates investigation from remediation. It supports runbooks and postmortem workflows. 
It extends into tools like PagerDuty and Confluence. In other words: even Anthropic does not treat operational AI as a model alone. They treat it as a model embedded in a carefully structured operational system.\n\nThat independently validates the direction we believe matters most. The product moat in AI for operations is not \"our model is magical.\" It is whether the system is connected to the right evidence, whether it can retrieve institutional knowledge, whether it can structure an investigation safely, and whether it can act inside the right approval boundaries. Hyground's current architecture points in exactly that direction as well: cluster-resident deployment, local data processing, integration into the existing toolchain, living documentation, and guided investigation instead of blind autonomy.\n\n## The Real Opportunity\n\nSo yes: Claude Code is not an SRE agent.\n\nBut that is not an indictment of Claude Code. It is a statement about the category. SRE work is not coding with different inputs. It is evidence-weighting under uncertainty inside systems shaped by years of technical decisions, organizational compromises, and accumulated operational memory. A model that reads faster is helpful. A system that can investigate safely, retrieve history, preserve team knowledge, and collaborate with humans is what operations teams actually need.\n\nThe winners in this market will not be the companies promising to fire the SRE team. They will be the ones that make every engineer more effective in the first fifteen minutes of an incident, preserve what the team learns in the fifteen hours after it, and free the best SREs to work on harder reliability problems. Anthropic's own experience is one of the clearest public signals yet that this is where the industry is actually heading. If your AI can read logs but cannot separate symptoms from causes, retrieve system history, and operate inside safe workflows, you do not have an SRE agent. 
You have a very fast observer.","https:\u002F\u002Fcdn.prod.website-files.com\u002F69b9da099afc8bd6bb5eaf1f\u002F69be7021499e1af91869dc61_Hyground-Thumbnail-stock.jpg",8,[223,194,193],"DevOps","2026-04-22","2026-05-03T13:47:55.539Z","2026-05-04T12:52:05.845Z","2026-05-04T12:52:05.855Z",[229,236,237],{"id":230,"documentId":231,"name":223,"slug":232,"description":12,"createdAt":233,"updatedAt":234,"publishedAt":235},23,"jzwk2k7kt8k18ghmv59hl8lq","devops","2026-05-03T19:00:36.652Z","2026-05-04T12:52:05.785Z","2026-05-04T12:52:05.789Z",{"id":208,"documentId":209,"name":194,"slug":210,"description":12,"createdAt":211,"updatedAt":212,"publishedAt":213},{"id":201,"documentId":202,"name":193,"slug":203,"description":12,"createdAt":204,"updatedAt":205,"publishedAt":206},{"id":239,"documentId":240,"title":241,"slug":242,"excerpt":243,"body":244,"coverImageUrl":245,"readingMinutes":117,"tags":246,"publishedDate":247,"createdAt":248,"updatedAt":249,"publishedAt":250,"coverImage":12,"categories":251},57,"q7ij5hg6inwraufmmjtz5n34","Hyground Raises €3M Pre-Seed Round Fueling our Ambitions to Redefine Enterprise IT Operations","hyground-raises-3m","Hyground Raises €3M Pre-Seed Round for its Sovereign SRE Agent for Enterprise IT Operations","Hyground has closed a €3 million pre-seed funding round backed by Partech, Adesso Ventures, Angel Invest, and Plug and Play. The round will accelerate the development of Hyground's Sovereign SRE Agent, an AI system that runs entirely within a customer's own infrastructure and gives every engineer on a team the investigative depth of a seasoned senior SRE.\n\nThe announcement comes at KubeCon Amsterdam, a community at the forefront of the infrastructure challenges the company was built to solve.\n\n## The $400 billion problem hiding in plain sight\n\nPicture a customer mid-checkout on an online store. Cart full, ready to pay. The page goes blank. Items vanish. 
Within seconds, trust and revenue are gone.\n\nHigh-profile outages like this are only the visible tip. Modern production systems constantly experience smaller incidents, degradations, and silent failures beneath the surface that collectively cost the global economy hundreds of billions of dollars every year.\n\nThe root cause is structural. Modern infrastructure generates enormous volumes of logs, metrics, traces, and events across fragmented tools and systems. Observability platforms surface this data but stop short of solving the problem. Someone still has to correlate the signals, navigate the dashboards, and diagnose the issue under pressure. That process is slow, complex, and entirely dependent on whoever in the building holds the most context.\n\nOperational knowledge accumulates in people, not systems. When those people are unavailable, teams grind to a halt. The alert fires. The engineer who truly understands the system is unreachable. Everyone else is stuck at \"something is wrong\" without a path to \"why.\" An investigation that should take twenty minutes takes five hours.\n\nHyground was built to close that gap.\n\n## What Hyground does\n\nHyground is a Sovereign SRE Agent. It runs inside a customer's own Kubernetes cluster, connects to their existing observability stack, and delivers the same investigative depth as the most experienced engineer on the team to every engineer, around the clock.\n\nWhen an alert fires, Hyground begins its investigation automatically. By the time an engineer checks their phone, the diagnosis is already running. The system correlates signals across Kubernetes events, Prometheus metrics, Loki logs, CI\u002FCD pipelines, deployment history, and internal documentation. 
It returns not more data to interpret, but a clear answer: what happened, why it happened, and what to do next.\n\nThe platform captures operational patterns and institutional knowledge that typically reside with senior engineers and makes them accessible across the organisation. During incidents it provides structured root cause analysis and recommended remediation steps. Between incidents it automates the repetitive investigative work that consumes a disproportionate share of SRE time every day.\n\n## Built for sovereignty and enterprise trust\n\nMost AIOps products require organisations to send logs, metrics, and traces to a third-party cloud. For regulated industries, critical infrastructure, and public sector organisations, that is not an option.\n\nHyground is self-hosted by default and fully air-gapped where required. It integrates natively with existing infrastructure without architectural changes, operates independently of any specific vendor or tooling ecosystem, and keeps all operational data inside the customer's perimeter.\n\n## Beyond incident response\n\nIncident response is where Hyground earns its place on day one. The scope goes further.\n\nBy connecting to virtually every relevant data source across a production environment, Hyground becomes a continuous intelligence layer for IT operations. Teams can automate the repetitive groundwork that consumes up to 70% of SRE time, turning manual analysis into scheduled or event-driven workflows. This covers cost monitoring, resource optimisation, security operations, and performance analysis across environments. As operational data becomes unified and continuously analysed, the range of what can be automated keeps expanding.\n\n## Proven in production\n\nHyground is already in production at Deutsche Bahn and ifm, a leading German automation company. Both run complex, high-availability systems where downtime carries direct operational and financial consequences. 
Incident resolution times have dropped by up to 80%.\n\nEngineers previously spending hours resolving incidents are now closing them in minutes. Platform teams buried in support tickets are seeing self-service rates climb. Organisations where a single senior engineer was a critical knowledge dependency are building operational resilience that no longer depends on any one person being available.\n\n\"Hyground runs your software operations so that you can run your business. Our goal is to give every company a virtual SRE team that continuously understands their systems and resolves operational issues before they become downtime.\" — Dominik Rehbock, CEO of Hyground\n\n## What the funding enables\n\nThree million euros is enough to build something serious. The investment from Partech, Adesso Ventures, Angel Invest, and Plug and Play confirms what Hyground has believed from the start: the market for sovereign, enterprise-grade AI operations tooling is real and underserved.\n\nThe funding accelerates work already underway: deepening the agent's investigative and autonomous capabilities, expanding beyond incident resolution into FinOps and SecOps, and building the integrations that make Hyground native to the environments enterprise customers already run.\n\n## What comes next\n\nHyground is at KubeCon Amsterdam this week. For the SREs, platform engineers, engineering managers, and CTOs living with these challenges every day, come find the team.\n\nWith this round closed, Hyground is doubling down on autonomous operations: more agentic, more proactive, working toward a future where AI handles the full operational lifecycle so engineering teams can focus on building.\n\nThe €3M gives Hyground the runway to turn production proof into something repeatable and scalable for enterprises across Europe and beyond. Operational knowledge should not live in people's heads. It should be institutional, available, and reliable for every engineer, at any hour. 
That is what Hyground is building.","https:\u002F\u002Fcdn.prod.website-files.com\u002F69b9da099afc8bd6bb5eaf1f\u002F69c1707be63c7f65a0f23899_Twitter%20%20X%20Post%20-%20%243M%20Pre-seed%20Round%20Funding%20Announcement.png",[223],"2026-04-15","2026-05-03T13:47:55.545Z","2026-05-04T12:52:05.869Z","2026-05-04T12:52:05.876Z",[252],{"id":230,"documentId":231,"name":223,"slug":232,"description":12,"createdAt":233,"updatedAt":234,"publishedAt":235},{"id":254,"documentId":255,"title":256,"slug":257,"excerpt":258,"body":259,"coverImageUrl":220,"readingMinutes":157,"tags":260,"publishedDate":261,"createdAt":262,"updatedAt":263,"publishedAt":264,"coverImage":12,"categories":265},64,"k5lpvjdqi4zjzkvmft5r9nkk","The AI Treadmill: Why Keeping Up Is the Real Engineering Challenge","the-ai-treadmill","Effective operations require specialized, secure, and centralized agent architectures rather than risky local execution.","Three years in generative AI feels like a decade anywhere else. Since late 2022, when ChatGPT made large language models impossible to ignore, we have been fully immersed in applying this technology to software engineering. The pace has been relentless.\n\n## The evolution no one warns you about\n\nThe early days were promising but rough. The first effective tool most engineers encountered was GitHub Copilot, a product that actually predated the generative AI wave and quickly adopted next-token prediction models to offer a smarter IntelliSense. It could auto-complete the next line or few lines of code. Useful, but limited. You still did the thinking. The AI just typed faster.\n\nThen the landscape shifted. Cursor built an entire editor around a fundamentally different interaction model: you could talk to your codebase, query it in natural language, and get answers that were aware of your full project context. Windsurf, launched by Codeium in late 2024, introduced agentic workflows as the first self-described agentic IDE. 
Claude Code and Codex arrived almost simultaneously in early 2025, pushing things toward autonomous, multi-step reasoning over entire repositories. Each of these represented a genuine leap, not just an incremental update.\n\nTo stay effective, you had to jump. And jump again. Over three years, we have worked with at least fifteen to twenty different tools and models. Not out of curiosity, but out of necessity. The model that was state-of-the-art in March was outperformed by July. The tool that introduced a breakthrough feature in Q1 was matched or surpassed by a competitor in Q2.\n\n## The cost most teams underestimate\n\nHere is what rarely gets discussed: \"keeping up with this evolution is a full-time job.\" It is not just about swapping one tool for another. Each new model has different strengths, different failure modes, different optimal prompting strategies. Each new tool requires rethinking workflows, re-evaluating guardrails, and re-learning what works.\n\nIn 2023, you needed heavy manual oversight and extensive prompt engineering to get decent code generation. Non-reasoning models could handle data retrieval through natural language but fell apart on anything requiring multi-step analysis. By 2025, agentic architectures and reasoning-capable models changed what was possible entirely. But only if you rebuilt your approach to match.\n\nVery few teams have the bandwidth for this. Most companies adopt one tool, learn its quirks, and stick with it even as it falls behind. Others chase every new release without the depth to extract real value. Both approaches leave significant capability on the table.\n\n## Now the same thing is happening in operations\n\nEverything described above played out in software development. But we are now seeing the exact same pattern emerge in software operations. AI SRE agents, automated incident analysis, intelligent runbooks, model-driven root cause detection. 
The tools and approaches are evolving just as fast, and the same treadmill applies.\n\nA perfect example played out in just a few months. In November 2024, Anthropic introduced the Model Context Protocol (MCP), an open standard for connecting AI models to external tools and data sources. By mid-2025, MCP had exploded. Server downloads grew from roughly 100,000 to over 8 million. OpenAI, Google, and Microsoft all adopted it. Thousands of MCP servers appeared. For a moment, it looked like the definitive integration layer for AI agents, including for operations use cases.\n\nThen reality set in. Security researchers found serious vulnerabilities across the MCP ecosystem: seven CVEs in a single month, path traversal risks in 82% of analyzed implementations, prompt injection attacks through tool poisoning. At the same time, developers and SREs started connecting local agents via MCP to production environments, interacting with Kubernetes clusters, cloud CLIs, and infrastructure tooling. Powerful, but risky. MCP effectively escalates the user's own permissions to the model, with limited guardrails.\n\nAnd now, just a few months later in early 2026, the narrative is already shifting. Many people still talk about MCP as the standard. But among practitioners who are pushing the boundaries, a different consensus is forming: \"models work best not with custom protocol interfaces but with the interfaces that are already deeply represented in their training data.\" CLI tools and shell execution. A single MCP server can consume tens of thousands of tokens in schema overhead before a model even starts working. A CLI command costs a fraction of that and leverages knowledge the model already has from billions of lines of terminal interactions in its training corpus.\n\nThis shift took months, not years. MCP dominated the conversation throughout 2025. By late 2025, the CLI narrative started gaining traction. 
By March 2026, it is overtaking MCP in practice among the teams that move fastest. Most people have not caught up yet. This is the treadmill in action, now in operations.\n\n## Why operations requires a different setup\n\nWhether engineers use MCP or CLI, the current approach to AI-assisted operations typically means running agents locally. On your own machine, with your own credentials, under your own supervision. For software development, that works. You are writing code, reviewing diffs, running tests. The agent operates in your development environment and you are there to watch it.\n\nOperations is fundamentally different. Incidents do not wait until someone is at their desk. Root cause analysis should not depend on an individual engineer's machine being online. Production access needs organizational controls, audit trails, and permission boundaries, not a developer's personal credentials passed through to a model. And the security risks that are already concerning in development become unacceptable in production: open-ended tool access, privilege escalation, unvetted integrations running against live infrastructure.\n\nLocal agent execution simply does not meet the requirements of production operations.\n\n## Where Hyground stands out\n\nThis is exactly where Hyground comes in. We have spent three years on the AI treadmill, tracking every shift in models, tools, architectures, and integration patterns. We do this so our customers do not have to.\n\nHyground connects to any large language model. Our architecture is built to adapt, so we can leverage the most powerful model capabilities available at any given moment. When reasoning models unlocked genuine incident analysis, we integrated them. When agentic architectures matured enough for reliable multi-step operations workflows, we adopted them. When the integration landscape shifted from MCP to CLI-native tooling, we were already evaluating and adjusting.\n\nBut staying current on models is only half of it. 
Hyground is purpose-built for operations, which means we solve the problems that local agent setups cannot. We provide curated, security-vetted integrations rather than open-ended tool access. We enforce permission boundaries and runtime controls that prevent privilege escalation. We track CVEs across the agent and tooling ecosystem. And Hyground operates independently of any individual engineer's machine, which means incident analysis, monitoring, and operational tasks run continuously, not only when someone happens to be at their desk.\n\nOur customers do not need to track which model handles root cause analysis best this quarter, or whether the integration layer they built six months ago is already a security liability. We do that. We evaluate, we benchmark, we adjust, and we ship the improvements directly into the platform.\n\n## The bottom line\n\nThe pace of AI development is not slowing down. The treadmill that has been running in software development for three years is now running in operations. 
The organizations that extract real, compounding value from AI in their operational workflows will be the ones that either invest heavily in staying at the frontier, or partner with someone who already does.",[194,223],"2026-04-08","2026-05-03T13:47:55.551Z","2026-05-04T12:52:05.893Z","2026-05-04T12:52:05.899Z",[266,267],{"id":208,"documentId":209,"name":194,"slug":210,"description":12,"createdAt":211,"updatedAt":212,"publishedAt":213},{"id":230,"documentId":231,"name":223,"slug":232,"description":12,"createdAt":233,"updatedAt":234,"publishedAt":235},{"id":269,"documentId":270,"title":271,"slug":272,"excerpt":273,"body":274,"coverImageUrl":220,"readingMinutes":117,"tags":275,"publishedDate":276,"createdAt":277,"updatedAt":278,"publishedAt":279,"coverImage":12,"categories":280},51,"yy4w0298kyosxy94j195wapb","Observability Won't Save You at 3 A.M","observability-wont-save-you","Shifting focus from 'full visibility' to automated reasoning and actionability reduces the manual burden on engineers.","The SRE industry keeps telling itself: invest enough in observability and your operations problems go away. The tooling vendors love this story. Spend a few million euros a year on their platforms and you get \"full visibility.\" Except visibility is not the same as resolution.\n\nMany years of working in Observability and Operations have taught me one thing in particular: observability is one step in a three-step problem, and almost nobody talks about the other two. The three steps are observability, interpretability, and actionability.\n\n## Observability\n\nThe majority of the industry still rests on the \"three pillars\" of observability: metrics, logs, traces. You instrument your services, you collect your data, you build dashboards, you tune alerts. And you spend a fortune doing it. With that mindset, Observability has become one of the biggest cost sinks in engineering. 
Companies have invested millions into instrumentation, into massive data platforms, into teams maintaining all of it.\n\nThe promise was always: with enough data, you can solve your operations problems. Here is where it falls apart. \"Observability, as it's implemented by many today, tells you what your system is doing and if something is wrong. It does not tell you why\" and what that data means when things go wrong. That part is on you.\n\n## Interpretability\n\nIt's 3 A.M., an alert fires. Maybe a hundred alerts fire. You get out of bed, open your laptop, and there it is: millions of log lines, 10 million metrics, dozens of dashboards, a wall of noise. Some of those alerts relate to the actual issue. Some don't. Good luck figuring out which is which.\n\nThis is interpretability. Going from raw signals to actually understanding what is wrong. And it is almost entirely manual. Your observability platform gives you the data, maybe a nicer query language, maybe some anomaly detection that fires so often nobody trusts it anymore. But connecting the dots, finding the root cause, understanding why the system is broken right now: that is left to the engineer. One person, half awake, under pressure, hunting for a needle in a haystack.\n\nIf you apply observability really, really well, not the three-pillars version but proper event-driven, high-cardinality observability with carefully structured data, you can get close to solving this. An experienced engineer in a well-instrumented system can find root causes fast. But \"experienced\" is doing heavy lifting there, and in practice almost no company gets to that level. Most teams are stuck with someone spending thirty minutes to two hours digging through data while the incident bleeds out.\n\n## Actionability\n\nSay the engineer finds the root cause. Great. Now what?\n\nObservability cannot help here. By design, observability is completely separated from the systems it monitors. 
It has no connection to the APIs, the infrastructure, the deployment pipelines, the configuration endpoints where you actually make changes. Your dashboard can tell you a pod is crash-looping. It will not roll back the deployment that caused it. Your logs can show you a bad environment variable. They will not fix it.\n\nThe engineer has to leave the observability tool entirely. Open kubectl, or the cloud console, or the CI\u002FCD pipeline. Figure out the right remediation. Assess the risk. Execute the fix. That is actionability, and observability will never get there. It is architecturally incapable of it.\n\n## So Where Does That Leave Us?\n\nObservability will never reach step three. It barely scratches the surface of step two. It generates huge volumes of data at huge cost and then leaves the engineer to do the hard parts: interpreting what the data means and acting on what they find.\n\nWhen production is broken at 3 A.M., you don't want step one. You want the issue resolved. You want to wrap it up, get back to bed, know the system is healthy. That means you need all three steps, and observability only covers the first.\n\nThis is the direction we're building at Hyground. We take the flood of signals your observability stack already produces and give you an interpretation of what is actually wrong. A root cause. A hypothesis for remediation. And very soon, the ability to apply that fix directly, or at the very least guide the engineer to the exact point where one click resolves it.\n\nBecause at the end of the day, nobody cares how many metrics you collected. 
They care whether the problem gets fixed.",[223,193],"2026-04-01","2026-05-03T13:47:55.556Z","2026-05-04T12:52:05.912Z","2026-05-04T12:52:05.919Z",[281,282],{"id":230,"documentId":231,"name":223,"slug":232,"description":12,"createdAt":233,"updatedAt":234,"publishedAt":235},{"id":201,"documentId":202,"name":193,"slug":203,"description":12,"createdAt":204,"updatedAt":205,"publishedAt":206},{"id":284,"documentId":285,"title":286,"slug":287,"excerpt":288,"body":289,"coverImageUrl":220,"readingMinutes":290,"tags":291,"publishedDate":292,"createdAt":293,"updatedAt":294,"publishedAt":295,"coverImage":12,"categories":296},65,"g2uak93dcgsf6hokjv6el6hf","The Silent Killer of Your Engineering Culture: Why the 3 AM Call Destroys More Than Just Sleep","the-silent-killer","Anticipatory stress and poor incident context create a toxic cycle of brain drain and high recruiting costs.","## The Alarm Response - a Biological Perspective\n\nThere is one sound that instantly sends every experienced sysadmin, DevOps engineer, or SRE into a biochemical state of emergency: the specific ringtone of their pager app.\n\nWhen that signal cuts through the silence at 3:14 AM, far more than just waking up happens inside your employee's body. Within milliseconds, adrenaline floods the system, cortisol levels spike, heart rate doubles.\n\nFrom an evolutionary standpoint, the body's response is comparable to facing a saber-toothed tiger; in reality, the engineer is staring at a brightly lit screen showing cryptic error codes from a Kubernetes cluster. This massive gap between the physical stress response (\"fight or flight\") and the required cognitive performance (\"analyze and fix\") is the toxic core of modern on-call duty.\n\nWe often discuss the technical impact of incidents, yet we overlook the physiological cost they cause. 
We are not burning out our best people through too many complex tasks during the day - we are grinding them down through the biochemical stress of the night.\n\n## The Phenomenon of Anticipatory Stress\n\nThe real problem often starts _before_ the alarm goes off. In occupational psychology, this phenomenon is known as **\"Vigilance Decrement\"** or anticipatory stress.\n\nAn SRE on call sleeps differently. They sleep lighter. The subconscious remains in a kind of \"standby mode\", always ready to react to a signal. Studies show that \"the mere knowledge of a potential interruption reduces sleep recovery quality by up to 40%\" - even on nights when the phone stays silent.\n\nThis background noise of anxiety has massive consequences for personal life. The laptop must always be nearby; the weekend trip is planned with the caveat \"hopefully nothing happens\". For the employee, this means: physically present, but mentally never truly free.\n\nIn smaller teams, this pressure intensifies because rotation cycles are shorter. In larger teams, it often leads to diffused responsibility because nobody wants to be the one carrying the firefighter on-call shift.\n\n## The Downward Spiral: From Fatigue to Cynicism\n\nWhen stress becomes chronic, it follows a predictable pattern that we observe in many engineering organizations:\n\n1. **The cognitive dip:** After a night with an alert (or even just poor sleep from the tension), problem-solving capability drops the following day.\n2. **The error rate:** Tired engineers make careless mistakes. A sloppy config change during the day leads to the next incident at night. The vicious cycle begins.\n3. **Alert fatigue:** When monitoring cries \"fire\" too often (especially with false positives), a dangerous defense mechanism kicks in: numbness. Warnings are ignored or dismissed.\n4. **Quiet quitting:** At some point, stress gives way to cynicism. 
The former \"hero feeling\" of having saved the server is replaced by the question: \"Why am I putting myself through this?\"\n\n\"Hero culture\" - the belief that nightly firefighting is just part of the job - is not a sign of commitment. It is a sign that system stability is being maintained at the expense of individual people. That is not heroic - it is unscalable.\n\n## The Hard Cost of Soft Factors\n\nAs an engineering or platform lead, you might think: \"That comes with the territory.\" But let us look at the math:\n\nBurnout in the tech sector is not a matter of personal sensitivity - it is a significant business risk.\n\n- **Brain drain:** The senior engineers are usually the first to leave - the carriers of implicit knowledge. They effortlessly find jobs at companies that manage their on-call load better.\n- **Loss of tribal knowledge:** When the expert who was the only one who knew why the legacy service acts up under peak load leaves, that knowledge is gone for good. No static documentation can compensate for that.\n- **Recruiting costs:** The cost of replacing a senior SRE position (headhunters, 6-month ramp-up phase) far exceeds the investment in better tooling.\n\nThe equation is simple: poor or ineffective incident management causes the highest costs through employee turnover.\n\n## Technology as a Shield: Control Reduces Stress\n\nHow do we break this cycle? We cannot prevent system failures 100% of the time. But we can change how the failure feels for the person on call.\n\nPsychologically, stress is primarily caused by **loss of control**. The feeling of fumbling in the dark at 3 AM, not knowing where to look, while your manager breathes down your neck - that is the strongest driver of burnout.\n\nThis is where **Hyground** comes in. 
We see our platform not just as an efficiency tool, but as a \"cognitive shield\" for your teams.\n\n### Clarity Replaces Information Overload\n\nInstead of sending the employee on a frantic search, Hyground delivers context proactively. When the SRE opens the laptop, they do not just see \"Error 500\" - they see the causal chain: _\"Service A is affected. Last change 4 hours ago on Component B. Similar pattern observed 3 weeks ago.\"_\n\nThis immediate situational awareness gives the engineer control back. Their pulse drops because the \"enemy\" (the root cause) becomes visible and tangible.\n\n### AI Copilot as a Partner, Not a Replacement\n\nOur server-side AI handles the grueling work of log correlation before the human has even logged in. It says: \"I scanned 10 million log lines, here are the 3 relevant ones.\"\n\nThis transforms the task from a panicked search into a guided decision. Uncertainty shrinks to a minimum - and that is exactly what brings the team to resolution significantly faster.\n\n## Conclusion: Duty of Care Is Business Strategy\n\nThe era of relying on exhausted admins as stability guarantors is over. Given growing complexity, the cognitive health of your team is your decisive competitive advantage.\n\nModern incident management does not just optimize server uptime - it also protects the resilience of the people who run them. When you give your employees tools that transform chaos into context, you are not just building stable systems - you are building a team that stays.\n\n**Let us talk about how we can reduce the stress level in your on-call rotation. 
Not just for the metrics, but for your people.**",7,[193,223],"2026-03-25","2026-05-03T13:47:55.562Z","2026-05-04T12:52:05.932Z","2026-05-04T12:52:05.939Z",[297,298],{"id":201,"documentId":202,"name":193,"slug":203,"description":12,"createdAt":204,"updatedAt":205,"publishedAt":206},{"id":230,"documentId":231,"name":223,"slug":232,"description":12,"createdAt":233,"updatedAt":234,"publishedAt":235},{"id":300,"documentId":301,"title":302,"slug":303,"excerpt":304,"body":305,"coverImageUrl":220,"readingMinutes":157,"tags":306,"publishedDate":308,"createdAt":309,"updatedAt":310,"publishedAt":311,"coverImage":12,"categories":312},58,"wi9s214iexop6bxc7opcm4sw","Stop Shouting at Your LLM","stop-shouting","Effective steering relies on high-signal structure and hierarchical clarity rather than aggressive, loud wording.","When we first built our agent system, we leaned heavily on emphasis markers in the system prompt: \"IMPORTANT,\" \"CRITICAL,\" \"URGENT.\" Over time the prompt accumulated more of these markers as we tried to correct edge cases and failures.\n\nIn practice, this didn't scale well. As the prompt got \"louder,\" it became harder to maintain a clear priority order.\n\nThis is **emphasis saturation**: when too many instructions are highlighted, the highlights stop helping. The prompt becomes more forceful in tone, but less clear in structure.\n\n## What we observed in production\n\nWe saw a consistent pattern:\n\n* Adding more emphasis (\"URGENT,\" \"ULTRA IMPORTANT,\" \"!!!,\" bold, repeated warnings) did not reliably improve compliance.\n* Over time, emphasis markers lost impact. We kept escalating wording because earlier escalations stopped working.\n\nWe do not have a clean quantitative measurement for this - this is an operational lesson from iterating on real prompts with real users.\n\n## Why it happens\n\nA useful mental model is that LLMs have a finite capacity to focus on what matters. 
When you add attention magnets, they compete with the actual instructions. If too many things look \"critical,\" the model has to resolve conflicts rather than execute.\n\nAnthropic's post about effective context engineering describes this idea as an \"attention budget\" for context: everything you add competes for limited capacity, so high-signal structure tends to outperform loud wording.\n\nThis is not a claim that \"bold costs X attention units.\" It is a practical explanation for what we observed: shouting reduces clarity, and clarity is what models need.\n\n## Attention is not the same as token count\n\nThis is where teams often overcorrect. The goal is not \"short prompts.\" The goal is **low conflict**.\n\nA short prompt with competing priorities can perform worse than a longer prompt with a clear hierarchy. In practice:\n\n* Few tokens with many \"must\" rules that pull in different directions creates conflict.\n* A bit more context with one primary objective and supporting constraints often works better.\n\nGoogle Gemini's prompt design strategies recommend being precise and direct, and avoiding unnecessary or overly persuasive language.\n\n## You spend the user's steering room\n\nThe highest cost we saw was not raw model quality. It was loss of **steering capacity**.\n\nIf the system prompt uses every emphasis trick available, all caps, bold, repeated \"CRITICAL\" markers, it dominates the instruction landscape. Then the user arrives with normal language and normal constraints, and their guidance has little chance to compete.\n\nThis compounds when you integrate external systems. Every MCP server and third-party integration comes with its own instructions and constraints. The more external sources you pull in, the louder the baseline becomes, and the harder it is for the user to steer final behavior.\n\n## What worked for us\n\nWe got better results by shifting from \"louder\" to \"clearer\":\n\n* Prefer structure over emphasis. 
Use clear sectioning and delimiters (for example XML tags) to separate context, tasks, constraints, and outputs.\n* Make priorities explicit. If there is one rule that dominates, state it once, plainly, early.\n* Reduce conflict. Remove redundant constraints and overlapping requirements.\n* Use emphasis sparingly. If you use it at all, reserve it for exactly one rule, after you have fixed structure and hierarchy.\n* Treat prompt tweaks as finite leverage. If repeated iterations do not fix a failure mode, it is often architectural (tooling, retrieval, guardrails, decomposition), not vocabulary.\n\nApplying these principles helped us get better results at Hyground.",[194,307],"Prompt","2026-03-18","2026-05-03T13:47:55.568Z","2026-05-04T12:52:05.952Z","2026-05-04T12:52:05.957Z",[313,314],{"id":208,"documentId":209,"name":194,"slug":210,"description":12,"createdAt":211,"updatedAt":212,"publishedAt":213},{"id":315,"documentId":316,"name":307,"slug":317,"description":12,"createdAt":318,"updatedAt":319,"publishedAt":320},24,"imwf4avfvlndsptdx9t64u7l","prompt","2026-05-03T19:00:36.656Z","2026-05-04T12:52:05.797Z","2026-05-04T12:52:05.801Z",{"id":125,"documentId":322,"title":323,"slug":324,"excerpt":325,"body":326,"coverImageUrl":220,"readingMinutes":117,"tags":327,"publishedDate":328,"createdAt":329,"updatedAt":330,"publishedAt":331,"coverImage":12,"categories":332},"j25z9jqswez98eeqrcoxmlp6","Agentic Behavior: How to Build Reliable AI Agents for Operations","agentic-behaviour","Successful investigation requires autonomous agents that reason and adapt through iterative loops.","When we started building AI systems for incident analysis at Hyground, we made a classic mistake: we wrote incredibly detailed prompts. Step-by-step instructions. Decision trees. Edge case handling. The prompts got longer. The results got worse.\n\nIt took us a while to understand why.\n\n## The Maze Problem\n\nIncident analysis isn't like following a recipe. It's like navigating a maze. 
You don't know which path leads to the root cause until you've explored it. Each discovery changes where you should look next. The number of steps is unknown, and **the next step depends entirely on what you just learned**.\n\nThis is where traditional prompting breaks down. A prompt runs _before_ you've learned anything. You can't write instructions for a maze you haven't seen yet.\n\nThis is also where AI agents come in.\n\n## What Makes an Agent Different\n\nA chatbot is reactive: prompt in, response out, done. An agent runs in a loop.\n\nThe pattern, called ReAct (Reasoning + Acting), looks like this:\n\n_Chatbot: one pass, path decided upfront. Agent: multiple passes, path discovered at runtime._\n\nThe agent queries a tool, interprets the result and decides what to do next. It might check logs, find something suspicious, query a related service, discover a dependency and trace that upstream. All without being told the specific steps in advance.\n\nAgents handle \"open-ended problems where it's difficult or impossible to predict the required number of steps.\" If you could write out all the steps, you'd have a workflow. When you can't, you need an agent.\n\n## An Agent Investigation in Practice\n\nHere's a simplified example from our system. A monitoring alert fires: API latency spiked to 2.3s.\n\n| Step | Think | Act | Observe |\n|------|-------|-----|---------|\n| 1 | Latency spike—check which endpoints are affected | Query metrics API | \u002Fcheckout endpoint, others normal |\n| 2 | Single endpoint—could be downstream service or database | Check checkout service logs | Connection timeouts to payment service |\n| 3 | Payment service issue—check its health | Query payment service metrics | Healthy, low latency |\n| 4 | Payment service fine but connections timing out—network or connection pool? | Check connection pool metrics | Pool exhausted, 0 available connections |\n| 5 | Pool exhausted—what's holding connections? 
| Query active transactions | 47 transactions stuck waiting on fraud_check |\n| 6 | Found it—why is fraud_check hanging? | Check fraud service | Deployed 23 min ago, new version has deadlock |\n\n**Root cause:** A deployment introduced a deadlock. Six steps, path unpredictable from the start.\n\nA prompt can't encode \"if payment service is healthy but connections timeout, check connection pools, then trace what's holding them.\" Each step depends on what the previous one revealed.\n\n## Why Longer Prompts Make Things Worse\n\nHere's what surprised us: the more detailed our prompts got, the worse the results became. Three reasons:\n\n**1. You're encoding an algorithm for a problem that doesn't have one**\n\nComplex investigation isn't algorithmic. When you write a 2000-word prompt trying to cover every scenario, you're essentially telling the LLM \"here's exactly how to solve this\" for a problem that requires exploration and judgment. The LLM follows your rigid instructions instead of reasoning about what it actually observes.\n\n**2. Attention is a budget, and you're spending it on instructions**\n\nLLMs don't treat all text equally. Research shows they recall information at the beginning and end of prompts better than the middle. Every \"IMPORTANT\" and \"CRITICAL\" you add to steer attention actually _drains_ the attention budget. With a multi-page prompt, the model struggles to focus on what matters in the actual problem.\n\n**3. Context windows aren't as big as they seem**\n\n87% of the context window was consumed by tool definitions alone. Add a verbose prompt, intermediate results from each tool call and you've exhausted your usable context before the interesting work begins.\n\nA 2024 study shows that \"reasoning performance degrades around 3,000 tokens.\" This is well below technical limits. Context is precious working memory, not unlimited storage. Models have improved a lot since that study was published, but the principle still holds true. 
Coding agents for example force context compaction, well before hitting the actual context limit.\n\n## What Actually Works\n\nThe key insight: **don't tell the agent how to solve the problem. Give it the tools and let it reason.**\n\nShort, focused prompts that define the goal and constraints. Not the algorithm. The agent figures out the path.\n\nThis requires:\n\n* **Tool access**: APIs, databases, log systems the agent can query\n* **Clear stopping conditions**: When is the task complete?\n* **Guardrails**: What actions require human approval?\n\nFor incident analysis, this means giving the agent access to observe logs, query metrics and trace dependencies. Then letting it investigate. It might take 3 steps or 30. The agent decides based on what it finds.\n\n## The Limits Are Real\n\nWe're not claiming agents solve everything. Current LLMs like Opus 4.5, Gemini 3 and GPT-5.x are _on the brink_ of handling complex investigation tasks reliably. They excel at information gathering and iterative refinement. They struggle with truly novel reasoning.\n\nReliability is an ongoing challenge. The same prompt can produce different results on different runs. Errors in early steps compound downstream. Production systems need comprehensive observability and human oversight for high-stakes decisions.\n\n## What's Next\n\nSingle-agent architectures hit a ceiling when problems get big. Context fills up. The investigation scope exceeds what one agent can track.\n\nThe solution is multi-agent systems. Specialized agents that collaborate, each with focused context and clear responsibilities. 
We'll cover the patterns for orchestrating multiple agents in our next posts.",[194,193],"2026-03-11","2026-05-03T13:47:55.574Z","2026-05-04T12:52:05.970Z","2026-05-04T12:52:05.975Z",[333,334],{"id":208,"documentId":209,"name":194,"slug":210,"description":12,"createdAt":211,"updatedAt":212,"publishedAt":213},{"id":201,"documentId":202,"name":193,"slug":203,"description":12,"createdAt":204,"updatedAt":205,"publishedAt":206},{"id":336,"documentId":337,"title":338,"slug":339,"excerpt":340,"body":341,"coverImageUrl":220,"readingMinutes":117,"tags":342,"publishedDate":343,"createdAt":344,"updatedAt":345,"publishedAt":346,"coverImage":12,"categories":347},66,"uixexmfe5bancmwxt8fmiska","The Hidden Token Drain: How Intermediate Results Bloat Your AI Agent's Context","the-hidden-token-drain","Multi-step AI workflows often waste tokens by passing large intermediate tool results through the model's context.","Building on our previous exploration of MCP's context consumption challenges, this post examines a less obvious but equally expensive problem: intermediate results in multi-step tool workflows.\n\n## Quick Recap: What Is MCP?\n\nThe Model Context Protocol (MCP) is an open standard that lets AI agents connect to external tools: databases, APIs and file systems through a unified interface. Instead of building custom integrations for each tool, developers implement MCP once and unlock an entire ecosystem.\n\nBut as we covered in our first post, MCP tool definitions consume context. If you do not carefully manage the context window, you can easily consume the majority of your context on tool definitions, leaving little attention for the user's prompt.\n\nToday's problem is different: **what happens when those tools actually run?**\n\n## How Tool Calls Actually Work\n\nBefore diving into the problem, let's understand the mechanics. 
Cloudflare's engineering team provides a clear explanation in their Code Mode post.\n\nWhen an LLM wants to invoke a tool, it outputs special tokens that signal \"this is a tool request.\" These tokens don't have textual equivalents: the LLM is trained to produce them when it wants to call a tool. The agent harness intercepts these tokens, executes the tool, and feeds the result back into the LLM's context window using another special token sequence.\n\n```\nUser: \"What's the weather in Austin?\"\n\nLLM output:\nI will use the Weather MCP server to find out the weather.\n\u003Ctool_call>{\"name\": \"get_current_weather\", \"args\": {\"location\": \"Austin, TX\"}}\u003C\u002Ftool_call>\n\n[Agent executes tool, returns result]\n\n\u003Ctool_result>{\"location\": \"Austin, TX\", \"temperature\": 93, \"conditions\": \"sunny\"}\u003C\u002Ftool_result>\n\nLLM continues:\nIt's 93°F and sunny in Austin.\n```\n\nThis works well for single tool calls. The problem emerges when tools need to chain.\n\n## The Intermediate Results Problem\n\nConsider this scenario from Anthropic's Code Execution with MCP post: _\"Download my meeting transcript from Google Drive and attach it to the Salesforce lead.\"_\n\nYour agent needs to:\n\n1. Fetch the document from Google Drive\n2. Pass its contents to the Salesforce API\n\nHere's what happens:\n\n```\nTOOL CALL: gdrive.getDocument(documentId: \"abc123\")\n   → Returns full transcript content\n    (This entire output enters the LLM's context window)\n\nTOOL CALL: salesforce.updateRecord(\n      objectType: \"Lead\",\n      recordId: \"00Q5f...\",\n      data: { \"Notes\": [full transcript content written out again] }\n   )\n```\n\nThe transcript flows through the LLM's context window **twice**. The model reads the entire document just to copy it to the next tool call. For lengthy documents like a 2-hour meeting transcript, this can mean processing tens of thousands of additional tokens. 
For even larger documents, this may exceed context limits entirely, breaking the workflow.\n\n### The Pattern Scales Poorly\n\nReal-world agent workflows often involve more than two steps. Each step forces the **entire intermediate result** through the model's context window, even when the LLM only needs a summary or a subset. You're paying for tokens that serve no reasoning purpose: they're just being copied from point A to point B.\n\nAdditionally, models are more likely to make mistakes when copying large documents or complex data structures between tool calls.\n\n### Why Not Write a Combined Tool?\n\nYou might think: \"Just create a tool that handles both steps internally.\"\n\nThis approach doesn't scale well. With many tools that might chain together in various combinations, you'd need many combination tools. Your MCP server becomes harder to maintain, and the tool definitions themselves consume the context you're trying to save.\n\n## Solutions: Keeping Intermediates Out of Context\n\nBoth Anthropic and Cloudflare have converged on the same insight: **let the LLM write code instead of making tool calls directly**.\n\n### Code Execution as Orchestration\n\nInstead of the LLM invoking tools one-by-one through special tokens, it writes a script that orchestrates the entire workflow. The script runs in a sandboxed environment, calling tools via API bindings. Only the **final result** enters the LLM's context.\n\n```\n# LLM generates this code:\ntranscript = await gdrive.get_document(\"abc123\")\nawait salesforce.update_record(\n   object_type=\"Lead\",\n   record_id=\"00Q5f...\",\n   data={\"Notes\": transcript}\n)\nprint(\"Lead updated successfully\")\n```\n\nThe transcript never touches the LLM's context window. It flows from Google Drive to Salesforce entirely within the execution environment. The model only sees the final output.\n\nThis approach leverages a key strength: LLMs have seen enormous amounts of real-world code in their training data. 
Tool calling, by contrast, relies on synthetic training data created specifically to teach the model a format it has rarely encountered.\n\n### Programmatic Tool Calling\n\nAnthropic's Programmatic Tool Calling formalizes this pattern. Tools are marked with **`allowed_callers: [\"code_execution\"]`**, enabling them to be invoked from within a sandboxed Python environment.\n\nWhen the code calls a tool, the result is processed by the script and not the model. Anthropic reports the following improvements:\n\n* Average token usage dropped from 43,588 to 27,297 tokens (37% reduction) on complex research tasks\n* Internal knowledge retrieval accuracy improved from 25.6% to 28.5%\n* GAIA benchmark scores improved from 46.5% to 51.2%\n\n### Illustrative Example: Budget Compliance Check\n\nLet's look at a concrete example: _\"Which team members exceeded their Q3 travel budget?\"_\n\n**Traditional approach:**\n\n```\nFetch team members                              →  Tool result enters context\nFor each member, fetch Q3 expenses              →  All expense line items enter context\nFetch budget limits                             →  More context consumption\nLLM manually sums and compares each person      →  Error-prone, slow\n```\n\n**With code execution:**\n\n```\nteam = await get_team_members(\"engineering\")\nexpenses = await asyncio.gather(*[\n   get_expenses(m[\"id\"], \"Q3\") for m in team\n])\nbudgets = {level: await get_budget(level) for level in set(m[\"level\"] for m in team)}\n\nexceeded = [\n   {\"name\": m[\"name\"], \"spent\": sum(e[\"amount\"] for e in exp), \"limit\": budgets[m[\"level\"]][\"travel_limit\"]}\n   for m, exp in zip(team, expenses)\n   if sum(e[\"amount\"] for e in exp) > budgets[m[\"level\"]][\"travel_limit\"]\n]\nprint(json.dumps(exceeded))\n```\n\nThe LLM sees only the filtered final result: not every expense line item processed along the way.\n\n### Token Caching: A Complementary Strategy\n\nToken caching helps when the same tool definitions or prompts appear across multiple requests.\n\nHowever, caching 
doesn't solve the intermediate results problem: it addresses repeated static content, not the dynamic data flowing between tools. Use both strategies together.\n\n### File-Based Intermediate Storage\n\nFor workflows where the LLM needs to inspect intermediate results selectively, consider writing them to files:\n\n```\n# Write large result to file\nwith open(\"\u002Fworkspace\u002Fdata.json\", \"w\") as f:\n   json.dump(large_result, f)\n\n# LLM can now use file tools to read specific portions\n# Only what's actually needed enters context\n```\n\nThis pattern works especially well when combined with tools like **`jq`** for JSON processing or standard text utilities for filtering: letting the agent extract exactly what it needs.\n\n## Implementation Considerations\n\n### Sandboxing\n\nRunning LLM-generated code requires secure isolation. Cloudflare's approach uses V8 isolates, which they describe as \"far more lightweight than containers\": an isolate can start in a handful of milliseconds using only a few megabytes of memory. Other options include containers or serverless functions.\n\n### Privacy Benefits\n\nCode execution can enhance privacy. Intermediate results stay in the sandbox by default: sensitive data never enters the model's context unless explicitly logged. The MCP client can even tokenize PII before it reaches the model, detokenizing only when writing to approved destinations. 
Production deployments should include additional hardening: resource limits, network isolation, and regular security audits of the execution environment.\n\n### When To Use Each Approach\n\n**Traditional tool calling** works well for:\n\n* Simple single-tool invocations\n* Tasks where the LLM needs to reason about intermediate results\n* Quick lookups with small responses\n\n**Code execution** is beneficial when:\n\n* Processing large datasets where you need aggregates or summaries\n* Running multi-step workflows with dependent tool calls\n* Filtering or transforming results before the LLM sees them\n* Parallelizing operations across many items\n\n## Conclusion\n\nThe intermediate results problem becomes visible when agents move beyond simple single-tool queries to complex, multi-step workflows.\n\nThe key insight from both Anthropic and Cloudflare: **LLMs don't need to see data they're not reasoning about**. When a tool result is just passing through to another tool, keep it in an execution environment where code can handle the transfer.\n\nAs agents take on more complex workflows, managing context efficiently becomes critical. 
The combination of on-demand tool discovery and code-based orchestration provides building blocks for agents that can scale.",[194,307],"2026-03-04","2026-05-03T13:47:55.579Z","2026-05-04T12:52:05.988Z","2026-05-04T12:52:05.994Z",[348,349],{"id":208,"documentId":209,"name":194,"slug":210,"description":12,"createdAt":211,"updatedAt":212,"publishedAt":213},{"id":315,"documentId":316,"name":307,"slug":317,"description":12,"createdAt":318,"updatedAt":319,"publishedAt":320},{"id":86,"documentId":351,"title":352,"slug":353,"excerpt":354,"body":355,"coverImageUrl":220,"readingMinutes":157,"tags":356,"publishedDate":357,"createdAt":358,"updatedAt":359,"publishedAt":360,"coverImage":12,"categories":361},"imq36sly7qfch0qxqbyhc928","Why 87% of Your Prompt Isn't Your Prompt","why-87-of-your-prompt-isnt-your-prompt","Loading every available tool definition upfront causes significant performance degradation and wastes the model's limited attention budget.","When OpenAI introduced function calling in June 2023, it felt like the missing piece for building useful AI agents. Finally, LLMs could interact with the real world. But anyone who shipped production systems quickly learned the truth: it was finicky. You had to manage the tool call loop yourself, handle errors gracefully, and hope the model picked the right function from your carefully crafted definitions.\n\nThen came MCP.\n\nIn November 2024, Anthropic open-sourced the Model Context Protocol: a universal adapter for connecting LLMs to external systems. Instead of building N×M custom integrations (N applications × M data sources), you build N+M: each application implements the MCP client once, each tool implements the server once, and everything interoperates.\n\nWithin a year, MCP achieved something rare: cross-competitor adoption. OpenAI, Google, and Microsoft all support it. SDKs exist for Python, TypeScript, Go, Rust, and more. 
The community has built thousands of servers covering everything from GitHub to Salesforce to local filesystems.\n\n## The MCP Protocol\n\nMCP servers allow LLMs to discover and use tools at runtime. For every tool, you get a description and information about the input and output formats. The LLM uses this information to select the appropriate tool for the current task.\n\nThis allows you to dynamically implement tools and provide them to the LLM. But while MCP solves the problem of tool discovery, it introduces a new one: context composition.\n\nHere's what happens when you invoke an MCP-enabled agent. The context window is composed of three parts: the user prompt, your system instructions, and all connected MCP tool definitions.\n\nAt Hyground, we connected our AI Ops agents to the tools they needed: log and metric analysis, documentation integration, infrastructure provider integration, and more.\n\n## The 87% context that is not your prompt\n\n87% of our context was MCP tool definitions. 11.4% was instructions. The user's actual prompt? 1.6%.\n\nThis isn't a Hyground-specific problem. The MCP specification requires all tool definitions to be loaded upfront. There's no native mechanism for semantic filtering or lazy loading. Every connected server dumps its full schema into context before the LLM sees a single user token.\n\nThe consequences compound. Major clients have imposed hard limits: Cursor caps at 40 tools, GitHub Copilot at 128. These caps exist because LLM performance degrades when selecting from large, flat tool lists. The model wastes attention on irrelevant tool descriptions, and intermediate tool results further bloat the context.\n\n## The Solution: Dynamic tool discovery\n\nThe industry is converging on a pattern: don't load all tools upfront. Instead, give the agent a discovery mechanism.\n\nThe idea is straightforward: instead of injecting every tool definition into context, provide a discovery tool that can query what's available. 
The agent first discovers which servers and capabilities exist, then selectively loads only the schemas it needs for the current task.\n\nAnthropic recently published their approach: present MCP servers as code APIs with TypeScript wrappers. The agent discovers tools by exploring a filesystem, reads only the definitions it needs, and processes results in an execution environment before returning to the model. Their reported result: \"context usage dropped from 150,000 tokens to 2,000—a 98.7% reduction.\"\n\nCloudflare arrived at the same insight independently, calling it \"Code Mode\". Their insight: LLMs have seen millions of lines of real TypeScript in training, but only synthetic examples of tool calls. Wrapping MCP tools as TypeScript APIs lets the model leverage that deep familiarity.\n\nMCP solved the right problem—standardizing how AI systems connect to tools. But the current architecture has a scaling ceiling that becomes painfully obvious in production. If you're building MCP integrations today, measure your context composition. You might be surprised how little of your prompt budget actually goes to the prompt. Consider implementing layered discovery patterns rather than flat tool loading.\n\nThe good news: the community is actively working on this. Proposals for hierarchical tool management and lazy loading are under discussion. 
Until then, measure your context—you might find that 87% of your prompt isn't your prompt either.",[194,307],"2026-02-25","2026-05-03T13:47:55.586Z","2026-05-04T12:52:06.006Z","2026-05-04T12:52:06.013Z",[362,363],{"id":208,"documentId":209,"name":194,"slug":210,"description":12,"createdAt":211,"updatedAt":212,"publishedAt":213},{"id":315,"documentId":316,"name":307,"slug":317,"description":12,"createdAt":318,"updatedAt":319,"publishedAt":320},{"id":365,"documentId":366,"title":367,"slug":368,"excerpt":369,"body":370,"coverImageUrl":220,"readingMinutes":117,"tags":371,"publishedDate":373,"createdAt":374,"updatedAt":375,"publishedAt":376,"coverImage":12,"categories":377},63,"lbqbnsao81xiv16tbpf4v1c4","Welcome to the Hyground Blog","welcome-to-the-hyground-blog","Join our journey as we explore how AI is transforming the landscape for DevOps and platform engineering teams.","We're thrilled to launch the official Hyground blog! This is where we'll share our journey, insights, and expertise in the world of AI-powered software operations.\n\n## What to Expect\n\nHere's what you can look forward to:\n\n* **Technical Deep-Dives**: Detailed explorations of how we build and deploy our AI agents\n* **Industry Insights**: Our perspective on the evolving landscape of software operations\n* **Product Updates**: News about new features and capabilities\n* **Team Stories**: Get to know the people behind Hyground\n\n## Why We Started This Blog\n\nAt Hyground, we believe in transparency and knowledge sharing. 
The challenges we face in building autonomous software operations are fascinating, and we want to bring you along for the ride.\n\nWhether you're a DevOps engineer, a platform team lead, or just curious about how AI is transforming IT operations, we hope you'll find value in our content.\n\n## Stay Connected\n\nFollow us for updates on LinkedIn and don't hesitate to reach out if there are topics you'd like us to cover.\n\nWelcome aboard!\n\n_The Hyground Team_",[372],"News","2026-02-18","2026-05-03T13:47:55.591Z","2026-05-04T12:52:06.024Z","2026-05-04T12:52:06.030Z",[378],{"id":379,"documentId":380,"name":372,"slug":381,"description":12,"createdAt":382,"updatedAt":383,"publishedAt":384},25,"yw1sko7ykjhkp9dcjndkmaro","news","2026-05-03T19:00:36.659Z","2026-05-04T12:52:05.806Z","2026-05-04T12:52:05.810Z",{"pagination":386},{"page":113,"pageSize":379,"pageCount":113,"total":387},11]