<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Containers &#8211; Noise</title>
	<atom:link href="https://noise.getoto.net/tag/containers/feed/" rel="self" type="application/rss+xml" />
	<link>https://noise.getoto.net</link>
	<description>The collective thoughts of the interwebz</description>
	<lastBuildDate>Fri, 21 Nov 2025 21:34:45 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>
	hourly	</sy:updatePeriod>
	<sy:updateFrequency>
	1	</sy:updateFrequency>
	<generator>https://wordpress.org/?v=6.8.2</generator>
	<item>
		<title>Build production-ready applications without infrastructure complexity using Amazon ECS Express Mode</title>
		<link>https://noise.getoto.net/2025/11/21/build-production-ready-applications-without-infrastructure-complexity-using-amazon-ecs-express-mode/</link>
		
		<dc:creator><![CDATA[Donnie Prakoso]]></dc:creator>
		<pubDate>Fri, 21 Nov 2025 21:34:45 +0000</pubDate>
				<category><![CDATA[Amazon Elastic Container Registry]]></category>
		<category><![CDATA[Amazon Elastic Container Service]]></category>
		<category><![CDATA[Compute]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[news]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=8e9f731aabc7c5cb8f026e205cafb756</guid>

					<description><![CDATA[Amazon ECS Express Mode simplifies containerized application deployment by automating infrastructure setup through a single command, allowing developers to focus on building applications while following AWS best practices.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Amazon Elastic Kubernetes Service gets independent affirmation of its zero operator access design</title>
		<link>https://noise.getoto.net/2025/11/12/amazon-elastic-kubernetes-service-gets-independent-affirmation-of-its-zero-operator-access-design/</link>
		
		<dc:creator><![CDATA[Manuel Mazarredo]]></dc:creator>
		<pubDate>Wed, 12 Nov 2025 18:31:16 +0000</pubDate>
				<category><![CDATA[Amazon EKS]]></category>
		<category><![CDATA[Amazon Elastic Kubernetes Service]]></category>
		<category><![CDATA[announcements]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Foundational (100)]]></category>
		<category><![CDATA[security]]></category>
		<category><![CDATA[Security Blog]]></category>
		<category><![CDATA[Security, Identity & Compliance]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=a258594169b5c08ee09bc871cfdda63a</guid>

					<description><![CDATA[Today, we’re excited to announce the Amazon Elastic Kubernetes Service (Amazon EKS) zero operator access posture. Because security is our top priority at Amazon Web Services (AWS), we designed an operational architecture to meet the data privacy posture our regulated and most stringent customers want in a managed Kubernetes service, giving them continued confidence to […]]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>BASF Digital Farming builds a STAC-based solution on Amazon EKS</title>
		<link>https://noise.getoto.net/2025/10/22/basf-digital-farming-builds-a-stac-based-solution-on-amazon-eks/</link>
		
		<dc:creator><![CDATA[Kevin S. Ridolfi]]></dc:creator>
		<pubDate>Wed, 22 Oct 2025 16:21:09 +0000</pubDate>
				<category><![CDATA[agriculture]]></category>
		<category><![CDATA[Amazon API Gateway]]></category>
		<category><![CDATA[Amazon Elastic Kubernetes Service]]></category>
		<category><![CDATA[Amazon RDS]]></category>
		<category><![CDATA[Amazon Simple Storage Service (S3)]]></category>
		<category><![CDATA[AWS Lambda]]></category>
		<category><![CDATA[Compute]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[RDS for PostgreSQL]]></category>
		<category><![CDATA[Satellite]]></category>
		<category><![CDATA[serverless]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=07946d9584759d54cbb6817f8ea71165</guid>

					<description><![CDATA[This post was co-written with Frederic Haase and Julian Blau with BASF Digital Farming GmbH. At xarvio – BASF Digital Farming, our mission is to empower farmers around the world with cutting-edge digital agronomic decision-making tools. Central to this mission is our crop optimization platform, xarvio FIELD MANAGER, which delivers actionable insights through a range […]]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Deploy your own AI vibe coding platform — in one click!</title>
		<link>https://noise.getoto.net/2025/09/23/deploy-your-own-ai-vibe-coding-platform-in-one-click/</link>
		
		<dc:creator><![CDATA[Ashish Kumar Singh]]></dc:creator>
		<pubDate>Tue, 23 Sep 2025 14:00:00 +0000</pubDate>
				<category><![CDATA[AI Week]]></category>
		<category><![CDATA[Cloudflare for SaaS]]></category>
		<category><![CDATA[Cloudflare Workers]]></category>
		<category><![CDATA[Containers]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=37929fc345ba369f2aa0f0d205345e7e</guid>

					<description><![CDATA[Introducing VibeSDK, an open-source AI "vibe coding" platform that anyone can deploy to build their own custom platform. Comes ready with code generation, sandbox environment, and project deployment.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Partnering with OpenAI to bring their new open models onto Cloudflare Workers AI</title>
		<link>https://noise.getoto.net/2025/08/06/partnering-with-openai-to-bring-their-new-open-models-onto-cloudflare-workers-ai/</link>
		
		<dc:creator><![CDATA[Michelle Chen]]></dc:creator>
		<pubDate>Tue, 05 Aug 2025 21:05:00 +0000</pubDate>
				<category><![CDATA[AI]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Developers]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=a23d23c4094a40f631d1c6373fc7d8b2</guid>

					<description><![CDATA[OpenAI’s newest open-source models are now available on Cloudflare Workers AI on Day 0, with support for Responses API, Code Interpreter and Web Search (coming soon).]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Optimize traffic costs of Amazon MSK consumers on Amazon EKS with rack awareness</title>
		<link>https://noise.getoto.net/2025/07/30/optimize-traffic-costs-of-amazon-msk-consumers-on-amazon-eks-with-rack-awareness/</link>
		
		<dc:creator><![CDATA[Austin Groeneveld]]></dc:creator>
		<pubDate>Wed, 30 Jul 2025 16:35:14 +0000</pubDate>
				<category><![CDATA[Advanced (300)]]></category>
		<category><![CDATA[Amazon Elastic Kubernetes Service]]></category>
		<category><![CDATA[Amazon Managed Streaming for Apache Kafka (Amazon MSK)]]></category>
		<category><![CDATA[Analytics]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Technical How-to]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=d84cd540d2cdc76b2a5b5d22ce639504</guid>

					<description><![CDATA[In this post, we walk you through a solution for implementing rack awareness in consumer applications that are dynamically deployed across multiple Availability Zones using Amazon EKS.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Accelerate safe software releases with new built-in blue/green deployments in Amazon ECS</title>
		<link>https://noise.getoto.net/2025/07/17/accelerate-safe-software-releases-with-new-built-in-blue-green-deployments-in-amazon-ecs/</link>
		
		<dc:creator><![CDATA[Donnie Prakoso]]></dc:creator>
		<pubDate>Thu, 17 Jul 2025 17:02:39 +0000</pubDate>
				<category><![CDATA[Amazon Elastic Container Service]]></category>
		<category><![CDATA[announcements]]></category>
		<category><![CDATA[AWS Summit New York]]></category>
		<category><![CDATA[Compute]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Featured]]></category>
		<category><![CDATA[launch]]></category>
		<category><![CDATA[news]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=e69a977be55873df44cfb2b1fb329dfb</guid>

					<description><![CDATA[Perform safer container application deployments without custom deployment tooling, enabling you to ship software updates more frequently with near-instantaneous rollback capability.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Containers are available in public beta for simple, global, and programmable compute</title>
		<link>https://noise.getoto.net/2025/06/24/containers-are-available-in-public-beta-for-simple-global-and-programmable-compute/</link>
		
		<dc:creator><![CDATA[Gabi Villalonga Simón]]></dc:creator>
		<pubDate>Tue, 24 Jun 2025 16:00:22 +0000</pubDate>
				<category><![CDATA[AI]]></category>
		<category><![CDATA[Cloudflare Workers]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Developers]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=e603c9f11c6cc8b65b5936693d412f6f</guid>

					<description><![CDATA[Cloudflare Containers are now available in public beta. Deploy simple, global, and programmable containers alongside your Workers.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Enhance AI-assisted development with Amazon ECS, Amazon EKS and AWS Serverless MCP server</title>
		<link>https://noise.getoto.net/2025/05/29/enhance-ai-assisted-development-with-amazon-ecs-amazon-eks-and-aws-serverless-mcp-server/</link>
		
		<dc:creator><![CDATA[Elizabeth Fuentes]]></dc:creator>
		<pubDate>Thu, 29 May 2025 19:11:49 +0000</pubDate>
				<category><![CDATA[Amazon API Gateway]]></category>
		<category><![CDATA[Amazon Elastic Container Registry]]></category>
		<category><![CDATA[Amazon Elastic Container Service]]></category>
		<category><![CDATA[Amazon Elastic Kubernetes Service]]></category>
		<category><![CDATA[Amazon EMR]]></category>
		<category><![CDATA[Amazon EMR on EKS]]></category>
		<category><![CDATA[Amazon EventBridge]]></category>
		<category><![CDATA[Amazon Q]]></category>
		<category><![CDATA[Amazon Q Developer]]></category>
		<category><![CDATA[Application Services*]]></category>
		<category><![CDATA[AWS Lambda]]></category>
		<category><![CDATA[AWS Step Functions]]></category>
		<category><![CDATA[Compute]]></category>
		<category><![CDATA[Containers]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=4b4b555c621c02a8339a4c0bb1151391</guid>

					<description><![CDATA[AWS introduces specialized Model Context Protocol (MCP) servers for Amazon ECS, EKS, Finch and AWS Serverless, providing real-time contextual responses and service-specific guidance to guide AI assisted application development.]]></description>
		
		
		<enclosure url="https://github.com/aws-samples/amazon-nova-samples/blob/main/multimodal-understanding/getting-started/media/the-sea.mp4" length="0" type="video/mp4" />

			</item>
		<item>
		<title>Amazon Inspector enhances container security by mapping Amazon ECR images to running containers</title>
		<link>https://noise.getoto.net/2025/05/19/amazon-inspector-enhances-container-security-by-mapping-amazon-ecr-images-to-running-containers/</link>
		
		<dc:creator><![CDATA[Elizabeth Fuentes]]></dc:creator>
		<pubDate>Mon, 19 May 2025 17:50:22 +0000</pubDate>
				<category><![CDATA[Amazon Elastic Container Registry]]></category>
		<category><![CDATA[Amazon Elastic Container Service]]></category>
		<category><![CDATA[Amazon Inspector]]></category>
		<category><![CDATA[announcements]]></category>
		<category><![CDATA[Compute]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Featured]]></category>
		<category><![CDATA[launch]]></category>
		<category><![CDATA[news]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=06cd656a80ec23138d9e35cb20ca33ff</guid>

					<description><![CDATA[Amazon ECR image-to-container mapping that shows which images are actively running in containers and how widely they're deployed, and extended vulnerability scanning support for minimal base images including scratch, distroless, and Chainguard containers.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Simple, scalable, and global: Containers are coming to Cloudflare Workers in June 2025</title>
		<link>https://noise.getoto.net/2025/04/11/simple-scalable-and-global-containers-are-coming-to-cloudflare-workers-in-june-2025/</link>
		
		<dc:creator><![CDATA[Mike Nomitch]]></dc:creator>
		<pubDate>Fri, 11 Apr 2025 14:00:00 +0000</pubDate>
				<category><![CDATA[Cloudflare Workers]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Developer Week]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=f6e583506edb58146d9c9d1c27dbef3c</guid>

					<description><![CDATA[Cloudflare Containers are coming this June. Run new types of workloads on our network with an experience that is simple, scalable, global and deeply integrated with Workers.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>How Netflix Accurately Attributes eBPF Flow Logs</title>
		<link>https://noise.getoto.net/2025/04/08/how-netflix-accurately-attributes-ebpf-flow-logs/</link>
		
		<dc:creator><![CDATA[Netflix Technology Blog]]></dc:creator>
		<pubDate>Tue, 08 Apr 2025 17:50:58 +0000</pubDate>
				<category><![CDATA[Containers]]></category>
		<category><![CDATA[eBPF]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[networking]]></category>
		<category><![CDATA[observability]]></category>
		<guid isPermaLink="false">https://medium.com/p/afe6d644a3bc</guid>

					<description><![CDATA[By Cheng Xie, Bryan Shultz, and Christine Xu. In a previous blog post, we described how Netflix uses eBPF to capture TCP flow logs at scale for enhanced network insights. In this post, we delve deeper into how Netflix solved a core problem: accurately at...]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Manage authorization within a containerized workload using Amazon Verified Permissions</title>
		<link>https://noise.getoto.net/2025/03/13/manage-authorization-within-a-containerized-workload-using-amazon-verified-permissions/</link>
		
		<dc:creator><![CDATA[Manuel Heinkel]]></dc:creator>
		<pubDate>Thu, 13 Mar 2025 19:26:58 +0000</pubDate>
				<category><![CDATA[Advanced (300)]]></category>
		<category><![CDATA[Amazon EKS]]></category>
		<category><![CDATA[Amazon Elastic Kubernetes Service]]></category>
		<category><![CDATA[Amazon Verified Permissions]]></category>
		<category><![CDATA[Best practices]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Security Blog]]></category>
		<category><![CDATA[Security, Identity & Compliance]]></category>
		<category><![CDATA[Technical How-to]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=b0a2c8cb9e48f2e8b26abb9761ec2d32</guid>

					<description><![CDATA[Containerization offers organizations significant benefits such as portability, scalability, and efficient resource utilization. However, managing access control and authorization for containerized workloads across diverse environments—from on-premises to multi-cloud setups—can be challenging. This blog post explores four architectural patterns that use Amazon Verified Permissions for application authorization in Kubernetes environments. Verified Permissions is a scalable permissions management and fine-grained […]]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Container Insights with enhanced observability now available in Amazon ECS</title>
		<link>https://noise.getoto.net/2024/12/02/container-insights-with-enhanced-observability-now-available-in-amazon-ecs/</link>
		
		<dc:creator><![CDATA[Donnie Prakoso]]></dc:creator>
		<pubDate>Mon, 02 Dec 2024 03:28:56 +0000</pubDate>
				<category><![CDATA[Amazon CloudWatch]]></category>
		<category><![CDATA[Amazon Elastic Container Service]]></category>
		<category><![CDATA[announcements]]></category>
		<category><![CDATA[AWS re:Invent]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[launch]]></category>
		<category><![CDATA[Management Tools]]></category>
		<category><![CDATA[Monitoring and observability]]></category>
		<category><![CDATA[news]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=07fec3746709a50c67376da6cefee199</guid>

					<description><![CDATA[With granular visibility into container workloads, CloudWatch Container Insights with enhanced observability for Amazon ECS enables proactive monitoring and faster troubleshooting, enhancing observability and improving application performance.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Streamline container application networking with built-in Amazon ECS support in Amazon VPC Lattice</title>
		<link>https://noise.getoto.net/2024/11/18/streamline-container-application-networking-with-built-in-amazon-ecs-support-in-amazon-vpc-lattice/</link>
		
		<dc:creator><![CDATA[Donnie Prakoso]]></dc:creator>
		<pubDate>Mon, 18 Nov 2024 21:55:42 +0000</pubDate>
				<category><![CDATA[Amazon Elastic Container Service]]></category>
		<category><![CDATA[Amazon VPC]]></category>
		<category><![CDATA[Amazon VPC lattice]]></category>
		<category><![CDATA[announcements]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[launch]]></category>
		<category><![CDATA[news]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=11a9286e0309616b9123eb550dfecec1</guid>

					<description><![CDATA[Simplify networking for containerized apps with native VPC Lattice-ECS integration, boosting productivity and flexibility across services.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Noisy Neighbor Detection with eBPF</title>
		<link>https://noise.getoto.net/2024/09/10/noisy-neighbor-detection-with-ebpf/</link>
		
		<dc:creator><![CDATA[Netflix Technology Blog]]></dc:creator>
		<pubDate>Tue, 10 Sep 2024 18:00:21 +0000</pubDate>
				<category><![CDATA[Containers]]></category>
		<category><![CDATA[eBPF]]></category>
		<category><![CDATA[linux]]></category>
		<category><![CDATA[observability]]></category>
		<category><![CDATA[Performance]]></category>
		<guid isPermaLink="false">https://medium.com/p/64b1f4b3bbdd</guid>

					<description><![CDATA[<p><em>By </em><a href="https://www.linkedin.com/in/josefernandezmn/"><em>Jose Fernandez</em></a><em>, </em><a href="https://www.linkedin.com/in/sebastien-dabdoub-2a5a0958/"><em>Sebastien Dabdoub</em></a><em>, </em><a href="https://www.linkedin.com/in/jason-koch-5692172/"><em>Jason Koch</em></a><em>, </em><a href="https://www.linkedin.com/in/artemtkachuk/"><em>Artem Tkachuk</em></a></p><p>The Compute and Performance Engineering teams at Netflix regularly investigate performance issues in our multi-tenant environment. The first step is determining whether the problem originates from the application or the underlying infrastructure. One issue that often complicates this process is the "noisy neighbor" problem. On <a href="https://netflixtechblog.com/titus-the-netflix-container-management-platform-is-now-open-source-f868c9fb5436">Titus</a>, our multi-tenant compute platform, a "noisy neighbor" refers to a container or system service that heavily utilizes the server's resources, causing performance degradation in adjacent containers. We usually focus on CPU utilization because it is our workload's most frequent source of noisy neighbor issues.</p><p>Detecting the effects of noisy neighbors is complex. Traditional performance analysis tools such as<a href="https://www.brendangregg.com/perf.html"> perf</a> can introduce significant overhead, risking further performance degradation. Additionally, these tools are typically deployed after the fact, which is too late for effective investigation.<em> </em>Another challenge is that debugging noisy neighbor issues requires significant low-level expertise and specialized tooling<em>. </em>In this blog post, we'll reveal how we leveraged eBPF to achieve continuous, low-overhead instrumentation of the Linux scheduler, enabling effective self-serve monitoring of noisy neighbor issues. 
Learn how Linux kernel instrumentation can improve your infrastructure observability with deeper insights and enhanced monitoring.</p><h3>Continuous Instrumentation of the Linux Scheduler</h3><p>To ensure the reliability of our workloads that depend on low latency responses, we instrumented the <a href="https://en.wikipedia.org/wiki/Run_queue">run queue</a> latency for each container, which measures the time processes spend in the scheduling queue before being dispatched to the CPU. Extended waiting in this queue can be a telltale of performance issues, especially when containers are not utilizing their total CPU allocation. Continuous instrumentation is critical to catching such matters as they emerge, and eBPF, with its hooks into the Linux scheduler with minimal overhead, enabled us to monitor run queue latency efficiently.</p><p>To emit a run queue latency metric, we leveraged three eBPF hooks: <strong>sched_wakeup, sched_wakeup_new,</strong> and <strong>sched_switch</strong>.</p><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*6bapyclfXZPsUIaXFM-xaQ.png"></figure><p>The <strong>sched_wakeup </strong>and <strong>sched_wakeup_new</strong> hooks are invoked when a process changes state from 'sleeping' to 'runnable.' They let us identify when a process is ready to run and is waiting for CPU time. 
During this event, we generate a timestamp and store it in an eBPF hash map using the process ID as the key.</p><pre>struct {<br>    __uint(type, BPF_MAP_TYPE_HASH);<br>    __uint(max_entries, MAX_TASK_ENTRIES);<br>    __uint(key_size, sizeof(u32));<br>    __uint(value_size, sizeof(u64));<br>} runq_lat SEC(".maps");<br><br>SEC("tp_btf/sched_wakeup")<br>int tp_sched_wakeup(u64 *ctx)<br>{<br>    struct task_struct *task = (void *)ctx[0];<br>    u32 pid = task-&#62;pid;<br>    u64 ts = bpf_ktime_get_ns();<br><br>    bpf_map_update_elem(&#38;runq_lat, &#38;pid, &#38;ts, BPF_NOEXIST);<br>    return 0;<br>}</pre><p>Conversely, the <strong>sched_switch</strong> hook is triggered when the CPU switches between processes. This hook provides pointers to the process currently utilizing the CPU and the process about to take over. We use the upcoming task's process ID (PID) to fetch the timestamp from the eBPF map. This timestamp represents when the process entered the queue, which we had previously stored. 
We then calculate the run queue latency by simply subtracting the timestamps.</p><pre>SEC("tp_btf/sched_switch")<br>int tp_sched_switch(u64 *ctx)<br>{<br>    struct task_struct *prev = (struct task_struct *)ctx[1];<br>    struct task_struct *next = (struct task_struct *)ctx[2];<br>    u32 prev_pid = prev-&#62;pid;<br>    u32 next_pid = next-&#62;pid;<br> <br>    // fetch timestamp of when the next task was enqueued<br>    u64 *tsp = bpf_map_lookup_elem(&#38;runq_lat, &#38;next_pid);<br>    if (tsp == NULL) {<br>        return 0; // missed enqueue<br>    }<br><br>    // calculate runq latency before deleting the stored timestamp<br>    u64 now = bpf_ktime_get_ns();<br>    u64 runq_lat = now - *tsp;<br><br>    // delete pid from enqueued map<br>    bpf_map_delete_elem(&#38;runq_lat, &#38;next_pid);<br>    ....</pre><p>One of the advantages of eBPF is its ability to provide pointers to the actual kernel data structures representing processes or threads, also known as tasks in kernel terminology. This feature enables access to a wealth of information stored about a process. We required the process's cgroup ID to associate it with a container for our specific use case. However, the cgroup information in the struct is safeguarded by an<a href="https://elixir.bootlin.com/linux/v6.6.16/source/include/linux/sched.h#L1225"> RCU (Read Copy Update) lock</a>.</p><p>To safely access this RCU-protected information, we can leverage <a href="https://docs.kernel.org/bpf/kfuncs.html">kfuncs</a> in eBPF. kfuncs are kernel functions that can be called from eBPF programs. There are kfuncs available to lock and unlock RCU read-side critical sections. 
These functions ensure that our eBPF program remains safe and efficient while retrieving the cgroup ID from the task struct.</p><pre>void bpf_rcu_read_lock(void) __ksym;<br>void bpf_rcu_read_unlock(void) __ksym;<br><br>u64 get_task_cgroup_id(struct task_struct *task)<br>{<br>    struct css_set *cgroups;<br>    u64 cgroup_id;<br>    bpf_rcu_read_lock();<br>    cgroups = task-&#62;cgroups;<br>    cgroup_id = cgroups-&#62;dfl_cgrp-&#62;kn-&#62;id;<br>    bpf_rcu_read_unlock();<br>    return cgroup_id;<br>}</pre><p>Having the data ready, we must package it and send it to userspace. For this purpose, we chose the eBPF <a href="https://nakryiko.com/posts/bpf-ringbuf/">ring buffer</a>. It is efficient, high-performing, and user-friendly. It can handle variable-length data records and allows data reading without necessitating extra memory copying or syscalls. However, the sheer amount of data points was causing the userspace program to use too much CPU, so we implemented a rate limiter in eBPF to sample the data effectively.</p><pre>struct {<br>    __uint(type, BPF_MAP_TYPE_RINGBUF);<br>    __uint(max_entries, 256 * 1024);<br>} events SEC(".maps");<br><br>struct {<br>    __uint(type, BPF_MAP_TYPE_PERCPU_HASH);<br>    __uint(max_entries, MAX_TASK_ENTRIES);<br>    __uint(key_size, sizeof(u64));<br>    __uint(value_size, sizeof(u64));<br>} cgroup_id_to_last_event_ts SEC(".maps");<br><br>struct runq_event {<br>    u64 prev_cgroup_id;<br>    u64 cgroup_id;<br>    u64 runq_lat;<br>    u64 ts;<br>};<br><br>SEC("tp_btf/sched_switch")<br>int tp_sched_switch(u64 *ctx)<br>{<br>    // ....<br>    // The previous code<br>    // ....<br> <br>    u64 prev_cgroup_id = get_task_cgroup_id(prev);<br>    u64 cgroup_id = get_task_cgroup_id(next);<br> <br>    // per-cgroup-id-per-CPU rate-limiting <br>    // to balance observability with performance overhead<br>    u64 *last_ts = <br>        bpf_map_lookup_elem(&#38;cgroup_id_to_last_event_ts, &#38;cgroup_id);<br>    u64 last_ts_val = last_ts 
== NULL ? 0 : *last_ts;<br><br>    // check the rate limit for the cgroup_id in consideration<br>    // before doing more work<br>    if (now - last_ts_val &#60; RATE_LIMIT_NS) {<br>        // Rate limit exceeded, drop the event<br>        return 0;<br>    }<br><br>    struct runq_event *event;<br>    event = bpf_ringbuf_reserve(&#38;events, sizeof(*event), 0);<br>  <br>    if (event) {<br>        event-&#62;prev_cgroup_id = prev_cgroup_id;<br>        event-&#62;cgroup_id = cgroup_id;<br>        event-&#62;runq_lat = runq_lat;<br>        event-&#62;ts = now;<br>        bpf_ringbuf_submit(event, 0);<br>        // Update the last event timestamp for the current cgroup_id<br>        bpf_map_update_elem(&#38;cgroup_id_to_last_event_ts, &#38;cgroup_id,<br>            &#38;now, BPF_ANY);<br><br>    }<br><br>    return 0;<br>}</pre><p>Our userspace application, developed in Go, processes events from the ring buffer to emit metrics to our metrics backend, <a href="https://netflixtechblog.com/introducing-atlas-netflixs-primary-telemetry-platform-bd31f4d8ed9a">Atlas</a>. Each event includes a run queue latency sample with a cgroup ID, which we associate with running containers on the host. We categorize it as a system service if no such association is found. When a cgroup ID correlates with a container, we emit a percentile timer Atlas metric (runq.latency) for that container. We also increment a counter metric (sched.switch.out) to monitor preemptions occurring for the container's processes. 
Access to the prev_cgroup_id of the preempted process allows us to tag the metric with the cause of the preemption, whether it's due to a process within the same container (or cgroup), a process in another container, or a system service.</p><p>It's important to highlight that both the runq.latency metric and the sched.switch.out metrics are needed to determine if a container is affected by noisy neighbors, which is the goal we aim to achieve — relying solely on the runq.latency metric can lead to misconceptions. For example, if a container is at or over its cgroup CPU limit, the scheduler will throttle it, resulting in an apparent spike in run queue latency due to delays in the queue. If we were only to consider this metric, we might incorrectly attribute the performance degradation to noisy neighbors when it's actually because the container is hitting its CPU request limits. However, simultaneous spikes in both metrics, mainly when the cause is a different container or system process, clearly indicate a noisy neighbor issue.</p><h3>A Noisy Neighbor Story</h3><p>Below is the runq.latency metric for a server running a single container with ample CPU overhead. The 99th percentile averages 83.4µs (microseconds), serving as our baseline. Although there are some spikes reaching 400µs, the latency remains within acceptable parameters.</p><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*_DcYxRgeDwX5i07IrdTZyA.png"><figcaption>container1’s 99th percentile runq.latency averages 83µs (microseconds), with spikes up to 400µs, without adjacent containers. This serves as our baseline for a container not contending for CPU on a host.</figcaption></figure><p>At 10:35, launching container2, which fully utilized all CPUs on the host, caused a significant 131-millisecond spike (131,000 microseconds) in container1's P99 run queue latency. This spike would be noticeable in the userspace application if it were serving HTTP traffic. 
If userspace app owners reported an unexplained latency spike, we could quickly identify the noisy neighbor issue through run queue latency metrics.</p><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*DJrwEbrWPOxVMS0JP7uE9A.png"><figcaption>Launching container2 at 10:35, which maxes out all CPUs on the host, <strong>caused a 131-millisecond spike in container1’s P99 run queue latency</strong> due to increased preemptions by system processes. This indicates a noisy neighbor issue, where system services compete for CPU time with containers.</figcaption></figure><p>The sched.switch.out metric indicates that the spike was due to increased preemptions by system processes, highlighting a noisy neighbor issue where system services compete with containers for CPU time. Our metrics show that the noisy neighbors were actually system processes, likely triggered by container2 consuming all available CPU capacity.</p><h3>Optimizing eBPF Code</h3><p>We developed an open-source eBPF process monitor called <a href="https://netflixtechblog.com/announcing-bpftop-streamlining-ebpf-performance-optimization-6a727c1ae2e5">bpftop</a> to measure the overhead of eBPF code in this hot kernel path. Our estimates suggest that the instrumentation adds less than 600 nanoseconds to each sched_* hook. We conducted a performance analysis on a Java service running in a container, and the instrumentation did not introduce significant overhead. The performance variance with the run queue profiling code active versus inactive was not measurable in milliseconds.</p><p>During our research on how eBPF statistics are measured in the kernel, we identified an opportunity to improve its calculation. 
We submitted this <a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ce09cbdd988887662546a1175bcfdfc6c8fdd150">patch</a>, which was included in the Linux kernel 6.10 release.</p><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*YD6hkXce9a70AgvSHstgWA.gif"></figure><p>Through trial and error and using bpftop, we identified several optimizations that helped maintain low overhead for this code:</p><ul><li>We found that BPF_MAP_TYPE_HASH was the most performant for storing enqueued timestamps. Using BPF_MAP_TYPE_TASK_STORAGE resulted in nearly a twofold performance decline. BPF_MAP_TYPE_PERCPU_HASH was slightly less performant than BPF_MAP_TYPE_HASH, which was unexpected and requires further investigation.</li><li>The BPF_CORE_READ helper adds 20–30 nanoseconds per invocation. In the case of raw tracepoints, specifically those that are "BTF-enabled" (tp_btf/*), it is safe and more efficient to access the task struct members directly. Andrii Nakryiko recommends this approach in this <a href="https://nakryiko.com/posts/bpf-core-reference-guide/#btf-enabled-bpf-program-types-with-direct-memory-reads">blog post</a>.</li><li>BPF_MAP_TYPE_LRU_HASH maps are 40–50 nanoseconds slower per operation than regular hash maps. Due to space concerns from PID churn, we initially used them for enqueued timestamps. We have since increased the map size, mitigating this risk.</li><li>The sched_switch, sched_wakeup, and sched_wakeup_new are all triggered for kernel tasks, which are identifiable by their PID of 0. We found monitoring these tasks unnecessary, so we implemented several early exit conditions and conditional logic to prevent executing costly operations, such as accessing BPF maps, when dealing with a kernel task. 
Notably, kernel tasks operate through the scheduler queue like any regular process.</li></ul><h3>Conclusion</h3><p>Our findings highlight the value of low-overhead continuous instrumentation of the Linux kernel with eBPF. We have integrated these metrics into customer dashboards, enabling actionable insights and guiding multitenancy performance discussions. We can also now use these metrics to refine CPU isolation strategies to minimize the impact of noisy neighbors. Additionally, thanks to these metrics, we've gained deeper insights into the Linux scheduler.</p><p>This project has also deepened our understanding of eBPF technology and underscored the importance of tools like bpftop for optimizing eBPF code. As eBPF adoption increases, we foresee more infrastructure observability and business logic shifting to it. One promising project in this space is <a href="https://github.com/sched-ext/scx">sched_ext</a>, potentially revolutionizing how scheduling decisions are made and tailored to specific workload needs.</p><img src="https://medium.com/_/stat?event=post.clientViewed&#38;referrerSource=full_rss&#38;postId=64b1f4b3bbdd" width="1" height="1" alt=""><hr><p><a href="https://netflixtechblog.com/noisy-neighbor-detection-with-ebpf-64b1f4b3bbdd">Noisy Neighbor Detection with eBPF</a> was originally published in <a href="https://netflixtechblog.com/">Netflix TechBlog</a> on Medium, where people are continuing the conversation by highlighting and responding to this story.</p>]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Making sense of secrets management on Amazon EKS for regulated institutions</title>
		<link>https://noise.getoto.net/2024/08/19/making-sense-of-secrets-management-on-amazon-eks-for-regulated-institutions/</link>
		
		<dc:creator><![CDATA[Piyush Mattoo]]></dc:creator>
		<pubDate>Mon, 19 Aug 2024 17:43:34 +0000</pubDate>
				<category><![CDATA[Advanced (300)]]></category>
		<category><![CDATA[Amazon EKS]]></category>
		<category><![CDATA[Amazon Elastic Kubernetes Service]]></category>
		<category><![CDATA[AWS Secrets Manager]]></category>
		<category><![CDATA[Best practices]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Governance]]></category>
		<category><![CDATA[Security & Governance]]></category>
		<category><![CDATA[Security Blog]]></category>
		<category><![CDATA[Security, Identity & Compliance]]></category>
		<category><![CDATA[Technical How-to]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=2938b6a078d07f8e6390adbe7fcbb284</guid>

					<description><![CDATA[Amazon Web Services (AWS) customers operating in a regulated industry, such as the financial services industry (FSI) or healthcare, are required to meet their regulatory and compliance obligations, such as the Payment Card Industry Data Security Standard (PCI DSS) or Health Insurance Portability and Accountability Act (HIPAA). AWS offers regulated customers tools, guidance and third-party audit reports […]]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>How to create a pipeline for hardening Amazon EKS nodes and automate updates</title>
		<link>https://noise.getoto.net/2024/06/14/how-to-create-a-pipeline-for-hardening-amazon-eks-nodes-and-automate-updates/</link>
		
		<dc:creator><![CDATA[Nima Fotouhi]]></dc:creator>
		<pubDate>Fri, 14 Jun 2024 16:20:50 +0000</pubDate>
				<category><![CDATA[Amazon EKS]]></category>
		<category><![CDATA[Amazon Elastic Kubernetes Service]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[EC2 Image Builder]]></category>
		<category><![CDATA[hardening]]></category>
		<category><![CDATA[Intermediate (200)]]></category>
		<category><![CDATA[security]]></category>
		<category><![CDATA[Security Blog]]></category>
		<category><![CDATA[Security, Identity & Compliance]]></category>
		<category><![CDATA[Technical How-to]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=981ea2ad21da8be08e3dd692967f5bdc</guid>

					<description><![CDATA[Amazon Elastic Kubernetes Service (Amazon EKS) offers a powerful, Kubernetes-certified service to build, secure, operate, and maintain Kubernetes clusters on Amazon Web Services (AWS). It integrates seamlessly with key AWS services such as Amazon CloudWatch, Amazon EC2 Auto Scaling, and AWS Identity and Access Management (IAM), enhancing the monitoring, scaling, and load balancing of containerized […]]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Securing Amazon ECS workloads on AWS Fargate with customer managed keys</title>
		<link>https://noise.getoto.net/2024/06/11/securing-amazon-ecs-workloads-on-aws-fargate-with-customer-managed-keys/</link>
		
		<dc:creator><![CDATA[Maish Saidel-Keesing]]></dc:creator>
		<pubDate>Mon, 10 Jun 2024 21:16:19 +0000</pubDate>
				<category><![CDATA[Amazon Elastic Container Service]]></category>
		<category><![CDATA[AWS Fargate]]></category>
		<category><![CDATA[AWS Key Management Service*]]></category>
		<category><![CDATA[container]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[serverless]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=a70f8bcc3068d2eb3caa28ff154ffe3c</guid>

					<description><![CDATA[As Amazon CTO Werner Vogels said, “Encryption is the tool we have to make sure that nobody else has access to your data. Amazon Web Services (AWS) built encryption into nearly all of its 165 cloud services. Make use of it. Dance like nobody is watching. Encrypt like everyone is.” Security is the top priority […]]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Introducing Amazon EMR on EKS with Apache Flink: A scalable, reliable, and efficient data processing platform</title>
		<link>https://noise.getoto.net/2024/05/28/introducing-amazon-emr-on-eks-with-apache-flink-a-scalable-reliable-and-efficient-data-processing-platform/</link>
		
		<dc:creator><![CDATA[Kinnar Kumar Sen]]></dc:creator>
		<pubDate>Tue, 28 May 2024 17:07:19 +0000</pubDate>
				<category><![CDATA[Amazon Elastic Kubernetes Service]]></category>
		<category><![CDATA[Amazon EMR]]></category>
		<category><![CDATA[Amazon EMR on EKS]]></category>
		<category><![CDATA[Analytics]]></category>
		<category><![CDATA[Compute]]></category>
		<category><![CDATA[Containers]]></category>
		<category><![CDATA[Intermediate (200)]]></category>
		<category><![CDATA[Technical How-to]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=26e62e995a29a4b407fbaf8902800379</guid>

					<description><![CDATA[AWS recently announced that Apache Flink is generally available for Amazon EMR on Amazon Elastic Kubernetes Service (EKS). Apache Flink is a scalable, reliable, and efficient data processing framework that handles real-time streaming and batch workloads (but is most commonly used for real-time streaming). Amazon EMR on EKS is a deployment option for Amazon EMR […]]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
	</channel>
</rss>

<!--
Performance optimized by W3 Total Cache. Learn more: https://www.boldgrid.com/w3-total-cache/

Object Caching 71/422 objects using Memcached
Page Caching using Disk: Enhanced 
Lazy Loading (feed)
Database Caching using Memcached

Served from: noise.getoto.net @ 2025-12-06 06:25:38 by W3 Total Cache
-->