<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>KyungYong Shim &#8211; Noise</title>
	<atom:link href="https://noise.getoto.net/author/kyungyong-shim/feed/" rel="self" type="application/rss+xml" />
	<link>https://noise.getoto.net</link>
	<description>The collective thoughts of the interwebz</description>
	<lastBuildDate>Fri, 21 Nov 2025 03:42:56 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>
	hourly	</sy:updatePeriod>
	<sy:updateFrequency>
	1	</sy:updateFrequency>
	<generator>https://wordpress.org/?v=6.8.2</generator>
	<item>
		<title>Serverless strategies for streaming LLM responses</title>
		<link>https://noise.getoto.net/2025/11/21/serverless-strategies-for-streaming-llm-responses/</link>
		
		<dc:creator><![CDATA[KyungYong Shim]]></dc:creator>
		<pubDate>Fri, 21 Nov 2025 03:42:56 +0000</pubDate>
				<category><![CDATA[Advanced (300)]]></category>
		<category><![CDATA[Amazon API Gateway]]></category>
		<category><![CDATA[Amazon Bedrock]]></category>
		<category><![CDATA[Amazon Simple Queue Service (SQS)]]></category>
		<category><![CDATA[AWS AppSync]]></category>
		<category><![CDATA[AWS Lambda]]></category>
		<category><![CDATA[Compute]]></category>
		<category><![CDATA[generative AI]]></category>
		<category><![CDATA[serverless]]></category>
		<category><![CDATA[Technical How-to]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=fbbaddccc055d9270ddb66afc3fce11c</guid>

					<description><![CDATA[Modern generative AI applications often need to stream large language model (LLM) outputs to users in real-time. Instead of waiting for a complete response, streaming delivers partial results as they become available, which significantly improves the user experience for chat interfaces and long-running AI tasks. This post compares three serverless approaches to handle Amazon Bedrock LLM streaming on Amazon Web Services (AWS), which helps you choose the best fit for your application.]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
	</channel>
</rss>

<!--
Performance optimized by W3 Total Cache. Learn more: https://www.boldgrid.com/w3-total-cache/

Object Caching 42/44 objects using Memcached
Page Caching using Disk: Enhanced 
Lazy Loading (feed)
Database Caching using Memcached

Served from: noise.getoto.net @ 2026-03-13 12:44:59 by W3 Total Cache
-->