<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>reinforcement-learning &#8211; Noise</title>
	<atom:link href="https://noise.getoto.net/tag/reinforcement-learning/feed/" rel="self" type="application/rss+xml" />
	<link>https://noise.getoto.net</link>
	<description>The collective thoughts of the interwebz</description>
	<lastBuildDate>Sat, 25 Oct 2025 22:01:00 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>
	hourly	</sy:updatePeriod>
	<sy:updateFrequency>
	1	</sy:updateFrequency>
	<generator>https://wordpress.org/?v=6.8.2</generator>
	<item>
		<title>Post-Training Generative Recommenders with Advantage-Weighted Supervised Finetuning</title>
		<link>https://noise.getoto.net/2025/10/26/post-training-generative-recommenders-with-advantage-weighted-supervised-finetuning/</link>
		
		<dc:creator><![CDATA[Netflix Technology Blog]]></dc:creator>
		<pubDate>Sat, 25 Oct 2025 21:32:37 +0000</pubDate>
				<category><![CDATA[AI]]></category>
		<category><![CDATA[large language models]]></category>
		<category><![CDATA[Recommendations]]></category>
		<category><![CDATA[reinforcement-learning]]></category>
		<guid isPermaLink="false">https://medium.com/p/61a538d717a9</guid>

					<description><![CDATA[Authors: Keertana Chidambaram, Qiuling Xu, Ko-Jen Hsiao, Moumita Bhattacharya (*The work was done when Keertana interned at Netflix.) Introduction: This blog focuses on post-training generative recommender systems. Generative recommenders (GRs) represent a ...]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Reinforcement Learning for Budget Constrained Recommendations</title>
		<link>https://noise.getoto.net/2022/08/24/reinforcement-learning-for-budget-constrained-recommendations/</link>
		
		<dc:creator><![CDATA[Netflix Technology Blog]]></dc:creator>
		<pubDate>Wed, 24 Aug 2022 17:57:33 +0000</pubDate>
				<category><![CDATA[recommendation-system]]></category>
		<category><![CDATA[reinforcement-learning]]></category>
		<guid isPermaLink="false">https://medium.com/p/6cbc5263a32a</guid>

					<description><![CDATA[by Ehtsham Elahi with James McInerney, Nathan Kallus, Dario Garcia Garcia and Justin Basilico. Introduction: This writeup is about using reinforcement learning to construct an optimal list of recommendations when the user has a finite time budget to make a ...]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
		<item>
		<title>Emerging Solutions for Operations Research on AWS</title>
		<link>https://noise.getoto.net/2021/09/03/emerging-solutions-for-operations-research-on-aws/</link>
		
		<dc:creator><![CDATA[Randy DeFauw]]></dc:creator>
		<pubDate>Fri, 03 Sep 2021 17:14:45 +0000</pubDate>
				<category><![CDATA[Amazon Braket]]></category>
		<category><![CDATA[Amazon DynamoDB]]></category>
		<category><![CDATA[Amazon Elasticsearch Service]]></category>
		<category><![CDATA[Amazon Sagemaker]]></category>
		<category><![CDATA[Architecture]]></category>
		<category><![CDATA[AWS Glue]]></category>
		<category><![CDATA[AWS Lambda]]></category>
		<category><![CDATA[Expert (400)]]></category>
		<category><![CDATA[reinforcement-learning]]></category>
		<guid isPermaLink="false">http://noise.getoto.net/?guid=21bab6665a43a060591c238bef73000c</guid>

					<description><![CDATA[Operations research (OR) uses mathematical and analytical tools to arrive at optimal solutions for complex business problems like workforce scheduling. The mathematical techniques used to solve these problems, such as linear programming and mixed-integer programming, require the use of optimization software (solvers).  There are several popular and powerful solvers available, ranging from commercial options like […]]]></description>
		
		
		<enclosure url="" length="0" type="" />

			</item>
	</channel>
</rss>

<!--
Performance optimized by W3 Total Cache. Learn more: https://www.boldgrid.com/w3-total-cache/

Object Caching 34/89 objects using Memcached
Page Caching using Disk: Enhanced 
Lazy Loading (feed)
Database Caching using Memcached

Served from: noise.getoto.net @ 2025-12-08 18:13:59 by W3 Total Cache
-->