Path: blob/main/latex-templates/templates/cognitive-science/decision_making.tex
% Decision Making Models Template
% Topics: Expected utility, prospect theory, drift-diffusion, Bayesian decisions, reinforcement learning
% Style: Computational neuroscience report with behavioral modeling

\documentclass[a4paper, 11pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amssymb}
\usepackage{graphicx}
\usepackage{siunitx}
\usepackage{booktabs}
\usepackage{subcaption}
\usepackage[makestderr]{pythontex}

% Theorem environments
\newtheorem{definition}{Definition}[section]
\newtheorem{theorem}{Theorem}[section]
\newtheorem{example}{Example}[section]
\newtheorem{remark}{Remark}[section]

\title{Computational Models of Human Decision Making:\\
From Rational Choice to Bounded Rationality}
\author{Cognitive Neuroscience Laboratory}
\date{\today}

\begin{document}
\maketitle

\begin{abstract}
This report presents a comprehensive computational analysis of human decision-making processes,
from classical normative theories to modern descriptive models. We examine expected utility
theory as the rational benchmark, prospect theory's account of systematic deviations from
rationality, drift-diffusion models for response time distributions, Bayesian frameworks for
decision making under uncertainty, and reinforcement learning models of value-based choice.
Computational simulations reveal the parametric signatures that distinguish these frameworks
and their ability to capture key phenomena including loss aversion, probability weighting,
speed-accuracy tradeoffs, and learning dynamics.
\end{abstract}

\section{Introduction}

Decision making is a fundamental cognitive process that has been studied across economics,
psychology, and neuroscience. Classical economic theory posits that humans are rational agents
who maximize expected utility, but decades of empirical research have revealed systematic
deviations from this normative ideal. Modern computational approaches seek to characterize
both the mechanisms underlying choice behavior and the real-time dynamics of the decision process.

\begin{definition}[Decision Problem]
A decision problem consists of a set of alternatives $\mathcal{A}$, a set of possible outcomes
$\mathcal{O}$, and a probability distribution $p(o|a)$ mapping actions to outcomes. The decision
maker selects an action $a^* \in \mathcal{A}$ to optimize some objective function.
\end{definition}
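
As a minimal, purely illustrative sketch (the outcome values and probabilities here are arbitrary
placeholders and are not used in later analyses), a decision problem can be represented as an outcome
vector together with a matrix of conditional outcome probabilities, with the chosen action maximizing
the expected objective:

\begin{pycode}
# Illustrative sketch of the decision-problem formalism (values are arbitrary
# placeholders): each row of the matrix holds the conditional outcome
# probabilities p(o|a) for one action, and the agent selects the action that
# maximizes the expected objective.
import numpy as np

outcomes = np.array([100.0, 0.0, -50.0])      # possible outcomes in O
p_o_given_a = np.array([[0.2, 0.5, 0.3],      # p(o | a_1)
                        [0.5, 0.1, 0.4]])     # p(o | a_2)
expected_objective = p_o_given_a @ outcomes   # expected value of each action
a_star = int(np.argmax(expected_objective))   # index of the selected action
\end{pycode}

Here the objective is simply expected monetary value; the following sections replace it with
expected utility and its descriptive generalizations.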

\section{Expected Utility Theory}

\subsection{Theoretical Foundation}

Expected utility theory (EUT), formalized by von Neumann and Morgenstern, assumes that decision
makers assign utilities $u(o)$ to outcomes and choose actions that maximize expected utility.

\begin{definition}[Expected Utility]
For a risky prospect that yields outcome $o_i$ with probability $p_i$, the expected utility is:
\begin{equation}
EU = \sum_{i=1}^{n} p_i \cdot u(o_i)
\end{equation}
where $u: \mathcal{O} \rightarrow \mathbb{R}$ is a utility function.
\end{definition}

\begin{theorem}[Risk Aversion]
A decision maker is risk-averse if $u$ is concave ($u'' < 0$), risk-neutral if $u$ is linear,
and risk-seeking if $u$ is convex ($u'' > 0$).
\end{theorem}

\subsection{Computational Analysis}

We now examine the implications of different utility functions for choice behavior under risk.
The curvature of the utility function determines risk preferences: concave functions exhibit
diminishing marginal utility, leading to risk aversion. We simulate decisions over monetary
gambles with varying utility function parameters to demonstrate these effects.

\begin{pycode}
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize, fsolve
from scipy.stats import norm, gamma
from scipy.integrate import odeint
import matplotlib.patches as mpatches

np.random.seed(42)

# Expected Utility Theory
def power_utility(x, alpha=0.5):
    """Power utility function with risk aversion parameter alpha"""
    if alpha == 1.0:
        return x
    return np.sign(x) * np.abs(x)**alpha

def expected_utility(outcomes, probabilities, alpha=0.5):
    """Calculate expected utility for a prospect"""
    utilities = power_utility(outcomes, alpha)
    return np.sum(probabilities * utilities)

# Simulate choice behavior under different risk attitudes
wealth = 100.0
stake = 50.0
win_prob = 0.5

# Range of risk aversion parameters
alphas = np.linspace(0.3, 1.5, 100)
certainty_equivalents = []

for alpha in alphas:
    # Gamble: win or lose $50 with equal probability
    outcomes = np.array([wealth + stake, wealth - stake])
    probs = np.array([win_prob, 1 - win_prob])
    eu_gamble = expected_utility(outcomes, probs, alpha)

    # Find certainty equivalent: CE such that u(wealth + CE) = EU(gamble)
    def objective(ce):
        return power_utility(wealth + ce, alpha) - eu_gamble

    ce = fsolve(objective, 0)[0]
    certainty_equivalents.append(ce)

certainty_equivalents = np.array(certainty_equivalents)

# Risk premium: expected value - certainty equivalent
expected_value = stake * win_prob - stake * (1 - win_prob)  # = 0 for fair gamble
risk_premium = expected_value - certainty_equivalents

# Create first figure
fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Plot 1: Utility functions with different risk attitudes
x_wealth = np.linspace(0, 200, 500)
ax1.plot(x_wealth, power_utility(x_wealth, 0.5), 'b-', linewidth=2.5, label=r'$\alpha=0.5$ (Risk-averse)')
ax1.plot(x_wealth, power_utility(x_wealth, 1.0), 'g--', linewidth=2.5, label=r'$\alpha=1.0$ (Risk-neutral)')
ax1.plot(x_wealth, power_utility(x_wealth, 1.5), 'r-.', linewidth=2.5, label=r'$\alpha=1.5$ (Risk-seeking)')
ax1.axvline(wealth, color='gray', linestyle=':', alpha=0.5)
ax1.axvline(wealth + stake, color='orange', linestyle=':', alpha=0.5)
ax1.axvline(wealth - stake, color='purple', linestyle=':', alpha=0.5)
ax1.set_xlabel('Wealth (\$)', fontsize=11)
ax1.set_ylabel('Utility $u(x)$', fontsize=11)
ax1.set_title('Expected Utility Theory: Risk Attitudes', fontsize=12, fontweight='bold')
ax1.legend(fontsize=10)
ax1.grid(True, alpha=0.3)

# Plot 2: Risk premium as function of risk aversion
ax2.plot(alphas, risk_premium, 'b-', linewidth=2.5)
ax2.axhline(0, color='black', linestyle='--', alpha=0.5)
ax2.axvline(1.0, color='gray', linestyle=':', alpha=0.5, label='Risk-neutral')
ax2.fill_between(alphas, risk_premium, 0, where=(alphas < 1.0), alpha=0.3, color='red', label='Risk premium (averse)')
ax2.fill_between(alphas, risk_premium, 0, where=(alphas > 1.0), alpha=0.3, color='green', label='Risk premium (seeking)')
ax2.set_xlabel(r'Risk Aversion Parameter $\alpha$', fontsize=11)
ax2.set_ylabel('Risk Premium (\$)', fontsize=11)
ax2.set_title('Risk Premium vs. Risk Attitude', fontsize=12, fontweight='bold')
ax2.legend(fontsize=10)
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('decision_making_eut.pdf', dpi=150, bbox_inches='tight')
plt.close()

# Calculate key statistics for reporting
alpha_moderate = 0.5
eu_gamble_moderate = expected_utility(
    np.array([wealth + stake, wealth - stake]),
    np.array([win_prob, 1 - win_prob]),
    alpha_moderate
)
ce_moderate = fsolve(lambda ce: power_utility(wealth + ce, alpha_moderate) - eu_gamble_moderate, 0)[0]
rp_moderate = expected_value - ce_moderate
\end{pycode}

\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{decision_making_eut.pdf}
\caption{Expected utility theory predictions for risky choice. Left panel shows utility functions
with varying risk attitudes: concave (risk-averse, $\alpha=0.5$), linear (risk-neutral, $\alpha=1.0$),
and convex (risk-seeking, $\alpha=1.5$). Vertical lines indicate current wealth (\$100) and potential
outcomes (\$150, \$50) from a 50-50 gamble. Right panel displays the risk premium as a function of
the risk aversion parameter $\alpha$, demonstrating that risk-averse agents ($\alpha < 1$) demand
compensation to accept fair gambles, while risk-seeking agents ($\alpha > 1$) accept unfavorable
gambles. For $\alpha=0.5$, the risk premium is approximately \$\py{f"{abs(rp_moderate):.2f}"},
indicating the agent requires the gamble's expected value to exceed this amount.}
\label{fig:eut}
\end{figure}

The analysis demonstrates that a moderately risk-averse agent ($\alpha = \py{f"{alpha_moderate}"}$)
has a certainty equivalent of \$\py{f"{ce_moderate:.2f}"} for the gamble, yielding a risk premium
of \$\py{f"{abs(rp_moderate):.2f}"}. This quantifies the intuition that risk-averse individuals
prefer certain outcomes over risky prospects with the same expected value.

\section{Prospect Theory}

\subsection{Value Function and Loss Aversion}

Kahneman and Tversky's prospect theory revolutionized the study of decision making by documenting
systematic violations of expected utility theory. The theory proposes that people evaluate outcomes
relative to a reference point and exhibit loss aversion: losses loom larger than gains.

\begin{definition}[Prospect Theory Value Function]
The value function is defined as:
\begin{equation}
v(x) = \begin{cases}
x^\alpha & \text{if } x \geq 0 \\
-\lambda |x|^\beta & \text{if } x < 0
\end{cases}
\end{equation}
where $\alpha, \beta \in (0,1)$ capture diminishing sensitivity, and $\lambda > 1$ represents
loss aversion (typical value: $\lambda \approx 2.25$).
\end{definition}

\subsection{Probability Weighting}

Prospect theory also incorporates nonlinear probability weighting, whereby small probabilities
are overweighted and large probabilities are underweighted.

\begin{definition}[Probability Weighting Function]
The one-parameter Prelec weighting function is:
\begin{equation}
w(p) = \exp\{-(-\ln p)^\gamma\}
\end{equation}
where $\gamma < 1$ produces the characteristic inverse-S shape.
\end{definition}

\subsection{Computational Simulation}

We now implement the full prospect theory model to analyze how loss aversion and probability
weighting jointly affect decision making.
The model predicts specific phenomena such as the230fourfold pattern of risk attitudes and preference reversals that cannot be explained by231expected utility theory.232233\begin{pycode}234# Prospect Theory Implementation235def prospect_value_function(x, alpha=0.88, beta=0.88, lam=2.25):236"""Kahneman-Tversky value function with loss aversion"""237if isinstance(x, np.ndarray):238v = np.zeros_like(x)239v[x >= 0] = x[x >= 0]**alpha240v[x < 0] = -lam * np.abs(x[x < 0])**beta241return v242else:243if x >= 0:244return x**alpha245else:246return -lam * np.abs(x)**beta247248def prelec_weighting(p, gamma=0.61):249"""Prelec probability weighting function"""250return np.exp(-(-np.log(p))**gamma)251252def prospect_theory_value(outcomes, probabilities, alpha=0.88, beta=0.88, lam=2.25, gamma=0.61):253"""Calculate prospect theory value for a gamble"""254values = prospect_value_function(outcomes, alpha, beta, lam)255weights = prelec_weighting(probabilities, gamma)256return np.sum(weights * values)257258# Parameters from Tversky & Kahneman (1992)259alpha_tk = 0.88260beta_tk = 0.88261lambda_tk = 2.25262gamma_tk = 0.61263264# Create value function plot265x_range = np.linspace(-100, 100, 500)266v_values = prospect_value_function(x_range, alpha_tk, beta_tk, lambda_tk)267268fig2, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 11))269270# Plot 1: Value function271ax1.plot(x_range, v_values, 'b-', linewidth=2.5)272ax1.axhline(0, color='black', linestyle='-', linewidth=0.5)273ax1.axvline(0, color='black', linestyle='-', linewidth=0.5)274# Add reference lines showing loss aversion275x_ref = 50276v_gain = prospect_value_function(x_ref, alpha_tk, beta_tk, lambda_tk)277v_loss = prospect_value_function(-x_ref, alpha_tk, beta_tk, lambda_tk)278ax1.plot([x_ref, x_ref], [0, v_gain], 'g--', alpha=0.5)279ax1.plot([-x_ref, -x_ref], [v_loss, 0], 'r--', alpha=0.5)280ax1.plot([0, x_ref], [v_gain, v_gain], 'g--', alpha=0.5)281ax1.plot([0, -x_ref], [v_loss, v_loss], 'r--', alpha=0.5)282ax1.text(x_ref+5, v_gain/2, f'Gain: {v_gain:.1f}', fontsize=9, color='green')283ax1.text(-x_ref-25, v_loss/2, f'Loss: {v_loss:.1f}', fontsize=9, color='red')284ax1.set_xlabel('Outcome (Relative to Reference Point)', fontsize=11)285ax1.set_ylabel('Subjective Value $v(x)$', fontsize=11)286ax1.set_title(f'Prospect Theory Value Function ($\\lambda={lambda_tk}$)', fontsize=12, fontweight='bold')287ax1.grid(True, alpha=0.3)288289# Plot 2: Probability weighting290p_range = np.linspace(0.01, 0.99, 100)291w_values = prelec_weighting(p_range, gamma_tk)292ax2.plot(p_range, w_values, 'b-', linewidth=2.5, label=f'$\\gamma={gamma_tk}$')293ax2.plot([0, 1], [0, 1], 'k--', linewidth=1, label='Identity (no weighting)')294ax2.fill_between(p_range, p_range, w_values, where=(w_values > p_range),295alpha=0.3, color='red', label='Overweighting')296ax2.fill_between(p_range, p_range, w_values, where=(w_values < p_range),297alpha=0.3, color='blue', label='Underweighting')298ax2.set_xlabel('Objective Probability $p$', fontsize=11)299ax2.set_ylabel('Decision Weight $w(p)$', fontsize=11)300ax2.set_title('Prelec Probability Weighting Function', fontsize=12, fontweight='bold')301ax2.legend(fontsize=9)302ax2.grid(True, alpha=0.3)303304# Plot 3: Fourfold pattern of risk attitudes305# Gains domain306gain_amounts = np.linspace(10, 100, 20)307p_low = 0.01308p_high = 0.90309310ce_gain_low = []311ce_gain_high = []312313for gain in gain_amounts:314# Low probability gain315pt_val_low = prospect_theory_value(np.array([gain, 0]), np.array([p_low, 1-p_low]),316alpha_tk, beta_tk, 
    # Find CE
    ce_low = fsolve(lambda x: prospect_value_function(x, alpha_tk, beta_tk, lambda_tk) - pt_val_low, gain*p_low)[0]
    ce_gain_low.append(ce_low)

    # High probability gain
    pt_val_high = prospect_theory_value(np.array([gain, 0]), np.array([p_high, 1-p_high]),
                                        alpha_tk, beta_tk, lambda_tk, gamma_tk)
    ce_high = fsolve(lambda x: prospect_value_function(x, alpha_tk, beta_tk, lambda_tk) - pt_val_high, gain*p_high)[0]
    ce_gain_high.append(ce_high)

expected_gain_low = gain_amounts * p_low
expected_gain_high = gain_amounts * p_high

ax3.plot(expected_gain_low, ce_gain_low, 'b-', linewidth=2.5, label=f'Low prob ({p_low})')
ax3.plot(expected_gain_high, ce_gain_high, 'g-', linewidth=2.5, label=f'High prob ({p_high})')
ax3.plot([0, 100], [0, 100], 'k--', linewidth=1, label='Risk-neutral')
ax3.set_xlabel('Expected Value', fontsize=11)
ax3.set_ylabel('Certainty Equivalent', fontsize=11)
ax3.set_title('Fourfold Pattern: Gains Domain', fontsize=12, fontweight='bold')
ax3.legend(fontsize=10)
ax3.grid(True, alpha=0.3)

# Plot 4: Loss aversion magnitude
lambda_values = np.linspace(1.0, 3.5, 50)
mixed_gamble_acceptance = []

for lam in lambda_values:
    # Mixed gamble: 50% chance to win $100, 50% chance to lose $100
    outcomes = np.array([100, -100])
    probs = np.array([0.5, 0.5])
    pt_val = prospect_theory_value(outcomes, probs, alpha_tk, beta_tk, lam, gamma_tk)
    mixed_gamble_acceptance.append(pt_val)

mixed_gamble_acceptance = np.array(mixed_gamble_acceptance)

ax4.plot(lambda_values, mixed_gamble_acceptance, 'b-', linewidth=2.5)
ax4.axhline(0, color='red', linestyle='--', linewidth=2, label='Indifference threshold')
ax4.axvline(lambda_tk, color='gray', linestyle=':', alpha=0.7, label=f'Typical $\\lambda={lambda_tk}$')
ax4.fill_between(lambda_values, mixed_gamble_acceptance, 0,
                 where=(mixed_gamble_acceptance > 0), alpha=0.3, color='green', label='Accept')
ax4.fill_between(lambda_values, mixed_gamble_acceptance, 0,
                 where=(mixed_gamble_acceptance < 0), alpha=0.3, color='red', label='Reject')
ax4.set_xlabel('Loss Aversion Parameter $\\lambda$', fontsize=11)
ax4.set_ylabel('Prospect Value', fontsize=11)
ax4.set_title('Mixed Gamble Acceptance vs. Loss Aversion', fontsize=12, fontweight='bold')
ax4.legend(fontsize=10)
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('decision_making_prospect.pdf', dpi=150, bbox_inches='tight')
plt.close()

# Calculate key statistics
loss_aversion_ratio = abs(v_loss / v_gain)
overweighting_01 = prelec_weighting(0.01, gamma_tk) / 0.01
underweighting_90 = prelec_weighting(0.90, gamma_tk) / 0.90
\end{pycode}

\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{decision_making_prospect.pdf}
\caption{Prospect theory components and predictions. (a) Value function showing loss aversion:
losses loom larger than equivalent gains by a factor of $\lambda=\py{f"{lambda_tk}"}$, resulting
in a loss aversion ratio of \py{f"{loss_aversion_ratio:.2f}"}. The function is steeper for losses
than for gains (loss aversion), and it is concave for gains and convex for losses (diminishing
sensitivity in both domains). (b) Prelec probability weighting
function ($\gamma=\py{f"{gamma_tk}"}$) exhibits an inverse-S shape: small probabilities (e.g., 0.01)
are overweighted by a factor of \py{f"{overweighting_01:.2f}"}, while large probabilities (e.g., 0.90)
are underweighted to \py{f"{underweighting_90:.3f}"} of their objective value.
(c) Fourfold pattern
in the gains domain: risk seeking for low-probability gains (lottery-ticket behavior) and risk
aversion for high-probability gains. (d) Mixed gamble acceptance threshold: agents with typical loss
aversion ($\lambda \approx \py{f"{lambda_tk}"}$) reject 50-50 mixed gambles even when the expected value
is zero, requiring gains to be approximately twice as large as losses before accepting.}
\label{fig:prospect}
\end{figure}

The prospect theory analysis reveals that with parameters $\alpha = \py{f"{alpha_tk}"}$,
$\beta = \py{f"{beta_tk}"}$, $\lambda = \py{f"{lambda_tk}"}$, and $\gamma = \py{f"{gamma_tk}"}$,
losses are experienced as \py{f"{loss_aversion_ratio:.2f}"} times more impactful than equivalent
gains. This loss aversion explains the rejection of symmetric mixed gambles and the endowment effect.

\section{Drift-Diffusion Model}

\subsection{Sequential Sampling Framework}

The drift-diffusion model (DDM) provides a mechanistic account of two-alternative forced choice
decisions, including both choice and response time distributions. Evidence accumulates over time
as a noisy process until reaching a decision boundary.

\begin{definition}[Drift-Diffusion Process]
The accumulated evidence $x(t)$ evolves according to:
\begin{equation}
dx = \mu \, dt + \sigma \, dW
\end{equation}
where $\mu$ is the drift rate (evidence quality), $\sigma$ is diffusion noise, and $dW$ is a
Wiener increment. A decision occurs when $x(t)$ reaches a boundary at $\pm a$.
\end{definition}

\begin{theorem}[Mean Decision Time]
For symmetric boundaries at $\pm a$ with starting point $z=0$, the mean decision time is:
\begin{equation}
\mathbb{E}[T] = \frac{a}{\mu} \tanh\left(\frac{a\mu}{\sigma^2}\right) + T_{er}
\end{equation}
where $T_{er}$ is non-decision time (encoding and response execution).
\end{theorem}
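
As an illustrative sanity check (a sketch added for exposition, not part of the reported simulations),
the closed-form expression can be evaluated directly for the parameter values used later
($\mu = 0.4$, $a = 1.2$, $\sigma = 1$, $T_{er} = 0.3$ s):

\begin{pycode}
# Illustrative check of the closed-form mean decision time (exposition only):
# E[T] = (a/mu) * tanh(a*mu/sigma^2) + T_er for symmetric boundaries at +/- a
# and an unbiased starting point.
import numpy as np

def analytic_mean_rt(mu, a, sigma=1.0, t_er=0.3):
    """Analytic mean RT of the pure drift-diffusion model."""
    return (a / mu) * np.tanh(a * mu / sigma**2) + t_er

analytic_rt_example = analytic_mean_rt(mu=0.4, a=1.2)
\end{pycode}

For these values the formula gives a mean RT of approximately \py{f"{analytic_rt_example:.2f}"}~s,
which can be compared against the simulated mean RT reported below.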

\subsection{Simulation of Decision Dynamics}

We simulate the drift-diffusion process using Euler--Maruyama discretization to generate response
time distributions and examine the speed-accuracy tradeoff. The model predicts that increasing
boundary separation increases accuracy but slows responses, while drift rate affects both.

\begin{pycode}
# Drift-Diffusion Model Simulation
def simulate_ddm_trial(drift_rate, boundary, noise=1.0, dt=0.001, non_decision_time=0.3):
    """Simulate a single DDM trial"""
    evidence = 0.0
    t = 0.0
    max_time = 10.0  # Prevent infinite loops

    while abs(evidence) < boundary and t < max_time:
        evidence += drift_rate * dt + noise * np.sqrt(dt) * np.random.randn()
        t += dt

    choice = 1 if evidence >= boundary else -1
    reaction_time = t + non_decision_time
    return choice, reaction_time

def simulate_ddm_experiment(drift_rate, boundary, n_trials=1000, noise=1.0, non_decision_time=0.3):
    """Simulate multiple DDM trials"""
    choices = []
    rts = []

    for _ in range(n_trials):
        choice, rt = simulate_ddm_trial(drift_rate, boundary, noise, non_decision_time=non_decision_time)
        choices.append(choice)
        rts.append(rt)

    return np.array(choices), np.array(rts)

# Simulate DDM with different parameters
drift_rates = [0.1, 0.3, 0.5]
boundaries = [0.8, 1.2, 1.6]
n_trials = 2000

fig3 = plt.figure(figsize=(14, 10))

# Drift rate manipulation
ax1 = plt.subplot(2, 3, 1)
for drift in drift_rates:
    choices, rts = simulate_ddm_experiment(drift, boundary=1.2, n_trials=n_trials)
    correct_rts = rts[choices == 1]
    ax1.hist(correct_rts, bins=50, alpha=0.6, density=True, label=f'$\\mu={drift}$')

ax1.set_xlabel('Response Time (s)', fontsize=11)
ax1.set_ylabel('Density', fontsize=11)
ax1.set_title('RT Distributions: Drift Rate Effect', fontsize=12, fontweight='bold')
ax1.legend(fontsize=10)
ax1.set_xlim(0, 5)

# Boundary manipulation
ax2 = plt.subplot(2, 3, 2)
for bound in boundaries:
    choices, rts = simulate_ddm_experiment(drift_rate=0.3, boundary=bound, n_trials=n_trials)
    correct_rts = rts[choices == 1]
    ax2.hist(correct_rts, bins=50, alpha=0.6, density=True, label=f'$a={bound}$')

ax2.set_xlabel('Response Time (s)', fontsize=11)
ax2.set_ylabel('Density', fontsize=11)
ax2.set_title('RT Distributions: Boundary Effect', fontsize=12, fontweight='bold')
ax2.legend(fontsize=10)
ax2.set_xlim(0, 5)

# Speed-accuracy tradeoff
ax3 = plt.subplot(2, 3, 3)
boundaries_sac = np.linspace(0.5, 2.0, 10)
accuracy_sac = []
mean_rt_sac = []

for bound in boundaries_sac:
    choices, rts = simulate_ddm_experiment(drift_rate=0.3, boundary=bound, n_trials=1000)
    accuracy_sac.append(np.mean(choices == 1))
    mean_rt_sac.append(np.mean(rts))

ax3.plot(mean_rt_sac, accuracy_sac, 'bo-', linewidth=2, markersize=6)
ax3.set_xlabel('Mean Response Time (s)', fontsize=11)
ax3.set_ylabel('Accuracy', fontsize=11)
ax3.set_title('Speed-Accuracy Tradeoff', fontsize=12, fontweight='bold')
ax3.grid(True, alpha=0.3)

# Sample trajectories
ax4 = plt.subplot(2, 3, 4)
drift_demo = 0.4
bound_demo = 1.5
n_trajectories = 20

for _ in range(n_trajectories):
    evidence_path = [0]
    t_path = [0]
    evidence = 0.0
    t = 0.0
    dt = 0.01

    while abs(evidence) < bound_demo and t < 5:
        evidence += drift_demo * dt + 1.0 * np.sqrt(dt) * np.random.randn()
        t += dt
        evidence_path.append(evidence)
        t_path.append(t)

    color = 'green' if evidence >= bound_demo else 'red'
    ax4.plot(t_path, evidence_path, color=color, alpha=0.3, linewidth=1)

ax4.axhline(bound_demo, color='blue', linestyle='--', linewidth=2, label='Upper boundary')
ax4.axhline(-bound_demo, color='orange', linestyle='--', linewidth=2, label='Lower boundary')
ax4.axhline(0, color='black', linestyle='-', linewidth=0.5)
ax4.set_xlabel('Time (s)', fontsize=11)
ax4.set_ylabel('Evidence $x(t)$', fontsize=11)
ax4.set_title(f'Sample Trajectories ($\\mu={drift_demo}$, $a={bound_demo}$)', fontsize=12, fontweight='bold')
ax4.legend(fontsize=10)
ax4.set_xlim(0, 5)
ax4.grid(True, alpha=0.3)

# RT quantiles for correct and error responses
ax5 = plt.subplot(2, 3, 5)
choices_qp, rts_qp = simulate_ddm_experiment(drift_rate=0.4, boundary=1.2, n_trials=5000)
correct_rts = rts_qp[choices_qp == 1]
error_rts = rts_qp[choices_qp == -1]

quantiles = [0.1, 0.3, 0.5, 0.7, 0.9]
correct_quantiles = np.quantile(correct_rts, quantiles)
error_quantiles = np.quantile(error_rts, quantiles) if len(error_rts) > 10 else np.zeros(len(quantiles))

ax5.plot(quantiles, correct_quantiles, 'go-', linewidth=2, markersize=8, label='Correct')
if len(error_rts) > 10:
    ax5.plot(quantiles, error_quantiles, 'ro-', linewidth=2, markersize=8, label='Error')
ax5.set_xlabel('Quantile', fontsize=11)
ax5.set_ylabel('Response Time (s)', fontsize=11)
ax5.set_title('RT Quantiles: Correct vs. Error', fontsize=12, fontweight='bold')
ax5.legend(fontsize=10)
ax5.grid(True, alpha=0.3)

# Drift rate vs accuracy/RT
ax6 = plt.subplot(2, 3, 6)
drift_range = np.linspace(0.05, 0.8, 15)
acc_by_drift = []
rt_by_drift = []

for drift in drift_range:
    choices_d, rts_d = simulate_ddm_experiment(drift, boundary=1.2, n_trials=500)
    acc_by_drift.append(np.mean(choices_d == 1))
    rt_by_drift.append(np.mean(rts_d))

ax6_twin = ax6.twinx()
line1 = ax6.plot(drift_range, acc_by_drift, 'b-o', linewidth=2, markersize=5, label='Accuracy')
line2 = ax6_twin.plot(drift_range, rt_by_drift, 'r-s', linewidth=2, markersize=5, label='Mean RT')

ax6.set_xlabel('Drift Rate $\\mu$', fontsize=11)
ax6.set_ylabel('Accuracy', fontsize=11, color='blue')
ax6_twin.set_ylabel('Mean RT (s)', fontsize=11, color='red')
ax6.set_title('Drift Rate Effects', fontsize=12, fontweight='bold')
ax6.tick_params(axis='y', labelcolor='blue')
ax6_twin.tick_params(axis='y', labelcolor='red')
ax6.grid(True, alpha=0.3)

lines = line1 + line2
labels = [l.get_label() for l in lines]
ax6.legend(lines, labels, fontsize=10, loc='center right')

plt.tight_layout()
plt.savefig('decision_making_ddm.pdf', dpi=150, bbox_inches='tight')
plt.close()

# Calculate key DDM statistics
drift_key = 0.4
boundary_key = 1.2
choices_key, rts_key = simulate_ddm_experiment(drift_key, boundary_key, n_trials=5000)
accuracy_key = np.mean(choices_key == 1)
mean_rt_key = np.mean(rts_key)
std_rt_key = np.std(rts_key)
median_rt_key = np.median(rts_key)
\end{pycode}

\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{decision_making_ddm.pdf}
\caption{Drift-diffusion model simulations of two-alternative forced choice. (a) Response time
distributions for different drift rates ($\mu$): higher evidence quality produces faster, more
accurate decisions with reduced RT variability. (b) Boundary separation effects: increasing the
decision threshold ($a$) from 0.8 to 1.6 produces slower, more cautious responses with
broader RT distributions. (c) Speed-accuracy tradeoff: widening the boundary separation trades
speed for accuracy, tracing out the characteristic saturating relationship between mean RT and accuracy.
(d) Twenty sample
evidence accumulation trajectories (green = correct, red = error) showing the stochastic nature
of the decision process. (e) RT quantiles for correct and error responses; errors are rare at this
drift rate, and in the pure DDM with an unbiased starting point the two distributions are expected
to be similar. (f) Drift rate effects on both accuracy
and mean RT: with $a=\py{f"{boundary_key}"}$, a drift rate of $\mu=\py{f"{drift_key}"}$ yields
accuracy = \py{f"{accuracy_key:.3f}"} and mean RT = \py{f"{mean_rt_key:.3f}"} s (SD = \py{f"{std_rt_key:.3f}"} s).}
\label{fig:ddm}
\end{figure}

The drift-diffusion analysis with $\mu = \py{f"{drift_key}"}$ and $a = \py{f"{boundary_key}"}$
produces mean RT = \py{f"{mean_rt_key:.2f}"} s (median = \py{f"{median_rt_key:.2f}"} s) with
accuracy = \py{f"{accuracy_key:.1%}"}. The model successfully captures empirical RT distributions,
including their characteristic positive skew and the speed-accuracy tradeoff.

\section{Bayesian Decision Making}

\subsection{Decision Making Under Uncertainty}

Bayesian decision theory provides a normative framework for optimal decision making when beliefs
must be updated from noisy evidence. The decision maker maintains a posterior distribution over
states and chooses actions to minimize expected loss.

\begin{definition}[Bayes' Rule]
Given evidence $e$ and hypotheses $h_i$, the posterior probability is:
\begin{equation}
P(h_i | e) = \frac{P(e | h_i) P(h_i)}{\sum_j P(e | h_j) P(h_j)}
\end{equation}
\end{definition}

\begin{theorem}[Optimal Bayesian Decision]
The action $a^*$ that minimizes expected loss $L(a, h)$ is:
\begin{equation}
a^* = \arg\min_a \sum_h L(a, h) P(h | e)
\end{equation}
\end{theorem}
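
As a minimal illustration of this decision rule (a sketch with arbitrary placeholder loss values and
posterior, not quantities estimated in the diagnosis scenario below), the minimization can be written
as a matrix--vector product followed by an $\arg\min$:

\begin{pycode}
# Illustrative sketch of expected-loss minimization for a binary decision.
# Rows index actions (0 = withhold diagnosis, 1 = diagnose disease); columns
# index hypotheses (0 = healthy, 1 = disease). Loss values and the posterior
# are arbitrary placeholders chosen for illustration only.
import numpy as np

loss = np.array([[0.0, 1.0],   # false negative (missed disease) costs 1.0
                 [0.2, 0.0]])  # false positive (unnecessary diagnosis) costs 0.2
posterior = np.array([0.3, 0.7])              # example posterior P(h | e)
expected_loss = loss @ posterior               # expected loss of each action
a_star_bayes = int(np.argmin(expected_loss))   # Bayes-optimal action index
\end{pycode}

With these placeholder numbers the expected losses are $0.7$ and $0.06$, so the Bayes-optimal choice
is to diagnose; the cost-based threshold analysis in the simulation below generalizes this comparison
across cost ratios.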

\subsection{Sequential Evidence Accumulation}

We simulate a medical diagnosis scenario in which a physician must decide whether a patient has
a disease based on sequential test results. The Bayesian framework prescribes how beliefs should
be updated and when sufficient evidence has accumulated to make a decision.

\begin{pycode}
# Bayesian Decision Making
def bayesian_update(prior, likelihood):
    """Update beliefs via Bayes' rule, given the likelihood of the observed evidence"""
    posterior_unnorm = prior * likelihood
    return posterior_unnorm / np.sum(posterior_unnorm)

# Medical diagnosis scenario
n_tests = 20
sensitivity = 0.85   # P(test+ | disease+)
specificity = 0.90   # P(test- | disease-)
prior_prob_disease = 0.10

# True state: patient has disease
true_state = 1

# Generate test results
np.random.seed(123)
test_results = []
for _ in range(n_tests):
    if true_state == 1:
        test_results.append(1 if np.random.rand() < sensitivity else 0)
    else:
        test_results.append(0 if np.random.rand() < specificity else 1)

# Sequential belief updating
beliefs = [prior_prob_disease]
for test in test_results:
    # Prior over disease states (healthy, disease)
    prior = np.array([1 - beliefs[-1], beliefs[-1]])

    # Likelihood of the observed test result under each disease state
    if test == 1:
        likelihood = np.array([1 - specificity, sensitivity])
    else:
        likelihood = np.array([specificity, 1 - sensitivity])

    # Bayesian update
    posterior = bayesian_update(prior, likelihood)
    beliefs.append(posterior[1])

beliefs = np.array(beliefs)

# Create Bayesian decision figure
fig4, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))

# Plot 1: Sequential belief updating
ax1.plot(range(len(beliefs)), beliefs, 'b-o', linewidth=2, markersize=4)
ax1.axhline(0.5, color='red', linestyle='--', linewidth=2, label='Decision threshold')
ax1.axhline(prior_prob_disease, color='gray', linestyle=':', alpha=0.7, label='Prior')
ax1.fill_between(range(len(beliefs)), 0.5, beliefs, where=(beliefs > 0.5),
                 alpha=0.3, color='green', label='Diagnose disease')
ax1.fill_between(range(len(beliefs)), 0.5, beliefs, where=(beliefs <= 0.5),
                 alpha=0.3, color='blue', label='Diagnose healthy')
ax1.set_xlabel('Number of Tests', fontsize=11)
ax1.set_ylabel('P(Disease | Evidence)', fontsize=11)
ax1.set_title('Sequential Bayesian Belief Updating', fontsize=12, fontweight='bold')
ax1.legend(fontsize=9)
ax1.grid(True, alpha=0.3)
ax1.set_ylim(0, 1)

# Plot 2: Likelihood ratio and evidence strength
log_likelihood_ratios = []
for test in test_results:
    if test == 1:
        lr = sensitivity / (1 - specificity)
    else:
        lr = (1 - sensitivity) / specificity
    log_likelihood_ratios.append(np.log(lr))

cumulative_log_lr = np.cumsum(log_likelihood_ratios)

ax2.plot(range(1, len(cumulative_log_lr)+1), cumulative_log_lr, 'b-o', linewidth=2, markersize=4)
ax2.axhline(0, color='black', linestyle='-', linewidth=0.5)
ax2.axhline(np.log(3), color='green', linestyle='--', alpha=0.7, label='Moderate evidence (3:1)')
ax2.axhline(-np.log(3), color='red', linestyle='--', alpha=0.7, label='Moderate evidence (1:3)')
ax2.set_xlabel('Test Number', fontsize=11)
ax2.set_ylabel('Cumulative Log Likelihood Ratio', fontsize=11)
ax2.set_title('Evidence Accumulation (Log Likelihood Ratio)', fontsize=12, fontweight='bold')
ax2.legend(fontsize=9)
ax2.grid(True, alpha=0.3)

# Plot 3: Optimal decision boundary with costs
costs_fn = 1.0  # Cost of a false negative (missed disease)
costs_fp_range = np.linspace(0.1, 2.0, 50)
optimal_thresholds = []
for cost_fp in costs_fp_range:
    # Optimal threshold: P(disease) at which the expected costs of the two errors
    # are equal, i.e., cost_fp * (1 - p) = cost_fn * p
    threshold = cost_fp / (cost_fp + costs_fn)
    optimal_thresholds.append(threshold)

ax3.plot(costs_fp_range, optimal_thresholds, 'b-', linewidth=2.5)
ax3.axvline(costs_fn, color='gray', linestyle=':', alpha=0.7, label='Equal costs')
ax3.axhline(0.5, color='gray', linestyle=':', alpha=0.7)
ax3.set_xlabel('Cost of False Positive', fontsize=11)
ax3.set_ylabel('Optimal Decision Threshold', fontsize=11)
ax3.set_title(f'Optimal Threshold vs. Cost Ratio (FN cost = {costs_fn})', fontsize=12, fontweight='bold')
ax3.legend(fontsize=10)
ax3.grid(True, alpha=0.3)

# Plot 4: ROC curve through the test's operating point (equal-variance Gaussian model)
fpr_grid = np.linspace(0.001, 0.999, 200)
d_prime = norm.ppf(sensitivity) - norm.ppf(1 - specificity)  # discriminability implied by the test
tpr_grid = norm.cdf(d_prime + norm.ppf(fpr_grid))

ax4.plot(fpr_grid, tpr_grid, 'b-', linewidth=2.5, label='ROC curve')
ax4.plot([0, 1], [0, 1], 'k--', linewidth=1, label='Chance')
ax4.fill_between([0, 1], [0, 1], [1, 1], alpha=0.2, color='green', label='Better than chance')
ax4.scatter([1 - specificity], [sensitivity], s=200, c='red', marker='*',
            edgecolors='black', linewidths=2, label=f'Test performance ({sensitivity}, {specificity})', zorder=5)
ax4.set_xlabel('False Positive Rate (1 - Specificity)', fontsize=11)
ax4.set_ylabel('True Positive Rate (Sensitivity)', fontsize=11)
ax4.set_title('ROC Curve: Diagnostic Performance', fontsize=12, fontweight='bold')
ax4.legend(fontsize=9)
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('decision_making_bayesian.pdf', dpi=150, bbox_inches='tight')
plt.close()

# Calculate key statistics
final_belief = beliefs[-1]
n_positive_tests = np.sum(test_results)
final_log_lr = cumulative_log_lr[-1]
\end{pycode}

\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{decision_making_bayesian.pdf}
\caption{Bayesian decision making for medical diagnosis. (a) Sequential belief updating: starting
from a prior of P(disease) = \py{f"{prior_prob_disease}"}, the posterior probability evolves with
each test result (sensitivity = \py{f"{sensitivity}"}, specificity = \py{f"{specificity}"}). After
\py{n_tests} tests with \py{n_positive_tests} positive results, the final belief reaches \py{f"{final_belief:.3f}"}.
The decision threshold of 0.5 determines when to diagnose disease. (b) Cumulative log likelihood ratio
tracks total evidence strength: values above $\log 3$ represent moderate evidence for disease. The final
cumulative log likelihood ratio is \py{f"{final_log_lr:.2f}"}, indicating strong evidence. (c) The optimal
decision threshold depends on the cost ratio: when false negatives are more costly than false positives,
the threshold should decrease to favor diagnosing disease; with equal costs, the threshold is 0.5.
(d) ROC curve showing the tradeoff between true positive rate (sensitivity) and false positive rate (1 - specificity).
Test performance (red star) indicates
better-than-chance discrimination, with the area under the curve representing overall diagnostic accuracy.}
\label{fig:bayesian}
\end{figure}

The Bayesian analysis shows that after \py{n_tests} sequential tests, the posterior probability
of disease is \py{f"{final_belief:.3f}"}, with a cumulative log likelihood ratio of \py{f"{final_log_lr:.2f}"}.
This demonstrates how weak evidence (a single test) accumulates to produce strong diagnostic confidence,
formalizing the intuitive notion that multiple corroborating tests increase certainty.

\section{Reinforcement Learning}

\subsection{Value-Based Decision Making}

Reinforcement learning provides a framework for learning to make decisions through trial and error.
Agents learn value functions that predict expected future reward and use these to guide action selection.

\begin{definition}[Q-Learning]
The action-value function $Q(s, a)$ represents the expected return from taking action $a$ in state $s$.
The Q-learning update rule is:
\begin{equation}
Q(s_t, a_t) \leftarrow Q(s_t, a_t) + \alpha \left[ r_t + \gamma \max_{a'} Q(s_{t+1}, a') - Q(s_t, a_t) \right]
\end{equation}
where $\alpha$ is the learning rate and $\gamma$ is the discount factor.
\end{definition}

\begin{definition}[Softmax Action Selection]
Actions are selected probabilistically according to:
\begin{equation}
P(a|s) = \frac{\exp(\beta Q(s,a))}{\sum_{a'} \exp(\beta Q(s,a'))}
\end{equation}
where $\beta$ is the inverse temperature controlling the exploration--exploitation balance.
\end{definition}
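
In the stateless multi-armed bandit simulated below there is a single state and no successor state,
so the Q-learning target reduces to the immediate reward and the update becomes the delta rule
$Q(a) \leftarrow Q(a) + \alpha\,[r - Q(a)]$ (equivalently, a Rescorla--Wagner update); this is the
special case implemented by the agent class in the next section. A minimal sketch of that reduced update:

\begin{pycode}
# Illustrative sketch (exposition only): in a stateless bandit the Q-learning
# target is just the reward r, so the update reduces to the delta rule
# Q(a) <- Q(a) + alpha * (r - Q(a)).
import numpy as np

def delta_rule_update(Q, action, reward, alpha=0.1):
    """One prediction-error update of the action values for a stateless bandit."""
    Q = Q.copy()
    Q[action] += alpha * (reward - Q[action])
    return Q
\end{pycode}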

\subsection{Multi-Armed Bandit Simulation}

We simulate a classic reinforcement learning task in which an agent must learn which of several slot
machines (arms) provides the highest reward. The agent faces the exploration--exploitation dilemma:
whether to exploit known good options or to explore alternatives that might be better.

\begin{pycode}
# Reinforcement Learning: Multi-armed bandit
class MultiArmedBandit:
    def __init__(self, n_arms, true_values):
        self.n_arms = n_arms
        self.true_values = true_values

    def pull_arm(self, arm):
        """Pull arm and receive a noisy reward"""
        return self.true_values[arm] + np.random.randn() * 0.5

class QLearningAgent:
    def __init__(self, n_arms, learning_rate=0.1, temperature=1.0):
        self.n_arms = n_arms
        self.Q = np.zeros(n_arms)        # Q-values
        self.alpha = learning_rate       # learning rate
        self.beta = temperature          # softmax inverse temperature
        self.action_counts = np.zeros(n_arms)

    def select_action(self):
        """Softmax action selection"""
        exp_Q = np.exp(self.beta * self.Q)
        probs = exp_Q / np.sum(exp_Q)
        return np.random.choice(self.n_arms, p=probs)

    def update(self, action, reward):
        """Delta-rule (stateless Q-learning) update"""
        self.Q[action] += self.alpha * (reward - self.Q[action])
        self.action_counts[action] += 1

# Setup bandit task
n_arms = 4
true_values = np.array([1.0, 2.5, 1.5, 0.5])  # True mean rewards
n_trials = 500

# Simulate different learning rates
learning_rates = [0.01, 0.1, 0.5]
temperature = 2.0

fig5, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))

for lr in learning_rates:
    bandit = MultiArmedBandit(n_arms, true_values)
    agent = QLearningAgent(n_arms, learning_rate=lr, temperature=temperature)

    Q_history = [agent.Q.copy()]
    rewards_history = []

    for trial in range(n_trials):
        action = agent.select_action()
        reward = bandit.pull_arm(action)
        agent.update(action, reward)

        Q_history.append(agent.Q.copy())
        rewards_history.append(reward)

    Q_history = np.array(Q_history)

    # Plot Q-value learning (only for one learning rate to avoid clutter)
    if lr == 0.1:
        for arm in range(n_arms):
            ax1.plot(Q_history[:, arm], label=f'Arm {arm+1} (true: {true_values[arm]:.1f})', alpha=0.8)

ax1.axhline(0, color='black', linestyle='-', linewidth=0.5)
for arm in range(n_arms):
    ax1.axhline(true_values[arm], color='gray', linestyle=':', alpha=0.5)
ax1.set_xlabel('Trial', fontsize=11)
ax1.set_ylabel('Q-value', fontsize=11)
ax1.set_title(f'Q-Learning Dynamics ($\\alpha={0.1}$, $\\beta={temperature}$)', fontsize=12, fontweight='bold')
ax1.legend(fontsize=9)
ax1.grid(True, alpha=0.3)

# Compare learning rates
for lr in learning_rates:
    bandit = MultiArmedBandit(n_arms, true_values)
    agent = QLearningAgent(n_arms, learning_rate=lr, temperature=temperature)

    cumulative_reward = 0
    cumulative_rewards = []

    for trial in range(n_trials):
        action = agent.select_action()
        reward = bandit.pull_arm(action)
        agent.update(action, reward)
        cumulative_reward += reward
        cumulative_rewards.append(cumulative_reward)

    ax2.plot(cumulative_rewards, linewidth=2, label=f'$\\alpha={lr}$')

# Optimal cumulative reward
optimal_reward_per_trial = np.max(true_values)
optimal_cumulative = np.arange(1, n_trials+1) * optimal_reward_per_trial
ax2.plot(optimal_cumulative, 'k--', linewidth=2, label='Optimal')

ax2.set_xlabel('Trial', fontsize=11)
ax2.set_ylabel('Cumulative Reward', fontsize=11)
ax2.set_title('Learning Rate Comparison', fontsize=12, fontweight='bold')
ax2.legend(fontsize=10)
ax2.grid(True, alpha=0.3)

# Inverse temperature effects on exploration
temperatures = [0.5, 2.0, 5.0]
ax3_colors = ['blue', 'green', 'red']
for temp, color in zip(temperatures, ax3_colors):
    bandit = MultiArmedBandit(n_arms, true_values)
    agent = QLearningAgent(n_arms, learning_rate=0.1, temperature=temp)

    action_history = []

    for trial in range(n_trials):
        action = agent.select_action()
        reward = bandit.pull_arm(action)
        agent.update(action, reward)
        action_history.append(action)

    # Plot action selection frequency over a sliding window
    window = 50
    action_probs = []
    for i in range(window, n_trials):
        recent_actions = action_history[i-window:i]
        probs = [recent_actions.count(a) / window for a in range(n_arms)]
        action_probs.append(probs)

    action_probs = np.array(action_probs)
    best_arm = np.argmax(true_values)

    ax3.plot(range(window, n_trials), action_probs[:, best_arm],
             color=color, linewidth=2, label=f'$\\beta={temp}$')

ax3.set_xlabel('Trial', fontsize=11)
ax3.set_ylabel('P(Select Best Arm)', fontsize=11)
ax3.set_title('Inverse Temperature: Exploration vs. Exploitation', fontsize=12, fontweight='bold')
ax3.legend(fontsize=10)
ax3.grid(True, alpha=0.3)
ax3.set_ylim(0, 1)

# Final Q-values and action selection probabilities
ax4_agent = QLearningAgent(n_arms, learning_rate=0.1, temperature=2.0)
bandit_final = MultiArmedBandit(n_arms, true_values)

for trial in range(n_trials):
    action = ax4_agent.select_action()
    reward = bandit_final.pull_arm(action)
    ax4_agent.update(action, reward)

x_pos = np.arange(n_arms)
width = 0.35

bars1 = ax4.bar(x_pos - width/2, true_values, width, label='True values', color='steelblue', edgecolor='black')
bars2 = ax4.bar(x_pos + width/2, ax4_agent.Q, width, label='Learned Q-values', color='coral', edgecolor='black')

# Add action selection probabilities as text
exp_Q = np.exp(ax4_agent.beta * ax4_agent.Q)
action_probs_final = exp_Q / np.sum(exp_Q)
for i, (bar, prob) in enumerate(zip(bars2, action_probs_final)):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width()/2., height + 0.1,
             f'P={prob:.2f}', ha='center', va='bottom', fontsize=9)

ax4.set_xlabel('Arm', fontsize=11)
ax4.set_ylabel('Value', fontsize=11)
ax4.set_title(f'Learned Values After {n_trials} Trials', fontsize=12, fontweight='bold')
ax4.set_xticks(x_pos)
ax4.set_xticklabels([f'Arm {i+1}' for i in range(n_arms)])
ax4.legend(fontsize=10)
ax4.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('decision_making_rl.pdf', dpi=150, bbox_inches='tight')
plt.close()

# Calculate key RL statistics
final_Q = ax4_agent.Q
Q_errors = np.abs(final_Q - true_values)
mean_Q_error = np.mean(Q_errors)
best_arm = np.argmax(true_values)
learned_best = np.argmax(final_Q)
\end{pycode}

\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{decision_making_rl.pdf}
\caption{Reinforcement learning in a four-armed bandit task with true arm values of
[\py{', '.join([f"{v:.1f}" for v in true_values])}]. (a) Q-learning dynamics with $\alpha=0.1$
and $\beta=\py{f"{temperature}"}$: Q-values converge toward the true values through reward feedback,
with learning speed depending on action selection frequency. Dotted lines show the true values.
(b) Learning rate effects on cumulative reward: faster learning ($\alpha=0.5$) initially
outperforms but may be less stable; slower learning ($\alpha=0.01$) produces smoother but
delayed convergence.
The black dashed line shows optimal performance (always selecting the best arm).
(c) The inverse temperature controls the exploration--exploitation balance: a low inverse temperature
($\beta=0.5$) produces broad exploration and slow convergence on the optimal arm, whereas a high
inverse temperature ($\beta=5.0$) exploits the current best estimate rapidly but can lock onto
suboptimal choices. (d) Final learned
Q-values after \py{n_trials} trials (orange) compared to the true values (blue), with action selection
probabilities shown above the bars. The agent correctly identifies the best arm (Arm 2) with Q-value
\py{f"{final_Q[1]:.2f}"} vs. the true \py{f"{true_values[1]:.2f}"}; mean absolute error = \py{f"{mean_Q_error:.2f}"}.}
\label{fig:rl}
\end{figure}

The reinforcement learning simulation demonstrates value-based decision making through Q-learning.
After \py{n_trials} trials with $\alpha = 0.1$ and $\beta = \py{f"{temperature}"}$, the agent's
learned Q-values have a mean absolute error of \py{f"{mean_Q_error:.3f}"} relative to the true values,
and it correctly identifies the best arm (Arm \py{best_arm + 1}) with selection probability
\py{f"{action_probs_final[best_arm]:.2f}"}. This illustrates how experience-based learning
converges to near-optimal policies.

\section{Model Comparison and Integration}

The five computational frameworks presented here address different aspects of decision making:

\begin{itemize}
\item \textbf{Expected Utility Theory} provides the normative benchmark for rational choice under risk
\item \textbf{Prospect Theory} captures systematic deviations including loss aversion and probability distortion
\item \textbf{Drift-Diffusion Models} explain response time distributions and speed-accuracy tradeoffs
\item \textbf{Bayesian Decision Making} prescribes optimal belief updating under uncertainty
\item \textbf{Reinforcement Learning} accounts for value learning through experience
\end{itemize}

\begin{remark}[Integration of Frameworks]
Recent research integrates these approaches: drift-diffusion can be derived from Bayesian sequential
analysis; reinforcement learning models can incorporate prospect theory's asymmetric value function;
and neural implementations suggest the brain approximates Bayesian inference through sampling mechanisms.
\end{remark}
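
As a concrete illustration of the first of these connections (a standard derivation sketch, assuming
conditionally independent evidence samples), sequential Bayesian updating of the posterior odds
between two hypotheses yields
\begin{equation}
\log \frac{P(h_1 | e_{1:n})}{P(h_2 | e_{1:n})}
= \log \frac{P(h_1)}{P(h_2)} + \sum_{t=1}^{n} \log \frac{P(e_t | h_1)}{P(e_t | h_2)},
\end{equation}
so the cumulative log likelihood ratio plotted in Figure~\ref{fig:bayesian} plays exactly the role of
the accumulated evidence $x(t)$ in the drift-diffusion model, with the decision thresholds
corresponding to the boundaries $\pm a$ (the sequential probability ratio test).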

\section{Conclusions}

This computational analysis reveals the mechanistic underpinnings of human decision making:

\begin{enumerate}
\item Expected utility with $\alpha = \py{f"{alpha_moderate}"}$ predicts a risk premium of
\$\py{f"{abs(rp_moderate):.2f}"} for a 50-50 gamble of $\pm$\$50
\item Prospect theory with $\lambda = \py{f"{lambda_tk}"}$ captures loss aversion, with losses
weighted \py{f"{loss_aversion_ratio:.2f}"} times as heavily as gains
\item Drift-diffusion with $\mu = \py{f"{drift_key}"}$ and $a = \py{f"{boundary_key}"}$ produces
mean RT = \py{f"{mean_rt_key:.2f}"} s and accuracy = \py{f"{accuracy_key:.1%}"}
\item Bayesian updating with sensitivity = \py{f"{sensitivity}"} and specificity = \py{f"{specificity}"}
achieves posterior confidence of \py{f"{final_belief:.3f}"} after \py{n_tests} tests
\item Q-learning with $\alpha = 0.1$ converges to within \py{f"{mean_Q_error:.3f}"} of the true values
after \py{n_trials} trials
\end{enumerate}

These computational models provide quantitative frameworks for understanding both optimal decision
strategies and systematic deviations from rationality observed in human behavior.

\section*{References}

\begin{itemize}
\item von Neumann, J., \& Morgenstern, O. (1944). \textit{Theory of Games and Economic Behavior}. Princeton University Press.
\item Kahneman, D., \& Tversky, A. (1979). Prospect theory: An analysis of decision under risk. \textit{Econometrica}, 47(2), 263--291.
\item Tversky, A., \& Kahneman, D. (1992). Advances in prospect theory: Cumulative representation of uncertainty. \textit{Journal of Risk and Uncertainty}, 5(4), 297--323.
\item Ratcliff, R., \& McKoon, G. (2008). The diffusion decision model: Theory and data for two-choice decision tasks. \textit{Neural Computation}, 20(4), 873--922.
\item Ratcliff, R., Smith, P. L., Brown, S. D., \& McKoon, G. (2016). Diffusion decision model: Current issues and history. \textit{Trends in Cognitive Sciences}, 20(4), 260--281.
\item Bogacz, R., Brown, E., Moehlis, J., Holmes, P., \& Cohen, J. D. (2006). The physics of optimal decision making: A formal analysis of models of performance in two-alternative forced-choice tasks. \textit{Psychological Review}, 113(4), 700--765.
\item Edwards, W., Lindman, H., \& Savage, L. J. (1963). Bayesian statistical inference for psychological research. \textit{Psychological Review}, 70(3), 193--242.
\item Sutton, R. S., \& Barto, A. G. (2018). \textit{Reinforcement Learning: An Introduction} (2nd ed.). MIT Press.
\item Daw, N. D., O'Doherty, J. P., Dayan, P., Seymour, B., \& Dolan, R. J. (2006). Cortical substrates for exploratory decisions in humans. \textit{Nature}, 441(7095), 876--879.
\item Dayan, P., \& Daw, N. D. (2008). Decision theory, reinforcement learning, and the brain. \textit{Cognitive, Affective, \& Behavioral Neuroscience}, 8(4), 429--453.
\item Gold, J. I., \& Shadlen, M. N. (2007). The neural basis of decision making. \textit{Annual Review of Neuroscience}, 30, 535--574.
\item Busemeyer, J. R., \& Diederich, A. (2010). \textit{Cognitive Modeling}. Sage Publications.
\item Prelec, D. (1998). The probability weighting function. \textit{Econometrica}, 66(3), 497--527.
\item Rescorla, R. A., \& Wagner, A. R. (1972). A theory of Pavlovian conditioning: Variations in the effectiveness of reinforcement and nonreinforcement. In A. H. Black \& W. F. Prokasy (Eds.), \textit{Classical Conditioning II: Current Research and Theory} (pp. 64--99). Appleton-Century-Crofts.
\item Glimcher, P. W. (2011). \textit{Foundations of Neuroeconomic Analysis}. Oxford University Press.
\item Lee, M. D., \& Wagenmakers, E. J. (2013). \textit{Bayesian Cognitive Modeling: A Practical Course}. Cambridge University Press.
\item Wald, A. (1947). \textit{Sequential Analysis}. John Wiley \& Sons.
\item Tversky, A., \& Fox, C. R. (1995). Weighing risk and uncertainty. \textit{Psychological Review}, 102(2), 269--283.
\item Forstmann, B. U., Ratcliff, R., \& Wagenmakers, E. J. (2016). Sequential sampling models in cognitive neuroscience: Advantages, applications, and extensions. \textit{Annual Review of Psychology}, 67, 641--666.
\item Niv, Y. (2009). Reinforcement learning in the brain. \textit{Journal of Mathematical Psychology}, 53(3), 139--154.
\end{itemize}

\end{document}