\documentclass{article}
\usepackage{amsmath, amssymb, amsfonts}
\usepackage{geometry}
\geometry{a4paper, margin=2.5cm}

\title{Legacy GAT: From $o_i$ to $\pi_i$}
\author{}
\date{}

\begin{document}
\maketitle

\section*{Notation}
\begin{itemize}
    \item $B$: batch size
    \item $N_a = 12$: number of agents
    \item $M = 10$: number of tasks
    \item $C = 4$: capability dimension
    \item GAT hidden dim $= 256$, output dim $= 128$
    \item $o_i$: Dict observation of agent $i$
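    \item $\bar{o}_i \in \mathbb{R}^{223}$: flattened observation of agent $i$, obtained by concatenating all entries of $o_i$, i.e.\ $(C+7) + M(C+4) + N_a(C+7) = 11 + 80 + 132 = 223$ values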
\end{itemize}

\section*{Step 1: From Dict Observation to Graph Nodes}

\begin{equation}
o_i = \left\{ \text{ego}_i \in \mathbb{R}^{C+7},\; \text{tasks}_i \in \mathbb{R}^{M \times (C+4)},\; \text{others}_i \in \mathbb{R}^{N_a \times (C+7)} \right\}
\end{equation}

\begin{equation}
\mathbf{x}_a^{(t)} = \text{BuildAgentNodes}(\{\text{ego}_i^{(t)}\}_{i=1}^{N_a}, \{\text{others}_i^{(t)}\}_{i=1}^{N_a}) \in \mathbb{R}^{N_a \times 9}
\end{equation}

\begin{equation}
\mathbf{x}_\tau^{(t)} = \text{BuildTaskNodes}(\{\text{tasks}_i^{(t)}\}_{i=1}^{N_a}) \in \mathbb{R}^{M \times 9}
\end{equation}

\begin{equation}
\mathbf{x}^{(t)} = \begin{bmatrix} \mathbf{x}_a^{(t)} \\ \mathbf{x}_\tau^{(t)} \end{bmatrix} \in \mathbb{R}^{(N_a+M) \times 9}
\end{equation}

\begin{equation}
\mathbf{x} = \begin{bmatrix} \mathbf{x}^{(1)} \\ \vdots \\ \mathbf{x}^{(B)} \end{bmatrix} \in \mathbb{R}^{B(N_a+M) \times 9}
\end{equation}

\section*{Step 2: Graph Construction}

\begin{equation}
|\mathcal{E}_{a \to \tau}| = |\mathcal{E}_{\tau \to a}| = N_a M, \quad |\mathcal{E}_{a \to a}| = N_a(N_a-1)
\end{equation}

\begin{equation}
E = 2N_aM + N_a(N_a-1) = 372
\end{equation}

\begin{equation}
\mathbf{e}_{ij} =
\begin{cases}
\mathbf{x}_{\tau,j} - \mathbf{x}_{a,i}, & (i,j) \in \mathcal{E}_{a \to \tau} \\
\mathbf{x}_{a,i} - \mathbf{x}_{\tau,j}, & (i,j) \in \mathcal{E}_{\tau \to a} \\
-\text{similarity}(i,j) \cdot \mathbf{1}_9, & (i,j) \in \mathcal{E}_{a \to a}
\end{cases}
\end{equation}

\begin{equation}
\text{similarity}(i,j) = \frac{1}{1 + \frac{1}{9}\sum_{d=1}^{9} |\mathbf{x}_{a,j}^{(d)} - \mathbf{x}_{a,i}^{(d)}|}
\end{equation}

\section*{Step 3: First GATConv Layer (1-hop)}

\begin{equation}
\alpha_{ij}^{(0)} = \frac{\exp\left( \text{LeakyReLU}\left( \mathbf{a}^{(0)\top} \left[ W^{(0)}\mathbf{x}_i \;\|\; W^{(0)}\mathbf{x}_j \;\|\; \mathbf{e}_{ij} \right] \right) \right)}{\sum_{k \in \mathcal{N}(i)} \exp\left( \text{LeakyReLU}\left( \mathbf{a}^{(0)\top} \left[ W^{(0)}\mathbf{x}_i \;\|\; W^{(0)}\mathbf{x}_k \;\|\; \mathbf{e}_{ik} \right] \right) \right)}
\end{equation}

\begin{equation}
\mathbf{h}_i^{(1)} = \text{ReLU}\left( \sum_{j \in \mathcal{N}(i)} \alpha_{ij}^{(0)} W^{(0)} \mathbf{x}_j \right), \quad W^{(0)} \in \mathbb{R}^{256 \times 9}
\end{equation}

\section*{Step 4: Second GATConv Layer (2-hop)}

\begin{equation}
\alpha_{ij}^{(1)} = \frac{\exp\left( \text{LeakyReLU}\left( \mathbf{a}^{(1)\top} \left[ W^{(1)}\mathbf{h}_i^{(1)} \;\|\; W^{(1)}\mathbf{h}_j^{(1)} \;\|\; \mathbf{e}_{ij} \right] \right) \right)}{\sum_{k \in \mathcal{N}(i)} \exp\left( \text{LeakyReLU}\left( \mathbf{a}^{(1)\top} \left[ W^{(1)}\mathbf{h}_i^{(1)} \;\|\; W^{(1)}\mathbf{h}_k^{(1)} \;\|\; \mathbf{e}_{ik} \right] \right) \right)}
\end{equation}

\begin{equation}
\mathbf{h}_i^{(2)} = \sum_{j \in \mathcal{N}(i)} \alpha_{ij}^{(1)} W^{(1)} \mathbf{h}_j^{(1)}, \quad W^{(1)} \in \mathbb{R}^{128 \times 256}
\end{equation}

\section*{Step 5: Global Pooling}

\begin{equation}
\mathbf{h}_{\text{pool}} = \frac{1}{N_a+M} \sum_{i=1}^{N_a+M} \mathbf{h}_i^{(2)} \in \mathbb{R}^{128}
\end{equation}

\begin{equation}
\mathbf{z}_i = \begin{bmatrix} \bar{o}_i \\ \mathbf{h}_i^{(2)} \\ \mathbf{h}_{\text{pool}} \end{bmatrix} \in \mathbb{R}^{479}
\end{equation}

\section*{Step 6: Actor MLP to Logits}

\textbf{gat mode:}
\begin{equation}
\mathbf{z}_i = \mathbf{h}_i^{(2)} \in \mathbb{R}^{128}
\end{equation}

\textbf{cat mode:}
\begin{equation}
\mathbf{z}_i = \begin{bmatrix} \bar{o}_i \\ \mathbf{h}_i^{(2)} \end{bmatrix} \in \mathbb{R}^{351}
\end{equation}

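\textbf{gated mode} (here $\alpha$ denotes the gate parameter, not the attention coefficients $\alpha_{ij}^{(0)}$, $\alpha_{ij}^{(1)}$):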
\begin{equation}
\mathbf{z}_i = \begin{bmatrix} \bar{o}_i \\ \tanh(\alpha) \cdot \mathbf{h}_i^{(2)} \end{bmatrix} \in \mathbb{R}^{351}
\end{equation}

\begin{equation}
\text{logits}_i = \text{MLP}_{\text{actor}}(\mathbf{z}_i) \in \mathbb{R}^{M}
\end{equation}

\section*{Step 7: Action Distribution}

\begin{equation}
\pi_i = \text{softmax}(\text{logits}_i) = \frac{\exp(\text{logits}_i)}{\sum_{m=1}^{M} \exp(\text{logits}_i[m])} \in \mathbb{R}^{M}
\end{equation}

\section*{Full Pipeline}

\begin{equation}
\begin{split}
o_i &\xrightarrow{\text{flatten}} \bar{o}_i \xrightarrow{\text{build nodes}} \mathbf{x} \xrightarrow{\text{GATConv}_1} \mathbf{h}^{(1)} \xrightarrow{\text{GATConv}_2} \mathbf{h}^{(2)} \\
&\xrightarrow{\text{pool}} [\mathbf{h}_i^{(2)}, \mathbf{h}_{\text{pool}}] \xrightarrow{\text{MLP}} \text{logits}_i \xrightarrow{\text{softmax}} \pi_i
\end{split}
\end{equation}

\end{document}