\documentclass[12pt,a4paper]{article}

% Essential packages
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{physics}
\usepackage{geometry}
\usepackage{hyperref}

% Page layout
\geometry{
    margin=1in,
    includeheadfoot
}

% Title information
\title{\Large\textbf{Statistical Mechanics}}
\author{Ying Nian Wu\\
    Department of Statistics\\
    University of California, Los Angeles\\
    \texttt{ywu@stat.ucla.edu}}
\date{}

\begin{document}

\maketitle

\section{Uniform distribution of isolated system}

Consider an isolated big system $X$ (e.g., the universe) whose energy $E(X)$ is fixed at $E$. $X$ evolves deterministically within the state space
\begin{equation*}
\Omega(E) = \{X: E(X) = E\}
\end{equation*}

Assume that, given enough time, $X$ will visit every state in $\Omega(E)$ with equal frequency (an assumption called ergodicity). Then, at a random time, $X$ follows a uniform distribution over $\Omega(E)$.

\section{Gibbs distribution of a subsystem}

Now consider a small subsystem $x$ (e.g., a molecule), which is part of $X$, and let $Y$ be the rest of $X$, i.e., $X = (x, Y)$, and
\begin{equation*}
E(X) = E(x) + E(Y) = E
\end{equation*}

Then under the uniform distribution of $X$, the distribution of $x$ is
\begin{equation*}
p(x) = \frac{1}{Z(T)} \exp(-E(x)/T)
\end{equation*}
where $T$ is the temperature. This is the origin of the Gibbs distribution, or Boltzmann distribution, or energy-based model.

The reason is as follows. For a given value $x$ of the small subsystem, let $Y$ be the rest of the large system $X$, then
\begin{equation*}
E(Y) = E(X) - E(x) = E - E(x)
\end{equation*}

Among all the states in $\Omega(E) = \{X: E(X) = E\}$, the number of states where the subsystem is $x$ is the same as the number of states in $\Omega_1(E-E(x)) = \{Y: E(Y) = E - E(x)\}$. Then
\begin{equation*}
p(x) = |\Omega_1(E-E(x))| / |\Omega(E)|
\end{equation*}

Thus
\begin{align*}
\log p(x) &= \log |\Omega_1(E-E(x))| + \text{const}_1 \\
&= -\beta E(x) + \text{const}_2
\end{align*}
using the first-order Taylor expansion, where
\begin{equation*}
\beta = \frac{d}{dE} \log |\Omega_1(E)|
\end{equation*}
is the derivative term in the first-order Taylor expansion.

We call $\beta = 1/T$. Then
\begin{equation*}
\log p(x) = -E(x)/T + \text{const}_2
\end{equation*}
and
\begin{equation*}
p(x) = \frac{1}{Z(T)} \exp(-E(x)/T)
\end{equation*}

We can interpret $p(x)$ to be the distribution of a system exchanging heat with a big environment at temperature $T$, i.e., a heat bath.

\section{Micro-canonical ensemble and canonical ensemble}

$\Omega(E) = \{X: E(X) = E\}$, or the uniform distribution over $\Omega(E)$, is called the micro-canonical ensemble, and $p(x)$ is called the canonical ensemble. Under $p(x)$, the energy $E(x)$ fluctuates, because $x$ exchanges heat with its environment $Y$. But if $x$ is large enough, with a large number of degrees of freedom, the average energy per degree of freedom converges to a constant due to the law of large numbers or concentration of measure, and $p(x)$ behaves like a micro-canonical ensemble.

\section{Heat and entropy}

For a micro-canonical ensemble (or a large canonical ensemble),
\begin{equation*}
S = \log |\Omega(E)|
\end{equation*}
is called entropy.

Since
\begin{equation*}
\beta = \frac{1}{T} = \frac{dS}{dE}, \text{ or } dS = \beta dE
\end{equation*}
the change of energy causes the change of entropy. The change of energy is in the form of heat. So if we inject heat into the system, its entropy will increase. If the system releases heat, its entropy will decrease.

\section{Free energy}

Suppose we want to extract the energy of a system $x$ for work. Let us assume $x$ is large enough so that we can approximate it by a micro-canonical ensemble. We hope to extract all the energy $E(x) = e$, bringing it down to zero. However, at $E(x) = 0$, the ensemble becomes $\{x: E(x) = 0\}$, and the entropy is also reduced to a minimum $S_0$. For the system $x$ and its environment $Y$, the total entropy should not decrease. Thus if the system reduces its entropy, the environment must increase its entropy. That is, the system must release heat to the environment. It is like tidying up your living room: in making it organized, you reduce the entropy of the living room, but you release heat to the environment, which increases the entropy of the environment.

At a fixed temperature, the change of entropy $S - S_0$ is thus transferred to its environment in the form of heat $T(S-S_0)$. Therefore, we can only extract work
\begin{equation*}
e - T(S-S_0)
\end{equation*}

Since $S_0$ is a constant, we can define $e - TS$ as free energy.

Extracting work involves a change of the volume $V$ of the piston of a steam engine under a certain pressure $p$. Thermodynamics mainly studies how to convert heat to work. In this process, we have to consider the change of entropy, i.e., the count of configurations in the ensemble.

\section{Free energy \texorpdfstring{$= -T \log Z$}{= -T log Z}}

For the canonical ensemble $p(x)$, the energy $E(x) = \text{energy}(x)$ fluctuates, so we define the free energy as the expected energy minus $T$ times the entropy, i.e.,
\begin{align*}
\mathbb{E}_p[\text{energy}(x)] - T\text{ entropy}(p) &= \mathbb{E}_p[\text{energy}(x) + T \log p(x)] \\
&= \mathbb{E}_p[\text{energy}(x) + T((-\text{energy}(x)/T) - \log Z)] \\
&= -T \log Z
\end{align*}

\section{Variational approximation}

For a normalizing flow model $q(x)$,
\begin{align*}
\text{KL}(q|p) &= \mathbb{E}_q[\log q] - \mathbb{E}_q[-\text{energy}(x)/T - \log Z] \\
&= -\text{entropy}(q) + \mathbb{E}_q[\text{energy}(x)/T] + \log Z
\end{align*}

Thus
\begin{equation*}
-T \log Z = -T\text{ entropy}(q) + \mathbb{E}_q[\text{energy}(x)] - T\,\text{KL}(q|p)
\end{equation*}

So the free energy is upper-bounded by
\begin{equation*}
F(q) = \mathbb{E}_q[\text{energy}(x)] - T\text{ entropy}(q)
\end{equation*}

We can minimize $F(q)$ to obtain $q$.

\section{Reinforcement learning}

A related problem arises in reinforcement learning, where we want to find a policy $q(x)$ ($x$ now becomes the action, and we make the state implicit) that minimizes
\begin{equation*}
F(q) = \mathbb{E}_q[\text{cost}(x)] + \lambda \text{KL}(q|p_0)
\end{equation*}
where $p_0$ is a base policy.

The optimal $q$ is
\begin{equation*}
p(x) = \frac{1}{Z} \exp(-\beta \text{cost}(x)) p_0(x)
\end{equation*}
with $\beta = 1/\lambda$, because
\begin{align*}
\text{KL}(q|p) &= \mathbb{E}_q[\log q + \beta \text{cost}(x) - \log p_0 + \log Z] \\
&= \text{KL}(q|p_0) + \beta \mathbb{E}_q[\text{cost}(x)] + \log Z \\
&= F(q)/\lambda + \log Z
\end{align*}
which is minimized at $q = p$.

\end{document}