2 years ago · 6588017ee0
--- a/papers/grad.pdf
+++ b/papers/grad.pdf
--- a/papers/grad.tex
+++ b/papers/grad.tex
@@ -102,6 +102,8 @@ Let's compute the derivatives of all our models. Throughout the entire paper $n$
 
				   y &= \sigma(a^{(1)}w^{(2)} + b^{(2)})
			
 
				 \end{align}
			
 
				 
			
 
				+The superscript in parentheses denotes the current layer. For example, $a_i^{(l)}$ denotes the activation from the $l$-th layer on the $i$-th sample.
			
 
				+
			
 
				 \subsubsection{Feed-Forward}
			
 
				 
			
 
				 \begin{align}