2-3, The Mechanism of Automatic Differentiation
Neural networks typically rely on backpropagation to compute the gradients used to update network parameters, and working those gradients out by hand is complicated and error-prone. Deep learning frameworks can carry out this gradient computation for us automatically. TensorFlow generally uses the gradient tape, tf.GradientTape, to record the forward computation and then plays the tape back to obtain the gradient values automatically. This way of differentiating with tf.GradientTape is known as TensorFlow's automatic differentiation mechanism.
1. Computing Derivatives with the Gradient Tape
import tensorflow as tf
import numpy as np

# Derivative of f(x) = a*x**2 + b*x + c

x = tf.Variable(0.0, name="x", dtype=tf.float32)
a = tf.constant(1.0)
b = tf.constant(-2.0)
c = tf.constant(1.0)

with tf.GradientTape() as tape:
    y = a*tf.pow(x, 2) + b*x + c

dy_dx = tape.gradient(y, x)
print(dy_dx)
tf.Tensor(-2.0, shape=(), dtype=float32)
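As a quick sanity check: here f(x) = x**2 - 2x + 1 = (x - 1)**2, so f'(x) = 2x - 2 and f'(0) = -2, which matches the printed value.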
# Gradients with respect to constant tensors can also be computed,
# but the constants must be watched explicitly.

with tf.GradientTape() as tape:
    tape.watch([a, b, c])
    y = a*tf.pow(x, 2) + b*x + c

dy_dx, dy_da, dy_db, dy_dc = tape.gradient(y, [x, a, b, c])
print(dy_da)
print(dy_dc)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
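These values agree with the analytic partial derivatives: dy/da = x**2 = 0 at x = 0, and dy/dc = 1.

By default a tape can only be queried once. If you want to pull out these gradients in separate calls without re-running the forward pass, a persistent tape is one option. A minimal sketch, assuming the standard persistent=True argument of tf.GradientTape:

# Sketch: a persistent tape (persistent=True) can be queried multiple times;
# delete it afterwards to release the resources it holds.
with tf.GradientTape(persistent=True) as tape:
    tape.watch([a, b, c])
    y = a*tf.pow(x, 2) + b*x + c

dy_da = tape.gradient(y, a)  # x**2
dy_dc = tape.gradient(y, c)  # 1
del tape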
# Second-order derivatives can be obtained by nesting two tapes.

with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape1:
        y = a*tf.pow(x, 2) + b*x + c
    dy_dx = tape1.gradient(y, x)
dy2_dx2 = tape2.gradient(dy_dx, x)

print(dy2_dx2)
tf.Tensor(2.0, shape=(), dtype=float32)
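This matches the analytic second derivative: f''(x) = 2a = 2 for every x.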
# The gradient tape can also be used inside autograph.

@tf.function
def f(x):
    a = tf.constant(1.0)
    b = tf.constant(-2.0)
    c = tf.constant(1.0)

    # Cast the independent variable to tf.float32
    x = tf.cast(x, tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(x)
        y = a*tf.pow(x, 2) + b*x + c
    dy_dx = tape.gradient(y, x)

    return (dy_dx, y)

tf.print(f(tf.constant(0.0)))
tf.print(f(tf.constant(1.0)))
(-2, 1)
(0, 0)
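Again consistent with f(x) = (x - 1)**2: at x = 0 the derivative is -2 and the value is 1, while at x = 1 both are 0.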
2. Finding a Minimum with the Gradient Tape and an Optimizer
# Find the minimum of f(x) = a*x**2 + b*x + c
# using optimizer.apply_gradients

x = tf.Variable(0.0, name="x", dtype=tf.float32)
a = tf.constant(1.0)
b = tf.constant(-2.0)
c = tf.constant(1.0)

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = a*tf.pow(x, 2) + b*x + c
    dy_dx = tape.gradient(y, x)
    optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])

tf.print("y =", y, "; x =", x)
y = 0 ; x = 0.999998569
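Since f(x) = (x - 1)**2 attains its minimum value 0 at x = 1, 1000 steps of SGD with a learning rate of 0.01 drive x very close to 1, as the output shows.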
# Find the minimum of f(x) = a*x**2 + b*x + c
# using optimizer.minimize
# optimizer.minimize is equivalent to first computing the gradient with a tape
# and then calling apply_gradients.

x = tf.Variable(0.0, name="x", dtype=tf.float32)

# Note that f() takes no arguments
def f():
    a = tf.constant(1.0)
    b = tf.constant(-2.0)
    c = tf.constant(1.0)
    y = a*tf.pow(x, 2) + b*x + c
    return y

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    optimizer.minimize(f, [x])

tf.print("y =", f(), "; x =", x)
y = 0 ; x = 0.999998569
# Find the minimum inside autograph
# using optimizer.apply_gradients

x = tf.Variable(0.0, name="x", dtype=tf.float32)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def minimizef():
    a = tf.constant(1.0)
    b = tf.constant(-2.0)
    c = tf.constant(1.0)

    for _ in tf.range(1000):  # Note: use tf.range(1000), not range(1000), inside autograph
        with tf.GradientTape() as tape:
            y = a*tf.pow(x, 2) + b*x + c
        dy_dx = tape.gradient(y, x)
        optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])

    y = a*tf.pow(x, 2) + b*x + c
    return y

tf.print(minimizef())
tf.print(x)
0
0.999998569
# Find the minimum inside autograph
# using optimizer.minimize

x = tf.Variable(0.0, name="x", dtype=tf.float32)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def f():
    a = tf.constant(1.0)
    b = tf.constant(-2.0)
    c = tf.constant(1.0)
    y = a*tf.pow(x, 2) + b*x + c
    return y

@tf.function
def train(epoch):
    for _ in tf.range(epoch):
        optimizer.minimize(f, [x])
    return f()

tf.print(train(1000))
tf.print(x)
0
0.999998569
