[Deep Learning Notes I] 3: Step by Step (Jupyter)

1. Imports

import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v2 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%reload_ext autoreload
%autoreload 2

np.random.seed(1)

2. Overall flowchart

(Figure: overall flowchart of the model)

3. Initialization

3.1 Creating a 2-layer neural network

def initialize_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """
    np.random.seed(1)

    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters
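
A quick sanity check of the shapes (the layer sizes below are arbitrary illustration values, not the notebook's test case):

parameters = initialize_parameters(3, 2, 1)
print(parameters["W1"].shape)  # (2, 3)
print(parameters["b1"].shape)  # (2, 1)
print(parameters["W2"].shape)  # (1, 2)
print(parameters["b2"].shape)  # (1, 1)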

3.2 L-layer (deep) neural network

def initialize_parameters_deep(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)  # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

    return parameters
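
For example, with illustrative layer sizes [5, 4, 3] (5 inputs, a hidden layer of 4 units, an output layer of 3 units):

parameters = initialize_parameters_deep([5, 4, 3])
print(parameters["W1"].shape)  # (4, 5)
print(parameters["b1"].shape)  # (4, 1)
print(parameters["W2"].shape)  # (3, 4)
print(parameters["b2"].shape)  # (3, 1)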

4. Forward propagation

4.1 Linear forward

def linear_forward(A, W, b):
    """
    Implement the linear part of a layer's forward propagation.

    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation parameter
    cache -- a python tuple containing "A", "W" and "b"; stored for computing the backward pass efficiently
    """
    Z = np.dot(W, A) + b
    cache = (A, W, b)

    return Z, cache
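
The linear step computes Z = W·A + b, with b broadcast across the m examples. A minimal shape check on randomly generated toy data (not the notebook's test case):

np.random.seed(2)
A = np.random.randn(3, 2)   # 3 units in the previous layer, 2 examples
W = np.random.randn(1, 3)   # 1 unit in the current layer
b = np.random.randn(1, 1)
Z, linear_cache = linear_forward(A, W, b)
print(Z.shape)  # (1, 2)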

4.2 Activation functions


def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)

    return A, cache
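
The `sigmoid` and `relu` helpers come from `dnn_utils_v2` and are not listed in this note. Assuming each one returns the activation together with `Z` as its activation cache, a minimal sketch could look like this (an assumption about the helpers, not their actual source):

def sigmoid(Z):
    # A = 1 / (1 + e^(-Z)); Z is returned as the cache used later by sigmoid_backward
    A = 1 / (1 + np.exp(-Z))
    return A, Z

def relu(Z):
    # A = max(0, Z) element-wise; Z is returned as the cache used later by relu_backward
    A = np.maximum(0, Z)
    return A, Z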

4.3 L-layer forward propagation

# GRADED FUNCTION: L_model_forward

def L_model_forward(X, parameters):
    """
    Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation

    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- output of initialize_parameters_deep()

    Returns:
    AL -- last post-activation value
    caches -- list of caches containing:
              every cache of linear_activation_forward() with "relu" (there are L-1 of them, indexed from 0 to L-2)
              the cache of linear_activation_forward() with "sigmoid" (there is one, indexed L-1)
    """
    caches = []
    A = X
    L = len(parameters) // 2  # number of layers in the neural network

    # [LINEAR -> RELU] * (L-1)
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)],
                                             activation="relu")
        caches.append(cache)

    # LINEAR -> SIGMOID for the output layer
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)],
                                          activation="sigmoid")
    caches.append(cache)

    return AL, caches
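
Putting initialization and the forward pass together on toy data (the sizes are illustrative):

np.random.seed(1)
X = np.random.randn(5, 4)                        # 5 input features, 4 examples
parameters = initialize_parameters_deep([5, 4, 3, 1])
AL, caches = L_model_forward(X, parameters)
print(AL.shape)     # (1, 4): one predicted probability per example
print(len(caches))  # 3: one cache per layer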

5. Cost function

def compute_cost(AL, Y):
    """
    Implement the cross-entropy cost function.

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """
    m = Y.shape[1]

    cost = - np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
    cost = np.squeeze(cost)  # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).

    return cost
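
The cost is the cross-entropy J = -(1/m) * sum( Y*log(AL) + (1-Y)*log(1-AL) ). A tiny worked example with hand-picked numbers (not from the notebook):

Y = np.array([[1, 0]])
AL = np.array([[0.8, 0.1]])
# -(1/2) * (log(0.8) + log(0.9)) ≈ 0.164
print(compute_cost(AL, Y))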

6. Backward propagation

6.1 Linear backward

def linear_backward(dZ, cache):
    """
    Implement the linear portion of backward propagation for a single layer (layer l)

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m  # keeps shape (n_l, 1)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db
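
These three lines implement the standard gradients of the linear step: dW = (1/m) * dZ · A_prev^T, db = (1/m) * (row-wise sum of dZ over the m examples), and dA_prev = W^T · dZ. A quick shape check on toy data:

np.random.seed(1)
dZ = np.random.randn(1, 2)
A_prev = np.random.randn(3, 2)
W = np.random.randn(1, 3)
b = np.random.randn(1, 1)
dA_prev, dW, db = linear_backward(dZ, (A_prev, W, b))
print(dA_prev.shape, dW.shape, db.shape)  # (3, 2) (1, 3) (1, 1)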

6.2 Activation backward

# GRADED FUNCTION: linear_activation_backward

def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
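
Like the forward helpers, `relu_backward` and `sigmoid_backward` are imported from `dnn_utils_v2`. Assuming the activation cache is `Z` (as in the sketch in section 4.2), they presumably compute dZ = dA * g'(Z); a rough sketch under that assumption:

def relu_backward(dA, cache):
    # g'(Z) is 1 where Z > 0 and 0 elsewhere
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def sigmoid_backward(dA, cache):
    # g'(Z) = s * (1 - s), where s = sigmoid(Z)
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)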

6.3 L-layer backward propagation

# GRADED FUNCTION: L_model_backward

def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
              every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e. l = 0...L-2)
              the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation: derivative of the cost with respect to AL
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, 'sigmoid')

    # Loop from l = L-2 down to l = 0: lth layer (RELU -> LINEAR) gradients
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, 'relu')
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
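
Continuing the toy forward-pass example from section 4, the gradients for every layer can be collected like this (the labels are chosen arbitrarily):

Y = np.array([[1, 0, 1, 0]])  # one label per example
grads = L_model_backward(AL, Y, caches)
print(sorted(grads.keys()))
# ['dA0', 'dA1', 'dA2', 'dW1', 'dW2', 'dW3', 'db1', 'db2', 'db3']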

6.4 Update parameters


def update_parameters(parameters, grads, learning_rate):
    """
    Update parameters using gradient descent

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients, output of L_model_backward

    Returns:
    parameters -- python dictionary containing your updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    """
    L = len(parameters) // 2  # number of layers in the neural network

    for l in range(L):
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]

    return parameters
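
Each parameter is moved against its gradient: W[l] := W[l] - learning_rate * dW[l] and b[l] := b[l] - learning_rate * db[l]. One gradient-descent step on the toy example (the learning rate is arbitrary):

parameters = update_parameters(parameters, grads, learning_rate=0.1)
print(parameters["W1"].shape)  # shapes are unchanged: (4, 5)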

7. Conclusion

Congrats on implementing all the functions required for building a deep neural network!

We know it was a long assignment, but going forward it will only get better. The next part of the assignment is easier.

In the next assignment you will put all these together to build two models:

  • A two-layer neural network
  • An L-layer neural network

You will in fact use these models to classify cat vs non-cat images!


# GRADED FUNCTION: L_layer_model

def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):  # lr was 0.009
    """
    Implements an L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Arguments:
    X -- data, numpy array of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(1)
    costs = []  # keep track of cost

    # Parameters initialization.
    parameters = initialize_parameters_deep(layers_dims)

    # Loop (gradient descent)
    for i in range(0, num_iterations):

        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
        AL, caches = L_model_forward(X, parameters)

        # Compute cost.
        cost = compute_cost(AL, Y)

        # Backward propagation.
        grads = L_model_backward(AL, Y, caches)

        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print and record the cost every 100 iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters
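
A hedged sketch of how the finished model might be trained on the cat / non-cat data from the course. Here `train_x` (flattened, normalized images of shape (12288, m)) and `train_y` (labels of shape (1, m)) are hypothetical variables that this note does not define, and the layer sizes are illustrative:

# train_x, train_y are assumed to be the preprocessed dataset; they are not defined in this note
layers_dims = [12288, 20, 7, 5, 1]  # input layer -> three hidden layers -> 1 output unit (illustrative sizes)
parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations=2500, print_cost=True)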
