张量基础

张量（Tensor）是 TensorFlow 中的核心数据结构。本章将详细介绍张量的创建、操作和运算。

什么是张量？

张量是多维数组的泛化形式。根据维度的不同，张量有不同的名称：

维度	名称	示例
0	标量（Scalar）	`5`
1	向量（Vector）	`[1, 2, 3]`
2	矩阵（Matrix）	`[[1, 2], [3, 4]]`
3+	张量（Tensor）	`[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]`

创建张量

使用 tf.constant

tf.constant 用于创建不可变的张量：

import tensorflow as tf

# 标量
scalar = tf.constant(5)
print(scalar)  # tf.Tensor(5, shape=(), dtype=int32)

# 向量
vector = tf.constant([1, 2, 3])
print(vector)  # tf.Tensor([1 2 3], shape=(3,), dtype=int32)

# 矩阵
matrix = tf.constant([[1, 2], [3, 4]])
print(matrix)
# tf.Tensor(
# [[1 2]
#  [3 4]], shape=(2, 2), dtype=int32)

# 指定数据类型
float_tensor = tf.constant([1, 2, 3], dtype=tf.float32)
print(float_tensor.dtype)  # <dtype: 'float32'>

# 指定形状创建
tensor = tf.constant(0, shape=(2, 3))
print(tensor)
# tf.Tensor(
# [[0 0 0]
#  [0 0 0]], shape=(2, 3), dtype=int32)

使用 tf.Variable

tf.Variable 用于创建可变的张量，通常用于存储模型参数：

# 创建变量
var = tf.Variable([1.0, 2.0, 3.0])
print(var)
# <tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([1., 2., 3.], dtype=float32)>

# 修改变量值
var.assign([4.0, 5.0, 6.0])
print(var)  # [4. 5. 6.]

# 增量修改
var.assign_add([1.0, 1.0, 1.0])
print(var)  # [5. 6. 7.]

var.assign_sub([1.0, 1.0, 1.0])
print(var)  # [4. 5. 6.]

# 变量的属性
print(var.shape)   # (3,)
print(var.dtype)   # <dtype: 'float32'>
print(var.numpy()) # [4. 5. 6.]

特殊张量

# 全零张量
zeros = tf.zeros([2, 3])
print(zeros)
# [[0. 0. 0.]
#  [0. 0. 0.]]

# 全一张量
ones = tf.ones([2, 3])
print(ones)
# [[1. 1. 1.]
#  [1. 1. 1.]]

# 全填充张量
filled = tf.fill([2, 3], 9)
print(filled)
# [[9 9 9]
#  [9 9 9]]

# 单位矩阵
identity = tf.eye(3)
print(identity)
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

# 随机张量（均匀分布）
random_uniform = tf.random.uniform([2, 3], minval=0, maxval=1)
print(random_uniform)

# 随机张量（正态分布）
random_normal = tf.random.normal([2, 3], mean=0, stddev=1)
print(random_normal)

# Truncated normal distribution: samples farther than two standard
# deviations from the mean are discarded and redrawn
truncated = tf.random.truncated_normal([2, 3], mean=0, stddev=1)
print(truncated)

从其他数据创建

import numpy as np

# 从 NumPy 数组创建
np_array = np.array([1, 2, 3])
tensor = tf.convert_to_tensor(np_array)
print(tensor)  # [1 2 3]

# 从列表创建
tensor = tf.convert_to_tensor([[1, 2], [3, 4]])
print(tensor)

# Create a new tensor from an existing tensor.
# tf.convert_to_tensor does not change the dtype of an existing tensor:
# requesting float32 for an int32 tensor raises ValueError, so use tf.cast.
original = tf.constant([1, 2, 3])
new_tensor = tf.cast(original, tf.float32)
print(new_tensor)  # [1. 2. 3.]

张量运算

基本数学运算

a = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
b = tf.constant([[5, 6], [7, 8]], dtype=tf.float32)

# 加法
print(a + b)
# [[ 6.  8.]
#  [10. 12.]]

# 减法
print(a - b)
# [[-4. -4.]
#  [-4. -4.]]

# 乘法（逐元素）
print(a * b)
# [[ 5. 12.]
#  [21. 32.]]

# Division (element-wise); operands are float32, so results print with float32 precision
print(a / b)
# [[0.2        0.33333334]
#  [0.42857143 0.5       ]]

# 整除
print(a // b)
# [[0. 0.]
#  [0. 0.]]

# 取余
print(a % b)
# [[1. 2.]
#  [3. 4.]]

# 幂运算
print(tf.pow(a, 2))
# [[ 1.  4.]
#  [ 9. 16.]]

# 平方根
print(tf.sqrt(a))
# [[1.        1.4142135]
#  [1.7320508 2.       ]]

矩阵运算

a = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
b = tf.constant([[5, 6], [7, 8]], dtype=tf.float32)

# 矩阵乘法
print(tf.matmul(a, b))
# [[19. 22.]
#  [43. 50.]]

# 使用 @ 运算符
print(a @ b)
# [[19. 22.]
#  [43. 50.]]

# 转置
print(tf.transpose(a))
# [[1. 3.]
#  [2. 4.]]

# 逆矩阵
print(tf.linalg.inv(a))
# [[-2.   1. ]
#  [ 1.5 -0.5]]

# 行列式
print(tf.linalg.det(a))  # -2.0

# Build a diagonal matrix from the given diagonal values
# (the input is integer, so the result is an int32 matrix)
print(tf.linalg.diag([1, 2, 3]))
# [[1 0 0]
#  [0 2 0]
#  [0 0 3]]

# Eigenvalues and eigenvectors.
# `a` ([[1, 2], [3, 4]]) is not symmetric, so tf.linalg.eigh -- which assumes a
# self-adjoint matrix -- would not return the eigen-decomposition of `a`.
# tf.linalg.eig handles general square matrices; its outputs are complex-valued.
eigenvalues, eigenvectors = tf.linalg.eig(a)
print("特征值:", eigenvalues)
print("特征向量:", eigenvectors)

归约运算

归约运算将张量的某些维度压缩，得到一个标量或低维张量：

x = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32)

# 求和
print(tf.reduce_sum(x))       # 21.0（所有元素求和）
print(tf.reduce_sum(x, axis=0))  # [5. 7. 9.]（按列求和）
print(tf.reduce_sum(x, axis=1))  # [ 6. 15.]（按行求和）

# 均值
print(tf.reduce_mean(x))      # 3.5
print(tf.reduce_mean(x, axis=0))  # [2.5 3.5 4.5]

# 最大值、最小值
print(tf.reduce_max(x))       # 6.0
print(tf.reduce_min(x))       # 1.0

# 乘积
print(tf.reduce_prod(x))      # 720.0

# 逻辑运算
bool_tensor = tf.constant([True, False, True])
print(tf.reduce_all(bool_tensor))  # False（逻辑与）
print(tf.reduce_any(bool_tensor))  # True（逻辑或）

# 保持维度
print(tf.reduce_sum(x, axis=0, keepdims=True))
# [[5. 7. 9.]]（保持二维）

广播机制

当两个张量形状不同但相互兼容时，TensorFlow 会自动进行广播（形状不兼容时会报错）：

# 标量与张量
a = tf.constant([1, 2, 3])
b = tf.constant(2)
print(a + b)  # [3 4 5]

# 向量与矩阵
a = tf.constant([[1, 2, 3], [4, 5, 6]])
b = tf.constant([10, 20, 30])
print(a + b)
# [[11 22 33]
#  [14 25 36]]

# 不同形状的向量
a = tf.constant([[1], [2], [3]])  # shape: (3, 1)
b = tf.constant([10, 20])          # shape: (2,)
print(a + b)
# [[11 21]
#  [12 22]
#  [13 23]]

广播规则：

1. 从最右边的维度开始，逐一向左比较两个张量的维度
2. 两个维度兼容的条件：大小相等，或其中一个为 1，或其中一个不存在
3. 缺失的维度视为 1，大小为 1 的维度会被扩展（复制）以匹配另一个张量

张量操作

索引和切片

x = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# 基本索引
print(x[0])      # [1 2 3]（第一行）
print(x[0, 1])   # 2（第一行第二列）
print(x[-1])     # [7 8 9]（最后一行）

# 切片
print(x[0:2])    # [[1 2 3], [4 5 6]]（前两行）
print(x[:, 0])   # [1 4 7]（第一列）
print(x[1:, 1:]) # [[5 6], [8 9]]
print(x[::2])    # [[1 2 3], [7 8 9]]（每隔一行）
print(x[:, ::-1]) # [[3 2 1], [6 5 4], [9 8 7]]（列反转）

形状变换

x = tf.constant([[1, 2, 3], [4, 5, 6]])

# reshape
print(tf.reshape(x, [3, 2]))
# [[1 2]
#  [3 4]
#  [5 6]]

print(tf.reshape(x, [-1]))  # 展平
# [1 2 3 4 5 6]

print(tf.reshape(x, [-1, 2]))  # -1 表示自动计算
# [[1 2]
#  [3 4]
#  [5 6]]

# 转置
print(tf.transpose(x))
# [[1 4]
#  [2 5]
#  [3 6]]

# 扩展维度
print(tf.expand_dims(x, 0))  # 在位置 0 扩展
# shape: (1, 2, 3)

print(tf.expand_dims(x, -1))  # 在最后扩展
# shape: (2, 3, 1)

# 压缩维度
x = tf.constant([[[1, 2, 3]]])  # shape: (1, 1, 3)
print(tf.squeeze(x))  # [1 2 3]

拼接和分割

a = tf.constant([[1, 2], [3, 4]])
b = tf.constant([[5, 6], [7, 8]])

# 拼接
print(tf.concat([a, b], axis=0))  # 按行拼接
# [[1 2]
#  [3 4]
#  [5 6]
#  [7 8]]

print(tf.concat([a, b], axis=1))  # 按列拼接
# [[1 2 5 6]
#  [3 4 7 8]]

# 堆叠（增加新维度）
print(tf.stack([a, b], axis=0))  # shape: (2, 2, 2)
# [[[1 2]
#   [3 4]]
#  [[5 6]
#   [7 8]]]

# 分割
x = tf.constant([1, 2, 3, 4, 5, 6])
print(tf.split(x, 3))  # 分成 3 份
# [<tf.Tensor: shape=(2,), ...>, <tf.Tensor: shape=(2,), ...>, <tf.Tensor: shape=(2,), ...>]

print(tf.split(x, [2, 4]))  # 按指定大小分割
# 第一份 2 个元素，第二份 4 个元素

其他常用操作

x = tf.constant([3, 1, 4, 1, 5, 9, 2, 6])

# 排序
print(tf.sort(x))  # [1 1 2 3 4 5 6 9]
print(tf.argsort(x))  # [1 3 6 0 2 4 7 5]（排序后的索引）

# 查找最大/最小值索引
print(tf.argmax(x))  # 5（最大值索引）
print(tf.argmin(x))  # 1（最小值索引）

# 去重
print(tf.unique(x))
# Unique(y=<tf.Tensor: ...>, idx=<tf.Tensor: ...>)

# 条件选择
a = tf.constant([1, 2, 3])
b = tf.constant([4, 5, 6])
condition = tf.constant([True, False, True])
print(tf.where(condition, a, b))  # [1 5 3]

# 填充
x = tf.constant([[1, 2], [3, 4]])
print(tf.pad(x, [[1, 1], [2, 2]]))
# [[0 0 0 0 0 0]
#  [0 0 1 2 0 0]
#  [0 0 3 4 0 0]
#  [0 0 0 0 0 0]]

数据类型

TensorFlow 支持多种数据类型：

# 整数类型
tf.int8, tf.int16, tf.int32, tf.int64
tf.uint8, tf.uint16, tf.uint32, tf.uint64

# 浮点类型
tf.float16, tf.float32, tf.float64

# 复数类型
tf.complex64, tf.complex128

# 布尔类型
tf.bool

# 字符串类型
tf.string

# 类型转换
x = tf.constant([1, 2, 3], dtype=tf.int32)
y = tf.cast(x, tf.float32)
print(y.dtype)  # <dtype: 'float32'>

张量与 NumPy 互操作

import numpy as np

# TensorFlow 张量转 NumPy
tensor = tf.constant([1, 2, 3])
np_array = tensor.numpy()
print(type(np_array))  # <class 'numpy.ndarray'>

# NumPy 数组转 TensorFlow 张量
np_array = np.array([1, 2, 3])
tensor = tf.convert_to_tensor(np_array)
print(type(tensor))  # <class 'tensorflow.python.framework.ops.EagerTensor'>

# TensorFlow 操作可以接受 NumPy 数组
result = tf.add(np.array([1, 2]), np.array([3, 4]))
print(result)  # [4 6]

小结

本章介绍了 TensorFlow 中张量的基本操作，包括：

张量的创建方式（constant、Variable、特殊张量）
张量的数学运算和矩阵运算
归约运算和广播机制
张量的索引、切片和形状变换
张量的拼接、分割和其他操作
数据类型转换和 NumPy 互操作

熟练掌握张量操作是使用 TensorFlow 进行深度学习的基础。下一章我们将学习自动微分和梯度计算。

什么是张量？​

创建张量​

使用 tf.constant​

使用 tf.Variable​

特殊张量​

从其他数据创建​

张量运算​

基本数学运算​

矩阵运算​

归约运算​

广播机制​

张量操作​

索引和切片​

形状变换​

拼接和分割​

其他常用操作​

数据类型​

张量与 NumPy 互操作​

小结​