跳转至

优化理论

优化理论是机器人学中寻找最优解的数学基础。本章将讲解机器人学所需的优化知识。

学习目标

完成本章后,你将能够:

  • 理解优化问题的基本概念
  • 掌握无约束优化方法
  • 理解有约束优化方法
  • 应用优化理论解决机器人学问题

1. 优化基础

1.1 优化问题定义

一般优化问题:
minimize    f(x)
subject to  gᵢ(x) ≤ 0, i = 1, ..., m
            hⱼ(x) = 0, j = 1, ..., p

其中:
- f(x): 目标函数 (objective function)
- gᵢ(x): 不等式约束 (inequality constraints)
- hⱼ(x): 等式约束 (equality constraints)
- x: 决策变量 (decision variables)

1.2 优化问题分类

"""
优化问题分类:

1. 无约束优化 vs 有约束优化
2. 线性优化 vs 非线性优化
3. 凸优化 vs 非凸优化
4. 连续优化 vs 离散优化
5. 单目标优化 vs 多目标优化
"""

1.3 最优性条件

import numpy as np

def gradient(f, x, h=1e-5):
    """Numerically estimate the gradient of f at x via central differences.

    Parameters:
    f: scalar-valued function of a 1-D array
    x: point at which to evaluate the gradient
    h: finite-difference step size

    Returns:
    1-D numpy array of partial derivatives, one per component of x.
    """
    g = np.zeros(len(x))
    for idx in range(len(x)):
        forward = x.copy()
        backward = x.copy()
        forward[idx] += h
        backward[idx] -= h
        # Central difference: O(h^2) accurate.
        g[idx] = (f(forward) - f(backward)) / (2 * h)
    return g

def hessian(f, x, h=1e-5):
    """Numerically estimate the Hessian of f at x via central differences.

    Uses the standard four-point second-difference stencil. Because the
    stencil is symmetric in (i, j), only the upper triangle is evaluated
    and mirrored, roughly halving the number of function evaluations
    versus the naive double loop while producing identical values.

    Parameters:
    f: scalar-valued function of a 1-D array
    x: point at which to evaluate the Hessian
    h: finite-difference step size

    Returns:
    (n, n) numpy array approximating the Hessian (exactly symmetric).
    """
    n = len(x)
    H = np.zeros((n, n))

    for i in range(n):
        for j in range(i, n):
            x_pp = x.copy()
            x_pm = x.copy()
            x_mp = x.copy()
            x_mm = x.copy()

            x_pp[i] += h
            x_pp[j] += h

            x_pm[i] += h
            x_pm[j] -= h

            x_mp[i] -= h
            x_mp[j] += h

            x_mm[i] -= h
            x_mm[j] -= h

            val = (f(x_pp) - f(x_pm) - f(x_mp) + f(x_mm)) / (4 * h**2)
            H[i, j] = val
            H[j, i] = val  # stencil value is symmetric in (i, j)

    return H

# 一阶最优性条件:梯度为零
def check_first_order(f, x, tol=1e-6):
    """Check the first-order optimality condition: the gradient vanishes at x."""
    grad_norm = np.linalg.norm(gradient(f, x))
    return grad_norm < tol

# 二阶最优性条件:海森矩阵正定
def check_second_order(f, x, tol=1e-6):
    """Check the second-order condition: the Hessian at x is positive
    semidefinite (all eigenvalues above -tol, allowing numerical noise)."""
    eigs = np.linalg.eigvalsh(hessian(f, x))
    return (eigs > -tol).all()

# 示例
def f(x):
    """Example objective f(x, y) = x^2 + y^2 (global minimum at the origin)."""
    return x[0] ** 2 + x[1] ** 2

candidate = np.array([0.0, 0.0])
print("一阶最优性条件:", check_first_order(f, candidate))
print("二阶最优性条件:", check_second_order(f, candidate))

2. 无约束优化

2.1 梯度下降法

import numpy as np

def gradient_descent(f, x0, learning_rate=0.01, max_iter=1000, tol=1e-6):
    """
    Minimize f by steepest descent with a fixed step size.

    Parameters:
    f: objective function
    x0: starting point
    learning_rate: fixed step size
    max_iter: iteration budget
    tol: stop when the step length falls below this threshold

    Returns:
    x: final iterate (approximate minimizer)
    history: list of all iterates, starting with x0
    """
    current = x0.copy()
    history = [current.copy()]

    for it in range(max_iter):
        # Step against the (numerical) gradient.
        candidate = current - learning_rate * gradient(f, current)
        history.append(candidate.copy())

        # Converged when the iterate barely moved.
        if np.linalg.norm(candidate - current) < tol:
            print(f"收敛于第 {it+1} 次迭代")
            break

        current = candidate

    return current, history

# 示例:最小化 f(x, y) = x² + y²
def f(x):
    """Quadratic bowl f(x, y) = x^2 + y^2 with its minimum at the origin."""
    return x[0] ** 2 + x[1] ** 2

initial_point = np.array([2.0, 3.0])
x_opt, history = gradient_descent(f, initial_point, learning_rate=0.1)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")

2.2 牛顿法

import numpy as np

def newton_method(f, x0, max_iter=100, tol=1e-6):
    """
    Minimize f with Newton's method using numerical derivatives.

    Parameters:
    f: objective function
    x0: starting point
    max_iter: iteration budget
    tol: stop when the step length falls below this threshold

    Returns:
    x: final iterate (approximate minimizer)
    history: list of all iterates, starting with x0
    """
    current = x0.copy()
    history = [current.copy()]

    for it in range(max_iter):
        g = gradient(f, current)
        H = hessian(f, current)

        # Newton direction; fall back to steepest descent when H is singular.
        try:
            direction = -np.linalg.solve(H, g)
        except np.linalg.LinAlgError:
            direction = -g

        # Unit step length stands in for a proper line search.
        step_size = 1.0
        candidate = current + step_size * direction
        history.append(candidate.copy())

        if np.linalg.norm(candidate - current) < tol:
            print(f"收敛于第 {it+1} 次迭代")
            break

        current = candidate

    return current, history

# 示例
def f(x):
    """Quadratic test objective f(x, y) = x^2 + y^2."""
    return x[0] ** 2 + x[1] ** 2

guess = np.array([2.0, 3.0])
x_opt, history = newton_method(f, guess)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")

2.3 拟牛顿法 (BFGS)

import numpy as np

def bfgs(f, x0, max_iter=100, tol=1e-6):
    """
    BFGS quasi-Newton method with numerical gradients.

    Maintains an approximation H of the INVERSE Hessian, updated from
    the displacement s and the gradient change y at each step.

    Parameters:
    f: objective function
    x0: starting point
    max_iter: iteration budget
    tol: stop when the step length falls below this threshold

    Returns:
    x: final iterate (approximate minimizer)
    history: list of all iterates, starting with x0

    Note: a fixed unit step stands in for a line search, and the BFGS
    update is skipped when the curvature y.s is not sufficiently
    positive — updating then would divide by ~0 and destroy the
    positive definiteness of H.
    """
    n = len(x0)
    x = x0.copy()
    H = np.eye(n)  # initial inverse-Hessian approximation
    history = [x.copy()]

    grad = gradient(f, x)

    for i in range(max_iter):
        # Quasi-Newton search direction.
        d = -H @ grad

        # Fixed unit step (simplified line search).
        alpha = 1.0
        x_new = x + alpha * d
        grad_new = gradient(f, x_new)

        s = x_new - x
        y = grad_new - grad

        # BFGS update, guarded by the curvature condition y.s > 0.
        ys = y @ s
        if ys > 1e-10:
            rho = 1.0 / ys
            I = np.eye(n)
            H = (I - rho * np.outer(s, y)) @ H @ (I - rho * np.outer(y, s)) + rho * np.outer(s, s)
        # else: keep the previous H rather than divide by (near) zero.

        history.append(x_new.copy())

        if np.linalg.norm(x_new - x) < tol:
            print(f"收敛于第 {i+1} 次迭代")
            break

        x = x_new
        grad = grad_new

    return x, history

# 示例
def f(x):
    """Quadratic test objective f(x, y) = x^2 + y^2."""
    return x[0] ** 2 + x[1] ** 2

starting_point = np.array([2.0, 3.0])
x_opt, history = bfgs(f, starting_point)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")

2.4 共轭梯度法

import numpy as np

def conjugate_gradient(f, x0, max_iter=100, tol=1e-6):
    """
    Nonlinear conjugate gradient (Fletcher-Reeves) with numerical gradients.

    Parameters:
    f: objective function
    x0: starting point
    max_iter: iteration budget
    tol: stop when the step length falls below this threshold

    Returns:
    x: final iterate (approximate minimizer)
    history: list of all iterates, starting with x0

    Note: a fixed unit step stands in for a line search. When the
    previous gradient is (near) zero, the Fletcher-Reeves ratio would
    divide by zero, so beta is reset to 0 (a steepest-descent restart).
    """
    x = x0.copy()
    grad = gradient(f, x)
    d = -grad  # initial direction: steepest descent
    history = [x.copy()]

    for i in range(max_iter):
        # Fixed unit step (simplified line search).
        alpha = 1.0

        x_new = x + alpha * d
        grad_new = gradient(f, x_new)

        # Fletcher-Reeves beta, guarded against a vanishing denominator.
        denom = np.dot(grad, grad)
        beta = np.dot(grad_new, grad_new) / denom if denom > 1e-16 else 0.0

        # New conjugate direction (beta = 0 restarts at steepest descent).
        d = -grad_new + beta * d

        history.append(x_new.copy())

        if np.linalg.norm(x_new - x) < tol:
            print(f"收敛于第 {i+1} 次迭代")
            break

        x = x_new
        grad = grad_new

    return x, history

# 示例
def f(x):
    """Quadratic test objective f(x, y) = x^2 + y^2."""
    return x[0] ** 2 + x[1] ** 2

starting_point = np.array([2.0, 3.0])
x_opt, history = conjugate_gradient(f, starting_point)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")

3. 有约束优化

3.1 拉格朗日乘数法

import numpy as np
from scipy.optimize import minimize

def lagrangian_method(f, g, x0, method='SLSQP'):
    """
    Solve an equality-constrained problem via scipy's constrained
    solvers, which handle the Lagrangian machinery internally.

    Parameters:
    f: objective function
    g: list of equality-constraint functions (each gi(x) == 0 at feasibility)
    x0: starting point
    method: scipy.optimize.minimize method name

    Returns:
    result: scipy OptimizeResult
    """
    eq_constraints = [{'type': 'eq', 'fun': constraint} for constraint in g]
    return minimize(f, x0, method=method, constraints=eq_constraints)

# 示例:最小化 f(x,y) = x² + y²,约束 x + y = 1
def f(x):
    """Objective: squared distance from the origin."""
    return x[0]**2 + x[1]**2

def g(x):
    """Equality constraint in residual form: x + y - 1 == 0."""
    return x[0] + x[1] - 1

initial = np.array([0.5, 0.5])
result = lagrangian_method(f, [g], initial)

print(f"最优解: {result.x}")
print(f"最优值: {result.fun:.6f}")
print(f"约束满足: {g(result.x):.6f}")

3.2 罚函数法

import numpy as np

def penalty_method(f, g, x0, penalty_weight=1.0, max_iter=100, tol=1e-6):
    """
    Quadratic penalty method for inequality-constrained minimization.

    Constraint convention: each gi(x) <= 0 is feasible. Violations
    (gi(x) > 0) are penalized quadratically, and the penalty weight is
    doubled every outer iteration to drive the iterates toward
    feasibility.

    Parameters:
    f: objective function
    g: list of constraint functions, feasible when gi(x) <= 0
    x0: starting point
    penalty_weight: initial penalty weight
    max_iter: maximum number of outer iterations
    tol: stop when successive outer iterates move less than this

    Returns:
    x: final iterate (approximate constrained minimizer)
    history: list of outer iterates, starting with x0
    """
    x = x0.copy()
    history = [x.copy()]

    # Defined once: the closure reads penalty_weight at call time, so it
    # automatically sees each outer iteration's updated weight.
    def penalty_function(z):
        penalty = sum(max(0, gi(z)) ** 2 for gi in g)
        return f(z) + penalty_weight * penalty

    for i in range(max_iter):
        # Solve the unconstrained subproblem at the current weight.
        result = minimize(penalty_function, x, method='BFGS')
        x_new = result.x

        history.append(x_new.copy())

        if np.linalg.norm(x_new - x) < tol:
            print(f"收敛于第 {i+1} 次迭代")
            break

        # Tighten the penalty for the next outer iteration.
        penalty_weight *= 2

        x = x_new

    return x, history

# 示例
def f(x):
    """Objective: f(x, y) = x^2 + y^2."""
    return x[0]**2 + x[1]**2

def g(x):
    # Constraint x + y >= 1, rewritten in the solver's g(x) <= 0 form:
    # g(x) = 1 - x - y <= 0. (The quadratic penalty punishes g(x) > 0,
    # so the previous x[0] + x[1] - 1 actually encoded x + y <= 1,
    # contradicting the stated constraint.)
    return 1 - x[0] - x[1]

x0 = np.array([0.0, 0.0])
x_opt, history = penalty_method(f, [g], x0)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")
print(f"约束满足: {g(x_opt):.6f}")

3.3 内点法

import numpy as np
from scipy.optimize import minimize

def interior_point_method(f, g_ineq, g_eq, x0, barrier_weight=1.0,
                          max_iter=100, tol=1e-6):
    """
    Log-barrier interior-point method.

    Inequality constraints follow the convention gi(x) < 0 in the strict
    interior; a -log(-gi(x)) barrier keeps iterates feasible, and the
    barrier weight is halved each outer iteration so the solution
    approaches the true constrained optimum.

    Parameters:
    f: objective function
    g_ineq: list of inequality-constraint functions (feasible: gi(x) < 0)
    g_eq: list of equality-constraint functions (hi(x) == 0)
    x0: strictly feasible starting point
    barrier_weight: initial barrier weight
    max_iter: maximum number of outer iterations
    tol: stop when successive outer iterates move less than this

    Returns:
    x: final iterate (approximate constrained minimizer)
    history: list of outer iterates, starting with x0
    """
    x = x0.copy()
    history = [x.copy()]

    for outer in range(max_iter):
        def barrier_function(z):
            total = 0
            for constraint in g_ineq:
                value = constraint(z)
                if value >= 0:
                    return float('inf')  # outside the feasible interior
                total -= np.log(-value)
            return f(z) + barrier_weight * total

        # Equality constraints are delegated to the inner solver.
        eq_constraints = [{'type': 'eq', 'fun': h} for h in g_eq]
        result = minimize(barrier_function, x, method='SLSQP',
                          constraints=eq_constraints)
        x_new = result.x

        history.append(x_new.copy())

        if np.linalg.norm(x_new - x) < tol:
            print(f"收敛于第 {outer+1} 次迭代")
            break

        barrier_weight *= 0.5  # loosen the barrier toward the true optimum

        x = x_new

    return x, history

# 示例
def f(x):
    """Objective: f(x, y) = x^2 + y^2."""
    return x[0]**2 + x[1]**2

def g_ineq(x):
    return x[0] + x[1] - 1  # constraint: x + y <= 1 (interior when < 0)

# No equality constraints: pass an empty list rather than a dummy
# h(x) = 0 function, whose identically-zero Jacobian gives the SLSQP
# subproblem a degenerate constraint row.
x0 = np.array([0.3, 0.3])
x_opt, history = interior_point_method(f, [g_ineq], [], x0)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")
print(f"约束满足: {g_ineq(x_opt):.6f}")

4. 线性规划

4.1 单纯形法

import numpy as np
from scipy.optimize import linprog

def linear_programming(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
                       bounds=None):
    """
    Solve a linear program with scipy's HiGHS backend.

    Parameters:
    c: objective coefficients (the objective c @ x is minimized)
    A_ub, b_ub: inequality constraints A_ub @ x <= b_ub
    A_eq, b_eq: equality constraints A_eq @ x == b_eq
    bounds: per-variable (lower, upper) bounds

    Returns:
    result: scipy OptimizeResult
    """
    return linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq,
                   bounds=bounds, method='highs')

# 示例:最小化 f(x,y) = -x - 2y,约束 x + y <= 4, x + 3y <= 6
# Maximize x + 2y by minimizing its negation -x - 2y.
c = [-1, -2]
A_ub = [[1, 1], [1, 3]]   # x + y <= 4 and x + 3y <= 6
b_ub = [4, 6]
bounds = [(0, None), (0, None)]  # x, y >= 0

result = linear_programming(c, A_ub=A_ub, b_ub=b_ub, bounds=bounds)

print(f"最优解: {result.x}")
print(f"最优值: {-result.fun}")  # flip the sign back to the maximization value

5. 非线性规划

5.1 序列二次规划 (SQP)

import numpy as np
from scipy.optimize import minimize

def sequential_quadratic_programming(f, g_ineq, g_eq, x0,
                                      max_iter=100, tol=1e-6):
    """
    Solve a nonlinear program via scipy's SLSQP (an SQP implementation).

    Parameters:
    f: objective function
    g_ineq: list of inequality constraints; scipy's 'ineq' convention is
            feasible when gi(x) >= 0
    g_eq: list of equality constraints (hi(x) == 0)
    x0: starting point
    max_iter: iteration cap passed to the solver
    tol: solver ftol

    Returns:
    result: scipy OptimizeResult
    """
    constraints = ([{'type': 'ineq', 'fun': gi} for gi in g_ineq]
                   + [{'type': 'eq', 'fun': hi} for hi in g_eq])

    return minimize(f, x0, method='SLSQP', constraints=constraints,
                    options={'maxiter': max_iter, 'ftol': tol})

# 示例
def f(x):
    """Objective: f(x, y) = x^2 + y^2."""
    return x[0]**2 + x[1]**2

def g_ineq(x):
    return x[0] + x[1] - 1  # scipy 'ineq' convention: x + y >= 1

# No equality constraints: pass an empty list instead of a dummy
# always-zero constraint function, which adds a degenerate row to the
# solver's constraint Jacobian.
x0 = np.array([0.5, 0.5])
result = sequential_quadratic_programming(f, [g_ineq], [], x0)

print(f"最优解: {result.x}")
print(f"最优值: {result.fun:.6f}")

6. 最优控制

6.1 动态规划

import numpy as np

def dynamic_programming(states, actions, transition, cost, terminal_cost,
                        horizon, discount=1.0):
    """
    Finite-horizon dynamic programming by backward induction.

    Parameters:
    states: 1-D array of discrete states
    actions: 1-D array of discrete actions
    transition: next_state = transition(state, action)
    cost: stage cost cost(state, action)
    terminal_cost: terminal_cost(state), charged at the final time
    horizon: number of decision stages
    discount: discount factor applied to future cost

    Returns:
    V: (horizon + 1, n_states) value-function table
    policy: (horizon, n_states) table of optimal action indices
    """
    n_states = len(states)
    n_actions = len(actions)

    V = np.zeros((horizon + 1, n_states))
    policy = np.zeros((horizon, n_states), dtype=int)

    # Terminal stage: value equals the terminal cost.
    V[horizon] = [terminal_cost(s) for s in states]

    # Backward induction from t = horizon - 1 down to t = 0.
    for t in reversed(range(horizon)):
        for s_idx, state in enumerate(states):
            q_values = np.empty(n_actions)
            for a_idx, action in enumerate(actions):
                # Snap the successor state to the nearest grid point.
                successor = transition(state, action)
                succ_idx = int(np.argmin(np.abs(states - successor)))

                q_values[a_idx] = (cost(state, action)
                                   + discount * V[t + 1, succ_idx])

            # Greedy choice: minimal Q defines both value and policy.
            best = int(np.argmin(q_values))
            policy[t, s_idx] = best
            V[t, s_idx] = q_values[best]

    return V, policy

# 示例:一维导航
# A five-cell grid with moves left/stay/right, steering toward cell 2.
states = np.array([0, 1, 2, 3, 4])
actions = np.array([-1, 0, 1])

def transition(s, a):
    """Move by a, clamped to the grid [0, 4]."""
    return max(0, min(4, s + a))

def cost(s, a):
    """Quadratic penalty on distance from cell 2 plus control effort."""
    return (s - 2)**2 + a**2

def terminal_cost(s):
    """Terminal penalty on distance from cell 2."""
    return (s - 2)**2

V, policy = dynamic_programming(states, actions, transition, cost,
                                terminal_cost, horizon=5)

print("值函数:")
print(V)
print("\n最优策略:")
print(policy)

6.2 模型预测控制 (MPC)

import numpy as np
from scipy.optimize import minimize

class ModelPredictiveControl:
    """Finite-horizon model predictive controller.

    Rolls fixed linear dynamics forward over the horizon and selects the
    control sequence minimizing a quadratic state/control cost via SLSQP.
    """

    def __init__(self, horizon, Q, R, Qf, bounds):
        """
        horizon: number of prediction steps
        Q: state cost weight matrix
        R: control cost weight matrix
        Qf: terminal state cost weight matrix
        bounds: (low, high) bounds applied to every control input
        """
        self.horizon = horizon
        self.Q = Q
        self.R = R
        self.Qf = Qf
        self.bounds = bounds

    def predict(self, x0, u_sequence):
        """Roll the dynamics forward; returns states x_0..x_N as an array."""
        state = x0.copy()
        states = [state.copy()]

        for control in u_sequence:
            state = self.dynamics(state, control)
            states.append(state.copy())

        return np.array(states)

    def dynamics(self, x, u):
        """One step of a discrete double integrator: x+ = A x + B u."""
        A = np.array([[1, 1], [0, 1]])
        B = np.array([[0], [1]])
        return A @ x + B @ u

    def cost(self, x0, u_sequence):
        """Quadratic cost x'Qx + u'Ru over the horizon plus x_N' Qf x_N."""
        states = self.predict(x0, u_sequence)

        total = 0
        for t in range(self.horizon):
            xt = states[t]
            ut = u_sequence[t]
            total += xt.T @ self.Q @ xt + ut.T @ self.R @ ut

        # Terminal cost on the last predicted state.
        return total + states[-1].T @ self.Qf @ states[-1]

    def optimize(self, x0):
        """Solve for the optimal control sequence from state x0.

        Returns a (horizon, 1) array of controls within self.bounds.
        """
        u0 = np.zeros(self.horizon)  # warm start: all-zero controls

        result = minimize(lambda u: self.cost(x0, u.reshape(-1, 1)),
                          u0, method='SLSQP',
                          bounds=[self.bounds] * self.horizon)

        return result.x.reshape(-1, 1)

# 示例
# Controller setup: 10-step horizon, unit state/control weights,
# a heavier terminal weight, and controls clamped to [-1, 1].
horizon = 10
Q = np.eye(2)
R = np.eye(1)
Qf = np.eye(2) * 10
bounds = (-1, 1)

mpc = ModelPredictiveControl(horizon, Q, R, Qf, bounds)

# Initial state: position 5, velocity 0.
x0 = np.array([5, 0])

u_opt = mpc.optimize(x0)
print("最优控制序列:")
print(u_opt.flatten())

7. 应用示例

7.1 轨迹优化

import numpy as np
from scipy.optimize import minimize

def trajectory_optimization(start, goal, obstacles, n_points=10):
    """
    Optimize a 2-D trajectory: minimize path length subject to fixed
    endpoints and clearance from circular obstacles.

    Parameters:
    start: (2,) start position
    goal: (2,) goal position
    obstacles: list of (x, y, radius) circles to avoid
    n_points: number of waypoints, endpoints included

    Returns:
    trajectory: (n_points, 2) optimized waypoint array
    """
    # Seed with straight-line interpolation between the endpoints.
    fractions = np.linspace(0, 1, n_points).reshape(-1, 1)
    seed = start + fractions * (goal - start)

    def path_length(flat):
        """Total polyline length (the objective)."""
        pts = flat.reshape(-1, 2)
        return sum(np.linalg.norm(pts[k + 1] - pts[k])
                   for k in range(len(pts) - 1))

    def clearance(flat, obs):
        """Smallest signed distance from any waypoint to the obstacle edge."""
        pts = flat.reshape(-1, 2)
        return min(np.linalg.norm(p - obs[:2]) - obs[2] for p in pts)

    # Endpoints pinned by equality constraints; one clearance (>= 0)
    # inequality constraint per obstacle (obs bound as a default to
    # avoid the late-binding closure pitfall).
    constraints = [
        {'type': 'eq', 'fun': lambda flat: flat.reshape(-1, 2)[0] - start},
        {'type': 'eq', 'fun': lambda flat: flat.reshape(-1, 2)[-1] - goal},
    ]
    for obs in obstacles:
        constraints.append({
            'type': 'ineq',
            'fun': lambda flat, obs=obs: clearance(flat, obs)
        })

    result = minimize(path_length, seed.flatten(),
                      method='SLSQP', constraints=constraints)

    return result.x.reshape(-1, 2)

# 示例
# Plan from the origin to (5, 5) around two circular obstacles.
start = np.array([0, 0])
goal = np.array([5, 5])
obstacles = [(2, 2, 0.5), (3, 3, 0.5)]  # (x, y, radius) per obstacle

trajectory = trajectory_optimization(start, goal, obstacles)
print("优化后的轨迹:")
print(trajectory)

7.2 参数标定

import numpy as np
from scipy.optimize import minimize

def parameter_calibration(model, data, initial_params):
    """
    Fit model parameters to observations by least squares.

    Parameters:
    model: prediction function model(t, params)
    data: observations as (t, y) pairs
    initial_params: starting parameter vector

    Returns:
    params: fitted parameter vector
    """
    def sum_squared_error(params):
        """Sum of squared residuals over all observations (the objective)."""
        return sum((model(t, params) - y_obs) ** 2 for t, y_obs in data)

    # Nelder-Mead is derivative-free, which suits arbitrary user models.
    fit = minimize(sum_squared_error, initial_params, method='Nelder-Mead')
    return fit.x

# 示例:指数衰减模型 y = a * exp(-b * t)
def model(t, params):
    """Exponential decay y = a * exp(-b * t) evaluated at time t."""
    a, b = params
    return a * np.exp(-b * t)

# Observed (t, y) samples of the decaying signal.
data = [(0, 1.0), (1, 0.6), (2, 0.4), (3, 0.2), (4, 0.1)]

# Initial guess for (a, b).
initial_params = [1.0, 0.5]

params = parameter_calibration(model, data, initial_params)
print(f"标定后的参数: a={params[0]:.4f}, b={params[1]:.4f}")

8. 练习题

基础练习

  1. 实现梯度下降法
  2. 使用牛顿法求解优化问题
  3. 求解线性规划问题

进阶练习

  1. 实现 BFGS 拟牛顿法
  2. 使用内点法求解约束优化问题
  3. 实现动态规划

应用练习

  1. 实现轨迹优化
  2. 实现模型预测控制
  3. 实现参数标定

下一步

参考资源


← 返回索引 | 下一页:数值计算 →