跳转至

优化理论

优化理论是机器人学中寻找最优解的数学基础。本章将讲解机器人学所需的优化知识。

学习目标

完成本章后,你将能够:

  • 理解优化问题的基本概念
  • 掌握无约束优化方法
  • 理解有约束优化方法
  • 应用优化理论解决机器人学问题

1. 优化基础

1.1 优化问题定义

一般优化问题:
minimize    f(x)
subject to  gᵢ(x) ≤ 0, i = 1, ..., m
            hⱼ(x) = 0, j = 1, ..., p

其中:
- f(x): 目标函数 (objective function)
- gᵢ(x): 不等式约束 (inequality constraints)
- hⱼ(x): 等式约束 (equality constraints)
- x: 决策变量 (decision variables)

1.2 优化问题分类

"""
优化问题分类:

1. 无约束优化 vs 有约束优化
2. 线性优化 vs 非线性优化
3. 凸优化 vs 非凸优化
4. 连续优化 vs 离散优化
5. 单目标优化 vs 多目标优化
"""

1.3 最优性条件

import numpy as np

def gradient(f, x, h=1e-5):
    """Numerically estimate the gradient of f at x via central differences.

    Parameters:
    f: scalar-valued function of a 1-D array
    x: point at which to evaluate the gradient
    h: finite-difference step size

    Returns:
    1-D numpy array of partial derivatives, one per component of x.
    """
    g = np.zeros(len(x))
    for idx in range(len(x)):
        forward = x.copy()
        backward = x.copy()
        forward[idx] += h
        backward[idx] -= h
        # Central difference: O(h^2) accurate.
        g[idx] = (f(forward) - f(backward)) / (2 * h)
    return g

def hessian(f, x, h=1e-5):
    """Numerically estimate the Hessian of f at x via central differences.

    Uses the standard four-point second-difference stencil. Because the
    stencil is symmetric in (i, j), only the upper triangle is evaluated
    and mirrored, roughly halving the number of function evaluations
    versus the naive double loop while producing identical values.

    Parameters:
    f: scalar-valued function of a 1-D array
    x: point at which to evaluate the Hessian
    h: finite-difference step size

    Returns:
    (n, n) numpy array approximating the Hessian (exactly symmetric).
    """
    n = len(x)
    H = np.zeros((n, n))

    for i in range(n):
        for j in range(i, n):
            x_pp = x.copy()
            x_pm = x.copy()
            x_mp = x.copy()
            x_mm = x.copy()

            x_pp[i] += h
            x_pp[j] += h

            x_pm[i] += h
            x_pm[j] -= h

            x_mp[i] -= h
            x_mp[j] += h

            x_mm[i] -= h
            x_mm[j] -= h

            val = (f(x_pp) - f(x_pm) - f(x_mp) + f(x_mm)) / (4 * h**2)
            H[i, j] = val
            H[j, i] = val  # stencil value is symmetric in (i, j)

    return H

# 一阶最优性条件:梯度为零
def check_first_order(f, x, tol=1e-6):
    """Check the first-order optimality condition: the gradient vanishes at x."""
    grad_norm = np.linalg.norm(gradient(f, x))
    return grad_norm < tol

# 二阶最优性条件:海森矩阵正定
def check_second_order(f, x, tol=1e-6):
    """Check the second-order condition: the Hessian at x is positive
    semidefinite (all eigenvalues above -tol, allowing numerical noise)."""
    eigs = np.linalg.eigvalsh(hessian(f, x))
    return (eigs > -tol).all()

# 示例
def f(x):
    """Example objective f(x, y) = x^2 + y^2 (global minimum at the origin)."""
    return x[0] ** 2 + x[1] ** 2

candidate = np.array([0.0, 0.0])
print("一阶最优性条件:", check_first_order(f, candidate))
print("二阶最优性条件:", check_second_order(f, candidate))

2. 无约束优化

2.1 梯度下降法

import numpy as np

def gradient_descent(f, x0, learning_rate=0.01, max_iter=1000, tol=1e-6):
    """
    Minimize f by steepest descent with a fixed step size.

    Parameters:
    f: objective function
    x0: starting point
    learning_rate: fixed step size
    max_iter: iteration budget
    tol: stop when the step length falls below this threshold

    Returns:
    x: final iterate (approximate minimizer)
    history: list of all iterates, starting with x0
    """
    current = x0.copy()
    history = [current.copy()]

    for it in range(max_iter):
        # Step against the (numerical) gradient.
        candidate = current - learning_rate * gradient(f, current)
        history.append(candidate.copy())

        # Converged when the iterate barely moved.
        if np.linalg.norm(candidate - current) < tol:
            print(f"收敛于第 {it+1} 次迭代")
            break

        current = candidate

    return current, history

# 示例:最小化 f(x, y) = x² + y²
def f(x):
    """Quadratic bowl f(x, y) = x^2 + y^2 with its minimum at the origin."""
    return x[0] ** 2 + x[1] ** 2

initial_point = np.array([2.0, 3.0])
x_opt, history = gradient_descent(f, initial_point, learning_rate=0.1)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")

2.2 牛顿法

import numpy as np

def newton_method(f, x0, max_iter=100, tol=1e-6):
    """
    Minimize f with Newton's method using numerical derivatives.

    Parameters:
    f: objective function
    x0: starting point
    max_iter: iteration budget
    tol: stop when the step length falls below this threshold

    Returns:
    x: final iterate (approximate minimizer)
    history: list of all iterates, starting with x0
    """
    current = x0.copy()
    history = [current.copy()]

    for it in range(max_iter):
        g = gradient(f, current)
        H = hessian(f, current)

        # Newton direction; fall back to steepest descent when H is singular.
        try:
            direction = -np.linalg.solve(H, g)
        except np.linalg.LinAlgError:
            direction = -g

        # Unit step length stands in for a proper line search.
        step_size = 1.0
        candidate = current + step_size * direction
        history.append(candidate.copy())

        if np.linalg.norm(candidate - current) < tol:
            print(f"收敛于第 {it+1} 次迭代")
            break

        current = candidate

    return current, history

# 示例
def f(x):
    """Quadratic test objective f(x, y) = x^2 + y^2."""
    return x[0] ** 2 + x[1] ** 2

guess = np.array([2.0, 3.0])
x_opt, history = newton_method(f, guess)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")

2.3 拟牛顿法 (BFGS)

import numpy as np

def bfgs(f, x0, max_iter=100, tol=1e-6):
    """
    BFGS quasi-Newton method with numerical gradients.

    Maintains an approximation H of the INVERSE Hessian, updated from
    the displacement s and the gradient change y at each step.

    Parameters:
    f: objective function
    x0: starting point
    max_iter: iteration budget
    tol: stop when the step length falls below this threshold

    Returns:
    x: final iterate (approximate minimizer)
    history: list of all iterates, starting with x0

    Note: a fixed unit step stands in for a line search, and the BFGS
    update is skipped when the curvature y.s is not sufficiently
    positive — updating then would divide by ~0 and destroy the
    positive definiteness of H.
    """
    n = len(x0)
    x = x0.copy()
    H = np.eye(n)  # initial inverse-Hessian approximation
    history = [x.copy()]

    grad = gradient(f, x)

    for i in range(max_iter):
        # Quasi-Newton search direction.
        d = -H @ grad

        # Fixed unit step (simplified line search).
        alpha = 1.0
        x_new = x + alpha * d
        grad_new = gradient(f, x_new)

        s = x_new - x
        y = grad_new - grad

        # BFGS update, guarded by the curvature condition y.s > 0.
        ys = y @ s
        if ys > 1e-10:
            rho = 1.0 / ys
            I = np.eye(n)
            H = (I - rho * np.outer(s, y)) @ H @ (I - rho * np.outer(y, s)) + rho * np.outer(s, s)
        # else: keep the previous H rather than divide by (near) zero.

        history.append(x_new.copy())

        if np.linalg.norm(x_new - x) < tol:
            print(f"收敛于第 {i+1} 次迭代")
            break

        x = x_new
        grad = grad_new

    return x, history

# 示例
def f(x):
    """Quadratic test objective f(x, y) = x^2 + y^2."""
    return x[0] ** 2 + x[1] ** 2

starting_point = np.array([2.0, 3.0])
x_opt, history = bfgs(f, starting_point)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")

2.4 共轭梯度法

import numpy as np

def conjugate_gradient(f, x0, max_iter=100, tol=1e-6):
    """
    Nonlinear conjugate gradient (Fletcher-Reeves) with numerical gradients.

    Parameters:
    f: objective function
    x0: starting point
    max_iter: iteration budget
    tol: stop when the step length falls below this threshold

    Returns:
    x: final iterate (approximate minimizer)
    history: list of all iterates, starting with x0

    Note: a fixed unit step stands in for a line search. When the
    previous gradient is (near) zero, the Fletcher-Reeves ratio would
    divide by zero, so beta is reset to 0 (a steepest-descent restart).
    """
    x = x0.copy()
    grad = gradient(f, x)
    d = -grad  # initial direction: steepest descent
    history = [x.copy()]

    for i in range(max_iter):
        # Fixed unit step (simplified line search).
        alpha = 1.0

        x_new = x + alpha * d
        grad_new = gradient(f, x_new)

        # Fletcher-Reeves beta, guarded against a vanishing denominator.
        denom = np.dot(grad, grad)
        beta = np.dot(grad_new, grad_new) / denom if denom > 1e-16 else 0.0

        # New conjugate direction (beta = 0 restarts at steepest descent).
        d = -grad_new + beta * d

        history.append(x_new.copy())

        if np.linalg.norm(x_new - x) < tol:
            print(f"收敛于第 {i+1} 次迭代")
            break

        x = x_new
        grad = grad_new

    return x, history

# 示例
def f(x):
    """Quadratic test objective f(x, y) = x^2 + y^2."""
    return x[0] ** 2 + x[1] ** 2

starting_point = np.array([2.0, 3.0])
x_opt, history = conjugate_gradient(f, starting_point)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")

3. 有约束优化

3.1 拉格朗日乘数法

import numpy as np
from scipy.optimize import minimize

def lagrangian_method(f, g, x0, method='SLSQP'):
    """
    Solve an equality-constrained problem via scipy's constrained
    solvers, which handle the Lagrangian machinery internally.

    Parameters:
    f: objective function
    g: list of equality-constraint functions (each gi(x) == 0 at feasibility)
    x0: starting point
    method: scipy.optimize.minimize method name

    Returns:
    result: scipy OptimizeResult
    """
    eq_constraints = [{'type': 'eq', 'fun': constraint} for constraint in g]
    return minimize(f, x0, method=method, constraints=eq_constraints)

# 示例:最小化 f(x,y) = x² + y²,约束 x + y = 1
def f(x):
    """Objective: squared distance from the origin."""
    return x[0]**2 + x[1]**2

def g(x):
    """Equality constraint in residual form: x + y - 1 == 0."""
    return x[0] + x[1] - 1

initial = np.array([0.5, 0.5])
result = lagrangian_method(f, [g], initial)

print(f"最优解: {result.x}")
print(f"最优值: {result.fun:.6f}")
print(f"约束满足: {g(result.x):.6f}")

3.2 罚函数法

import numpy as np

def penalty_method(f, g, x0, penalty_weight=1.0, max_iter=100, tol=1e-6):
    """
    Quadratic penalty method for inequality-constrained minimization.

    Constraint convention: each gi(x) <= 0 is feasible. Violations
    (gi(x) > 0) are penalized quadratically, and the penalty weight is
    doubled every outer iteration to drive the iterates toward
    feasibility.

    Parameters:
    f: objective function
    g: list of constraint functions, feasible when gi(x) <= 0
    x0: starting point
    penalty_weight: initial penalty weight
    max_iter: maximum number of outer iterations
    tol: stop when successive outer iterates move less than this

    Returns:
    x: final iterate (approximate constrained minimizer)
    history: list of outer iterates, starting with x0
    """
    x = x0.copy()
    history = [x.copy()]

    # Defined once: the closure reads penalty_weight at call time, so it
    # automatically sees each outer iteration's updated weight.
    def penalty_function(z):
        penalty = sum(max(0, gi(z)) ** 2 for gi in g)
        return f(z) + penalty_weight * penalty

    for i in range(max_iter):
        # Solve the unconstrained subproblem at the current weight.
        result = minimize(penalty_function, x, method='BFGS')
        x_new = result.x

        history.append(x_new.copy())

        if np.linalg.norm(x_new - x) < tol:
            print(f"收敛于第 {i+1} 次迭代")
            break

        # Tighten the penalty for the next outer iteration.
        penalty_weight *= 2

        x = x_new

    return x, history

# 示例
def f(x):
    """Objective: f(x, y) = x^2 + y^2."""
    return x[0]**2 + x[1]**2

def g(x):
    # Constraint x + y >= 1, rewritten in the solver's g(x) <= 0 form:
    # g(x) = 1 - x - y <= 0. (The quadratic penalty punishes g(x) > 0,
    # so the previous x[0] + x[1] - 1 actually encoded x + y <= 1,
    # contradicting the stated constraint.)
    return 1 - x[0] - x[1]

x0 = np.array([0.0, 0.0])
x_opt, history = penalty_method(f, [g], x0)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")
print(f"约束满足: {g(x_opt):.6f}")

3.3 内点法

import numpy as np
from scipy.optimize import minimize

def interior_point_method(f, g_ineq, g_eq, x0, barrier_weight=1.0,
                          max_iter=100, tol=1e-6):
    """
    Log-barrier interior-point method.

    Inequality constraints follow the convention gi(x) < 0 in the strict
    interior; a -log(-gi(x)) barrier keeps iterates feasible, and the
    barrier weight is halved each outer iteration so the solution
    approaches the true constrained optimum.

    Parameters:
    f: objective function
    g_ineq: list of inequality-constraint functions (feasible: gi(x) < 0)
    g_eq: list of equality-constraint functions (hi(x) == 0)
    x0: strictly feasible starting point
    barrier_weight: initial barrier weight
    max_iter: maximum number of outer iterations
    tol: stop when successive outer iterates move less than this

    Returns:
    x: final iterate (approximate constrained minimizer)
    history: list of outer iterates, starting with x0
    """
    x = x0.copy()
    history = [x.copy()]

    for outer in range(max_iter):
        def barrier_function(z):
            total = 0
            for constraint in g_ineq:
                value = constraint(z)
                if value >= 0:
                    return float('inf')  # outside the feasible interior
                total -= np.log(-value)
            return f(z) + barrier_weight * total

        # Equality constraints are delegated to the inner solver.
        eq_constraints = [{'type': 'eq', 'fun': h} for h in g_eq]
        result = minimize(barrier_function, x, method='SLSQP',
                          constraints=eq_constraints)
        x_new = result.x

        history.append(x_new.copy())

        if np.linalg.norm(x_new - x) < tol:
            print(f"收敛于第 {outer+1} 次迭代")
            break

        barrier_weight *= 0.5  # loosen the barrier toward the true optimum

        x = x_new

    return x, history

# 示例
def f(x):
    """Objective: f(x, y) = x^2 + y^2."""
    return x[0]**2 + x[1]**2

def g_ineq(x):
    return x[0] + x[1] - 1  # constraint: x + y <= 1 (interior when < 0)

# No equality constraints: pass an empty list rather than a dummy
# h(x) = 0 function, whose identically-zero Jacobian gives the SLSQP
# subproblem a degenerate constraint row.
x0 = np.array([0.3, 0.3])
x_opt, history = interior_point_method(f, [g_ineq], [], x0)

print(f"最优解: {x_opt}")
print(f"最优值: {f(x_opt):.6f}")
print(f"约束满足: {g_ineq(x_opt):.6f}")

4. 线性规划

4.1 单纯形法

import numpy as np
from scipy.optimize import linprog

def linear_programming(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
                       bounds=None):
    """
    Solve a linear program with scipy's HiGHS backend.

    Parameters:
    c: objective coefficients (the objective c @ x is minimized)
    A_ub, b_ub: inequality constraints A_ub @ x <= b_ub
    A_eq, b_eq: equality constraints A_eq @ x == b_eq
    bounds: per-variable (lower, upper) bounds

    Returns:
    result: scipy OptimizeResult
    """
    return linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq,
                   bounds=bounds, method='highs')

# 示例:最小化 f(x,y) = -x - 2y,约束 x + y <= 4, x + 3y <= 6
# Maximize x + 2y by minimizing its negation -x - 2y.
c = [-1, -2]
A_ub = [[1, 1], [1, 3]]   # x + y <= 4 and x + 3y <= 6
b_ub = [4, 6]
bounds = [(0, None), (0, None)]  # x, y >= 0

result = linear_programming(c, A_ub=A_ub, b_ub=b_ub, bounds=bounds)

print(f"最优解: {result.x}")
print(f"最优值: {-result.fun}")  # flip the sign back to the maximization value

5. 非线性规划

5.1 序列二次规划 (SQP)

import numpy as np
from scipy.optimize import minimize

def sequential_quadratic_programming(f, g_ineq, g_eq, x0,
                                      max_iter=100, tol=1e-6):
    """
    Solve a nonlinear program via scipy's SLSQP (an SQP implementation).

    Parameters:
    f: objective function
    g_ineq: list of inequality constraints; scipy's 'ineq' convention is
            feasible when gi(x) >= 0
    g_eq: list of equality constraints (hi(x) == 0)
    x0: starting point
    max_iter: iteration cap passed to the solver
    tol: solver ftol

    Returns:
    result: scipy OptimizeResult
    """
    constraints = ([{'type': 'ineq', 'fun': gi} for gi in g_ineq]
                   + [{'type': 'eq', 'fun': hi} for hi in g_eq])

    return minimize(f, x0, method='SLSQP', constraints=constraints,
                    options={'maxiter': max_iter, 'ftol': tol})

# 示例
def f(x):
    """Objective: f(x, y) = x^2 + y^2."""
    return x[0]**2 + x[1]**2

def g_ineq(x):
    return x[0] + x[1] - 1  # scipy 'ineq' convention: x + y >= 1

# No equality constraints: pass an empty list instead of a dummy
# always-zero constraint function, which adds a degenerate row to the
# solver's constraint Jacobian.
x0 = np.array([0.5, 0.5])
result = sequential_quadratic_programming(f, [g_ineq], [], x0)

print(f"最优解: {result.x}")
print(f"最优值: {result.fun:.6f}")

6. 最优控制

6.1 动态规划

import numpy as np

def dynamic_programming(states, actions, transition, cost, terminal_cost,
                        horizon, discount=1.0):
    """
    Finite-horizon dynamic programming by backward induction.

    Parameters:
    states: 1-D array of discrete states
    actions: 1-D array of discrete actions
    transition: next_state = transition(state, action)
    cost: stage cost cost(state, action)
    terminal_cost: terminal_cost(state), charged at the final time
    horizon: number of decision stages
    discount: discount factor applied to future cost

    Returns:
    V: (horizon + 1, n_states) value-function table
    policy: (horizon, n_states) table of optimal action indices
    """
    n_states = len(states)
    n_actions = len(actions)

    V = np.zeros((horizon + 1, n_states))
    policy = np.zeros((horizon, n_states), dtype=int)

    # Terminal stage: value equals the terminal cost.
    V[horizon] = [terminal_cost(s) for s in states]

    # Backward induction from t = horizon - 1 down to t = 0.
    for t in reversed(range(horizon)):
        for s_idx, state in enumerate(states):
            q_values = np.empty(n_actions)
            for a_idx, action in enumerate(actions):
                # Snap the successor state to the nearest grid point.
                successor = transition(state, action)
                succ_idx = int(np.argmin(np.abs(states - successor)))

                q_values[a_idx] = (cost(state, action)
                                   + discount * V[t + 1, succ_idx])

            # Greedy choice: minimal Q defines both value and policy.
            best = int(np.argmin(q_values))
            policy[t, s_idx] = best
            V[t, s_idx] = q_values[best]

    return V, policy

# 示例:一维导航
# A five-cell grid with moves left/stay/right, steering toward cell 2.
states = np.array([0, 1, 2, 3, 4])
actions = np.array([-1, 0, 1])

def transition(s, a):
    """Move by a, clamped to the grid [0, 4]."""
    return max(0, min(4, s + a))

def cost(s, a):
    """Quadratic penalty on distance from cell 2 plus control effort."""
    return (s - 2)**2 + a**2

def terminal_cost(s):
    """Terminal penalty on distance from cell 2."""
    return (s - 2)**2

V, policy = dynamic_programming(states, actions, transition, cost,
                                terminal_cost, horizon=5)

print("值函数:")
print(V)
print("\n最优策略:")
print(policy)

6.2 模型预测控制 (MPC)

import numpy as np
from scipy.optimize import minimize

class ModelPredictiveControl:
    """Finite-horizon model predictive controller.

    Rolls fixed linear dynamics forward over the horizon and selects the
    control sequence minimizing a quadratic state/control cost via SLSQP.
    """

    def __init__(self, horizon, Q, R, Qf, bounds):
        """
        horizon: number of prediction steps
        Q: state cost weight matrix
        R: control cost weight matrix
        Qf: terminal state cost weight matrix
        bounds: (low, high) bounds applied to every control input
        """
        self.horizon = horizon
        self.Q = Q
        self.R = R
        self.Qf = Qf
        self.bounds = bounds

    def predict(self, x0, u_sequence):
        """Roll the dynamics forward; returns states x_0..x_N as an array."""
        state = x0.copy()
        states = [state.copy()]

        for control in u_sequence:
            state = self.dynamics(state, control)
            states.append(state.copy())

        return np.array(states)

    def dynamics(self, x, u):
        """One step of a discrete double integrator: x+ = A x + B u."""
        A = np.array([[1, 1], [0, 1]])
        B = np.array([[0], [1]])
        return A @ x + B @ u

    def cost(self, x0, u_sequence):
        """Quadratic cost x'Qx + u'Ru over the horizon plus x_N' Qf x_N."""
        states = self.predict(x0, u_sequence)

        total = 0
        for t in range(self.horizon):
            xt = states[t]
            ut = u_sequence[t]
            total += xt.T @ self.Q @ xt + ut.T @ self.R @ ut

        # Terminal cost on the last predicted state.
        return total + states[-1].T @ self.Qf @ states[-1]

    def optimize(self, x0):
        """Solve for the optimal control sequence from state x0.

        Returns a (horizon, 1) array of controls within self.bounds.
        """
        u0 = np.zeros(self.horizon)  # warm start: all-zero controls

        result = minimize(lambda u: self.cost(x0, u.reshape(-1, 1)),
                          u0, method='SLSQP',
                          bounds=[self.bounds] * self.horizon)

        return result.x.reshape(-1, 1)

# 示例
# Controller setup: 10-step horizon, unit state/control weights,
# a heavier terminal weight, and controls clamped to [-1, 1].
horizon = 10
Q = np.eye(2)
R = np.eye(1)
Qf = np.eye(2) * 10
bounds = (-1, 1)

mpc = ModelPredictiveControl(horizon, Q, R, Qf, bounds)

# Initial state: position 5, velocity 0.
x0 = np.array([5, 0])

u_opt = mpc.optimize(x0)
print("最优控制序列:")
print(u_opt.flatten())

7. 应用示例

7.1 轨迹优化

import numpy as np
from scipy.optimize import minimize

def trajectory_optimization(start, goal, obstacles, n_points=10):
    """
    Optimize a 2-D trajectory: minimize path length subject to fixed
    endpoints and clearance from circular obstacles.

    Parameters:
    start: (2,) start position
    goal: (2,) goal position
    obstacles: list of (x, y, radius) circles to avoid
    n_points: number of waypoints, endpoints included

    Returns:
    trajectory: (n_points, 2) optimized waypoint array
    """
    # Seed with straight-line interpolation between the endpoints.
    fractions = np.linspace(0, 1, n_points).reshape(-1, 1)
    seed = start + fractions * (goal - start)

    def path_length(flat):
        """Total polyline length (the objective)."""
        pts = flat.reshape(-1, 2)
        return sum(np.linalg.norm(pts[k + 1] - pts[k])
                   for k in range(len(pts) - 1))

    def clearance(flat, obs):
        """Smallest signed distance from any waypoint to the obstacle edge."""
        pts = flat.reshape(-1, 2)
        return min(np.linalg.norm(p - obs[:2]) - obs[2] for p in pts)

    # Endpoints pinned by equality constraints; one clearance (>= 0)
    # inequality constraint per obstacle (obs bound as a default to
    # avoid the late-binding closure pitfall).
    constraints = [
        {'type': 'eq', 'fun': lambda flat: flat.reshape(-1, 2)[0] - start},
        {'type': 'eq', 'fun': lambda flat: flat.reshape(-1, 2)[-1] - goal},
    ]
    for obs in obstacles:
        constraints.append({
            'type': 'ineq',
            'fun': lambda flat, obs=obs: clearance(flat, obs)
        })

    result = minimize(path_length, seed.flatten(),
                      method='SLSQP', constraints=constraints)

    return result.x.reshape(-1, 2)

# 示例
# Plan from the origin to (5, 5) around two circular obstacles.
start = np.array([0, 0])
goal = np.array([5, 5])
obstacles = [(2, 2, 0.5), (3, 3, 0.5)]  # (x, y, radius) per obstacle

trajectory = trajectory_optimization(start, goal, obstacles)
print("优化后的轨迹:")
print(trajectory)

7.2 参数标定

import numpy as np
from scipy.optimize import minimize

def parameter_calibration(model, data, initial_params):
    """
    Fit model parameters to observations by least squares.

    Parameters:
    model: prediction function model(t, params)
    data: observations as (t, y) pairs
    initial_params: starting parameter vector

    Returns:
    params: fitted parameter vector
    """
    def sum_squared_error(params):
        """Sum of squared residuals over all observations (the objective)."""
        return sum((model(t, params) - y_obs) ** 2 for t, y_obs in data)

    # Nelder-Mead is derivative-free, which suits arbitrary user models.
    fit = minimize(sum_squared_error, initial_params, method='Nelder-Mead')
    return fit.x

# 示例:指数衰减模型 y = a * exp(-b * t)
def model(t, params):
    """Exponential decay y = a * exp(-b * t) evaluated at time t."""
    a, b = params
    return a * np.exp(-b * t)

# Observed (t, y) samples of the decaying signal.
data = [(0, 1.0), (1, 0.6), (2, 0.4), (3, 0.2), (4, 0.1)]

# Initial guess for (a, b).
initial_params = [1.0, 0.5]

params = parameter_calibration(model, data, initial_params)
print(f"标定后的参数: a={params[0]:.4f}, b={params[1]:.4f}")

8. 练习题

基础练习

  1. 实现梯度下降法
  2. 使用牛顿法求解优化问题
  3. 求解线性规划问题

进阶练习

  1. 实现 BFGS 拟牛顿法
  2. 使用内点法求解约束优化问题
  3. 实现动态规划

应用练习

  1. 实现轨迹优化
  2. 实现模型预测控制
  3. 实现参数标定

下一步

参考资源


← 返回索引 | 下一页:数值计算 →