非确定性规划¶

非确定性规划是指在状态转移不确定的环境中进行规划，需要考虑所有可能的结果。本章将介绍非确定性规划的核心概念和算法。

学习目标¶

完成本章后，你将能够：

理解非确定性环境的表示方法
掌握 And/Or 图搜索算法
理解确定化技术
了解模型检测方法

1. 非确定性表示¶

1.1 非确定性状态转移系统¶

非确定性状态转移系统：
- 状态集合 S
- 动作集合 A
- 转移关系 τ: S × A → 2^S

与确定性系统的区别：
- 确定性：执行动作后到达唯一状态
- 非确定性：执行动作后可能到达多个状态中的一个

class NondeterministicSystem:
    """非确定性系统"""

    def __init__(self, states, actions, transition_relation):
        """
        初始化非确定性系统

        参数：
        states: 状态列表
        actions: 动作列表
        transition_relation: 转移关系 τ(s, a) -> 可能的下一个状态集合
        """
        self.states = states
        self.actions = actions
        self.transition_relation = transition_relation

    def get_possible_next_states(self, state, action):
        """获取可能的下一个状态"""
        return self.transition_relation(state, action)

    def is_safe(self, state, action, unsafe_states):
        """检查动作是否安全"""
        possible_states = self.get_possible_next_states(state, action)
        return not any(s in unsafe_states for s in possible_states)

# 示例：非确定性网格世界
class NondeterministicGridWorld:
    """非确定性网格世界"""

    def __init__(self, width, height, goal, obstacles):
        self.width = width
        self.height = height
        self.goal = goal
        self.obstacles = obstacles

    def transition_relation(self, state, action):
        """转移关系"""
        x, y = state
        possible_states = []

        # 计算期望的下一个状态
        if action == 'up':
            expected = (x, min(y + 1, self.height - 1))
        elif action == 'down':
            expected = (x, max(y - 1, 0))
        elif action == 'left':
            expected = (max(x - 1, 0), y)
        elif action == 'right':
            expected = (min(x + 1, self.width - 1), y)
        else:
            expected = state

        # 添加期望状态
        if expected not in self.obstacles:
            possible_states.append(expected)

        # 添加可能的滑动状态
        if action in ['up', 'down']:
            # 可能向左或向右滑动
            left = (max(x - 1, 0), y)
            right = (min(x + 1, self.width - 1), y)
            if left not in self.obstacles:
                possible_states.append(left)
            if right not in self.obstacles:
                possible_states.append(right)

        # 如果没有可能的状态，保持原位
        if not possible_states:
            possible_states.append(state)

        return possible_states

1.2 非确定性规划问题¶

class NondeterministicPlanningProblem:
    """非确定性规划问题"""

    def __init__(self, system, initial_state, goal_states, unsafe_states=None):
        """
        初始化规划问题

        参数：
        system: 非确定性系统
        initial_state: 初始状态
        goal_states: 目标状态集合
        unsafe_states: 不安全状态集合
        """
        self.system = system
        self.initial_state = initial_state
        self.goal_states = goal_states
        self.unsafe_states = unsafe_states or set()

    def is_goal(self, state):
        """检查是否达到目标"""
        return state in self.goal_states

    def is_safe(self, state):
        """检查状态是否安全"""
        return state not in self.unsafe_states

    def get_applicable_actions(self, state):
        """获取可应用的动作"""
        applicable = []
        for action in self.system.actions:
            possible_states = self.system.get_possible_next_states(state, action)
            # 检查是否所有可能的状态都是安全的
            if all(self.is_safe(s) for s in possible_states):
                applicable.append(action)
        return applicable

2. And/Or 图搜索¶

2.1 And/Or 图定义¶

And/Or 图：
- Or 节点：选择一个动作
- And 节点：考虑所有可能的结果

搜索过程：
1. 从初始状态开始（Or 节点）
2. 选择一个动作（Or 节点）
3. 考虑所有可能的下一个状态（And 节点）
4. 对每个可能的状态递归搜索

class AndOrNode:
    """And/Or 图节点"""

    def __init__(self, state, node_type='or', parent=None, action=None):
        """
        初始化节点

        参数：
        state: 状态
        node_type: 节点类型 ('or' 或 'and')
        parent: 父节点
        action: 到达此节点的动作
        """
        self.state = state
        self.node_type = node_type
        self.parent = parent
        self.action = action
        self.children = []
        self.solution = None

def and_or_search(problem, state, visited=None):
    """
    And/Or 图搜索

    参数：
    problem: 规划问题
    state: 当前状态
    visited: 已访问的状态集合

    返回：
    plan: 解决方案（如果存在）
    """
    if visited is None:
        visited = set()

    # 检查是否达到目标
    if problem.is_goal(state):
        return []

    # 检查是否已访问（避免循环）
    if state in visited:
        return None

    visited.add(state)

    # 获取可应用的动作
    actions = problem.get_applicable_actions(state)

    for action in actions:
        # 获取可能的下一个状态
        possible_states = problem.system.get_possible_next_states(state, action)

        # 对每个可能的状态递归搜索
        sub_plans = []
        all_succeed = True

        for next_state in possible_states:
            sub_plan = and_or_search(problem, next_state, visited.copy())

            if sub_plan is None:
                all_succeed = False
                break

            sub_plans.append((next_state, sub_plan))

        # 如果所有可能的状态都有解决方案
        if all_succeed:
            # 构建解决方案
            plan = {
                'action': action,
                'sub_plans': sub_plans
            }
            return plan

    return None

# 示例
system = NondeterministicGridWorld(3, 3, (2, 2), [(1, 1)])
problem = NondeterministicPlanningProblem(system, (0, 0), {(2, 2)})

plan = and_or_search(problem, (0, 0))
print("解决方案:", plan)

2.2 强解决方案¶

def strong_solution(problem, state, visited=None):
    """
    强解决方案：保证在所有情况下都能达到目标

    参数：
    problem: 规划问题
    state: 当前状态
    visited: 已访问的状态集合

    返回：
    plan: 强解决方案（如果存在）
    """
    if visited is None:
        visited = set()

    # 检查是否达到目标
    if problem.is_goal(state):
        return []

    # 检查是否已访问
    if state in visited:
        return None

    visited.add(state)

    # 获取可应用的动作
    actions = problem.get_applicable_actions(state)

    for action in actions:
        # 获取可能的下一个状态
        possible_states = problem.system.get_possible_next_states(state, action)

        # 对每个可能的状态递归搜索
        sub_plans = []
        all_succeed = True

        for next_state in possible_states:
            sub_plan = strong_solution(problem, next_state, visited.copy())

            if sub_plan is None:
                all_succeed = False
                break

            sub_plans.append((next_state, sub_plan))

        # 如果所有可能的状态都有强解决方案
        if all_succeed:
            plan = {
                'action': action,
                'sub_plans': sub_plans,
                'type': 'strong'
            }
            return plan

    return None

def cyclic_strong_solution(problem, state, visited=None, path=None):
    """
    循环强解决方案：允许循环但保证最终达到目标

    参数：
    problem: 规划问题
    state: 当前状态
    visited: 已访问的状态集合
    path: 当前路径

    返回：
    plan: 循环强解决方案（如果存在）
    """
    if visited is None:
        visited = set()
    if path is None:
        path = []

    # 检查是否达到目标
    if problem.is_goal(state):
        return []

    # 检查是否在当前路径中（循环）
    if state in path:
        # 允许循环，继续搜索
        pass

    path.append(state)

    # 获取可应用的动作
    actions = problem.get_applicable_actions(state)

    for action in actions:
        possible_states = problem.system.get_possible_next_states(state, action)

        sub_plans = []
        all_succeed = True

        for next_state in possible_states:
            sub_plan = cyclic_strong_solution(problem, next_state, visited.copy(), path.copy())

            if sub_plan is None:
                all_succeed = False
                break

            sub_plans.append((next_state, sub_plan))

        if all_succeed:
            plan = {
                'action': action,
                'sub_plans': sub_plans,
                'type': 'cyclic_strong'
            }
            return plan

    return None

3. 确定化技术¶

3.1 最可能确定化¶

def most_likely_determinization(problem):
    """
    最可能确定化：选择最可能的结果作为确定性转移

    参数：
    problem: 非确定性规划问题

    返回：
    deterministic_problem: 确定性规划问题
    """
    # 创建确定性转移函数
    def deterministic_transition(state, action):
        possible_states = problem.system.get_possible_next_states(state, action)

        # 假设所有可能状态等概率
        # 选择第一个状态（或根据某些启发式选择）
        return possible_states[0]

    # 创建确定性系统
    deterministic_system = DeterministicSystem(
        problem.system.states,
        problem.system.actions,
        deterministic_transition
    )

    # 创建确定性规划问题
    deterministic_problem = DeterministicPlanningProblem(
        deterministic_system,
        problem.initial_state,
        problem.goal_states
    )

    return deterministic_problem

class DeterministicSystem:
    """确定性系统"""

    def __init__(self, states, actions, transition_function):
        self.states = states
        self.actions = actions
        self.transition_function = transition_function

    def get_next_state(self, state, action):
        """获取下一个状态"""
        return self.transition_function(state, action)

class DeterministicPlanningProblem:
    """确定性规划问题"""

    def __init__(self, system, initial_state, goal_states):
        self.system = system
        self.initial_state = initial_state
        self.goal_states = goal_states

    def is_goal(self, state):
        return state in self.goal_states

    def get_applicable_actions(self, state):
        return self.system.actions

3.2 所有可能的确定化¶

def all_determinizations(problem):
    """
    所有可能的确定化：生成所有可能的确定性规划问题

    参数：
    problem: 非确定性规划问题

    返回：
    deterministic_problems: 确定性规划问题列表
    """
    deterministic_problems = []

    # 为每个动作的每个可能结果创建确定性问题
    for action in problem.system.actions:
        # 获取所有可能的转移
        all_transitions = {}

        for state in problem.system.states:
            possible_states = problem.system.get_possible_next_states(state, action)
            all_transitions[state] = possible_states

        # 生成确定性问题
        for i in range(max(len(states) for states in all_transitions.values())):
            def deterministic_transition(state, action=action, idx=i):
                possible_states = all_transitions.get(state, [state])
                return possible_states[idx % len(possible_states)]

            deterministic_system = DeterministicSystem(
                problem.system.states,
                problem.system.actions,
                deterministic_transition
            )

            deterministic_problem = DeterministicPlanningProblem(
                deterministic_system,
                problem.initial_state,
                problem.goal_states
            )

            deterministic_problems.append(deterministic_problem)

    return deterministic_problems

3.3 计划验证¶

def validate_plan(plan, problem):
    """
    验证计划是否在所有情况下都能达到目标

    参数：
    plan: 计划
    problem: 非确定性规划问题

    返回：
    is_valid: 是否有效
    """
    # 模拟所有可能的执行路径
    def simulate(state, plan_index):
        if plan_index >= len(plan):
            return problem.is_goal(state)

        action = plan[plan_index]
        possible_states = problem.system.get_possible_next_states(state, action)

        # 检查所有可能的结果
        for next_state in possible_states:
            if not simulate(next_state, plan_index + 1):
                return False

        return True

    return simulate(problem.initial_state, 0)

4. 行为树¶

4.1 行为树表示¶

class BehaviorTree:
    """行为树"""

    def __init__(self):
        self.root = None

    def set_root(self, node):
        """设置根节点"""
        self.root = node

    def execute(self, state):
        """执行行为树"""
        if self.root is None:
            return None
        return self.root.execute(state)

class BTNode:
    """行为树节点基类"""

    def __init__(self, name):
        self.name = name

    def execute(self, state):
        """执行节点"""
        raise NotImplementedError

class SequenceNode(BTNode):
    """顺序节点：依次执行子节点"""

    def __init__(self, name, children):
        super().__init__(name)
        self.children = children

    def execute(self, state):
        for child in self.children:
            result = child.execute(state)
            if result == 'failure':
                return 'failure'
            if result == 'running':
                return 'running'
        return 'success'

class SelectorNode(BTNode):
    """选择节点：选择一个子节点执行"""

    def __init__(self, name, children):
        super().__init__(name)
        self.children = children

    def execute(self, state):
        for child in self.children:
            result = child.execute(state)
            if result == 'success':
                return 'success'
            if result == 'running':
                return 'running'
        return 'failure'

class ActionNode(BTNode):
    """动作节点：执行动作"""

    def __init__(self, name, action):
        super().__init__(name)
        self.action = action

    def execute(self, state):
        # 执行动作
        return 'success'

class ConditionNode(BTNode):
    """条件节点：检查条件"""

    def __init__(self, name, condition):
        super().__init__(name)
        self.condition = condition

    def execute(self, state):
        if self.condition(state):
            return 'success'
        return 'failure'

# 示例
def create_navigation_behavior():
    """创建导航行为"""
    # 条件：是否到达目标
    at_goal = ConditionNode("At Goal", lambda s: s == (2, 2))

    # 动作：移动
    move_up = ActionNode("Move Up", 'up')
    move_down = ActionNode("Move Down", 'down')
    move_left = ActionNode("Move Left", 'left')
    move_right = ActionNode("Move Right", 'right')

    # 选择节点：选择移动方向
    move_selector = SelectorNode("Move Selector", [
        move_up, move_down, move_left, move_right
    ])

    # 顺序节点：检查目标，然后移动
    root = SequenceNode("Navigation", [
        at_goal,
        move_selector
    ])

    return root

5. 实践练习¶

练习 1：实现 And/Or 图搜索¶

def and_or_search_exercise():
    # 创建非确定性系统
    system = NondeterministicGridWorld(3, 3, (2, 2), [(1, 1)])

    # 创建规划问题
    problem = NondeterministicPlanningProblem(system, (0, 0), {(2, 2)})

    # 搜索解决方案
    plan = and_or_search(problem, (0, 0))

    if plan:
        print("找到解决方案:", plan)
    else:
        print("无解决方案")

练习 2：实现确定化技术¶

def determinization_exercise():
    # 创建非确定性系统
    system = NondeterministicGridWorld(3, 3, (2, 2), [(1, 1)])

    # 创建规划问题
    problem = NondeterministicPlanningProblem(system, (0, 0), {(2, 2)})

    # 最可能确定化
    det_problem = most_likely_determinization(problem)

    print("确定性规划问题创建成功")
    print(f"初始状态: {det_problem.initial_state}")
    print(f"目标状态: {det_problem.goal_states}")

练习 3：实现行为树¶

def behavior_tree_exercise():
    # 创建行为树
    bt = BehaviorTree()
    root = create_navigation_behavior()
    bt.set_root(root)

    # 执行行为树
    state = (0, 0)
    result = bt.execute(state)

    print(f"状态: {state}, 结果: {result}")

6. 常见问题¶

1. 计算复杂度高¶

问题：非确定性规划计算量大

解决方案： - 使用启发式搜索 - 使用确定化技术 - 使用近似方法

2. 解决方案不存在¶

问题：找不到强解决方案

解决方案： - 使用弱解决方案 - 使用循环解决方案 - 放宽约束

3. 行为树设计困难¶

问题：如何设计有效的行为树

解决方案： - 使用模块化设计 - 使用学习方法 - 使用自动合成

下一步¶

时间规划 — 学习时间约束下的规划
运动规划 — 学习机器人运动规划
规划与学习 — 学习规划与学习的结合

参考资源¶

← 返回索引 | 下一页：时间规划 →