非确定性规划¶
非确定性规划是指在状态转移不确定的环境中进行规划,需要考虑所有可能的结果。本章将介绍非确定性规划的核心概念和算法。
学习目标¶
完成本章后,你将能够:
- 理解非确定性环境的表示方法
- 掌握 And/Or 图搜索算法
- 理解确定化技术
- 了解模型检测方法
1. 非确定性表示¶
1.1 非确定性状态转移系统¶
非确定性状态转移系统:
- 状态集合 S
- 动作集合 A
- 转移关系 τ: S × A → 2^S
与确定性系统的区别:
- 确定性:执行动作后到达唯一状态
- 非确定性:执行动作后可能到达多个状态中的一个
class NondeterministicSystem:
"""非确定性系统"""
def __init__(self, states, actions, transition_relation):
"""
初始化非确定性系统
参数:
states: 状态列表
actions: 动作列表
transition_relation: 转移关系 τ(s, a) -> 可能的下一个状态集合
"""
self.states = states
self.actions = actions
self.transition_relation = transition_relation
def get_possible_next_states(self, state, action):
"""获取可能的下一个状态"""
return self.transition_relation(state, action)
def is_safe(self, state, action, unsafe_states):
"""检查动作是否安全"""
possible_states = self.get_possible_next_states(state, action)
return not any(s in unsafe_states for s in possible_states)
# 示例:非确定性网格世界
class NondeterministicGridWorld:
"""非确定性网格世界"""
def __init__(self, width, height, goal, obstacles):
self.width = width
self.height = height
self.goal = goal
self.obstacles = obstacles
def transition_relation(self, state, action):
"""转移关系"""
x, y = state
possible_states = []
# 计算期望的下一个状态
if action == 'up':
expected = (x, min(y + 1, self.height - 1))
elif action == 'down':
expected = (x, max(y - 1, 0))
elif action == 'left':
expected = (max(x - 1, 0), y)
elif action == 'right':
expected = (min(x + 1, self.width - 1), y)
else:
expected = state
# 添加期望状态
if expected not in self.obstacles:
possible_states.append(expected)
# 添加可能的滑动状态
if action in ['up', 'down']:
# 可能向左或向右滑动
left = (max(x - 1, 0), y)
right = (min(x + 1, self.width - 1), y)
if left not in self.obstacles:
possible_states.append(left)
if right not in self.obstacles:
possible_states.append(right)
# 如果没有可能的状态,保持原位
if not possible_states:
possible_states.append(state)
return possible_states
1.2 非确定性规划问题¶
class NondeterministicPlanningProblem:
"""非确定性规划问题"""
def __init__(self, system, initial_state, goal_states, unsafe_states=None):
"""
初始化规划问题
参数:
system: 非确定性系统
initial_state: 初始状态
goal_states: 目标状态集合
unsafe_states: 不安全状态集合
"""
self.system = system
self.initial_state = initial_state
self.goal_states = goal_states
self.unsafe_states = unsafe_states or set()
def is_goal(self, state):
"""检查是否达到目标"""
return state in self.goal_states
def is_safe(self, state):
"""检查状态是否安全"""
return state not in self.unsafe_states
def get_applicable_actions(self, state):
"""获取可应用的动作"""
applicable = []
for action in self.system.actions:
possible_states = self.system.get_possible_next_states(state, action)
# 检查是否所有可能的状态都是安全的
if all(self.is_safe(s) for s in possible_states):
applicable.append(action)
return applicable
2. And/Or 图搜索¶
2.1 And/Or 图定义¶
And/Or 图:
- Or 节点:选择一个动作
- And 节点:考虑所有可能的结果
搜索过程:
1. 从初始状态开始(Or 节点)
2. 选择一个动作(Or 节点)
3. 考虑所有可能的下一个状态(And 节点)
4. 对每个可能的状态递归搜索
class AndOrNode:
"""And/Or 图节点"""
def __init__(self, state, node_type='or', parent=None, action=None):
"""
初始化节点
参数:
state: 状态
node_type: 节点类型 ('or' 或 'and')
parent: 父节点
action: 到达此节点的动作
"""
self.state = state
self.node_type = node_type
self.parent = parent
self.action = action
self.children = []
self.solution = None
def and_or_search(problem, state, visited=None):
"""
And/Or 图搜索
参数:
problem: 规划问题
state: 当前状态
visited: 已访问的状态集合
返回:
plan: 解决方案(如果存在)
"""
if visited is None:
visited = set()
# 检查是否达到目标
if problem.is_goal(state):
return []
# 检查是否已访问(避免循环)
if state in visited:
return None
visited.add(state)
# 获取可应用的动作
actions = problem.get_applicable_actions(state)
for action in actions:
# 获取可能的下一个状态
possible_states = problem.system.get_possible_next_states(state, action)
# 对每个可能的状态递归搜索
sub_plans = []
all_succeed = True
for next_state in possible_states:
sub_plan = and_or_search(problem, next_state, visited.copy())
if sub_plan is None:
all_succeed = False
break
sub_plans.append((next_state, sub_plan))
# 如果所有可能的状态都有解决方案
if all_succeed:
# 构建解决方案
plan = {
'action': action,
'sub_plans': sub_plans
}
return plan
return None
# 示例
system = NondeterministicGridWorld(3, 3, (2, 2), [(1, 1)])
problem = NondeterministicPlanningProblem(system, (0, 0), {(2, 2)})
plan = and_or_search(problem, (0, 0))
print("解决方案:", plan)
2.2 强解决方案¶
def strong_solution(problem, state, visited=None):
"""
强解决方案:保证在所有情况下都能达到目标
参数:
problem: 规划问题
state: 当前状态
visited: 已访问的状态集合
返回:
plan: 强解决方案(如果存在)
"""
if visited is None:
visited = set()
# 检查是否达到目标
if problem.is_goal(state):
return []
# 检查是否已访问
if state in visited:
return None
visited.add(state)
# 获取可应用的动作
actions = problem.get_applicable_actions(state)
for action in actions:
# 获取可能的下一个状态
possible_states = problem.system.get_possible_next_states(state, action)
# 对每个可能的状态递归搜索
sub_plans = []
all_succeed = True
for next_state in possible_states:
sub_plan = strong_solution(problem, next_state, visited.copy())
if sub_plan is None:
all_succeed = False
break
sub_plans.append((next_state, sub_plan))
# 如果所有可能的状态都有强解决方案
if all_succeed:
plan = {
'action': action,
'sub_plans': sub_plans,
'type': 'strong'
}
return plan
return None
def cyclic_strong_solution(problem, state, visited=None, path=None):
"""
循环强解决方案:允许循环但保证最终达到目标
参数:
problem: 规划问题
state: 当前状态
visited: 已访问的状态集合
path: 当前路径
返回:
plan: 循环强解决方案(如果存在)
"""
if visited is None:
visited = set()
if path is None:
path = []
# 检查是否达到目标
if problem.is_goal(state):
return []
# 检查是否在当前路径中(循环)
if state in path:
# 允许循环,继续搜索
pass
path.append(state)
# 获取可应用的动作
actions = problem.get_applicable_actions(state)
for action in actions:
possible_states = problem.system.get_possible_next_states(state, action)
sub_plans = []
all_succeed = True
for next_state in possible_states:
sub_plan = cyclic_strong_solution(problem, next_state, visited.copy(), path.copy())
if sub_plan is None:
all_succeed = False
break
sub_plans.append((next_state, sub_plan))
if all_succeed:
plan = {
'action': action,
'sub_plans': sub_plans,
'type': 'cyclic_strong'
}
return plan
return None
3. 确定化技术¶
3.1 最可能确定化¶
def most_likely_determinization(problem):
"""
最可能确定化:选择最可能的结果作为确定性转移
参数:
problem: 非确定性规划问题
返回:
deterministic_problem: 确定性规划问题
"""
# 创建确定性转移函数
def deterministic_transition(state, action):
possible_states = problem.system.get_possible_next_states(state, action)
# 假设所有可能状态等概率
# 选择第一个状态(或根据某些启发式选择)
return possible_states[0]
# 创建确定性系统
deterministic_system = DeterministicSystem(
problem.system.states,
problem.system.actions,
deterministic_transition
)
# 创建确定性规划问题
deterministic_problem = DeterministicPlanningProblem(
deterministic_system,
problem.initial_state,
problem.goal_states
)
return deterministic_problem
class DeterministicSystem:
"""确定性系统"""
def __init__(self, states, actions, transition_function):
self.states = states
self.actions = actions
self.transition_function = transition_function
def get_next_state(self, state, action):
"""获取下一个状态"""
return self.transition_function(state, action)
class DeterministicPlanningProblem:
"""确定性规划问题"""
def __init__(self, system, initial_state, goal_states):
self.system = system
self.initial_state = initial_state
self.goal_states = goal_states
def is_goal(self, state):
return state in self.goal_states
def get_applicable_actions(self, state):
return self.system.actions
3.2 所有可能的确定化¶
def all_determinizations(problem):
"""
所有可能的确定化:生成所有可能的确定性规划问题
参数:
problem: 非确定性规划问题
返回:
deterministic_problems: 确定性规划问题列表
"""
deterministic_problems = []
# 为每个动作的每个可能结果创建确定性问题
for action in problem.system.actions:
# 获取所有可能的转移
all_transitions = {}
for state in problem.system.states:
possible_states = problem.system.get_possible_next_states(state, action)
all_transitions[state] = possible_states
# 生成确定性问题
for i in range(max(len(states) for states in all_transitions.values())):
def deterministic_transition(state, action=action, idx=i):
possible_states = all_transitions.get(state, [state])
return possible_states[idx % len(possible_states)]
deterministic_system = DeterministicSystem(
problem.system.states,
problem.system.actions,
deterministic_transition
)
deterministic_problem = DeterministicPlanningProblem(
deterministic_system,
problem.initial_state,
problem.goal_states
)
deterministic_problems.append(deterministic_problem)
return deterministic_problems
3.3 计划验证¶
def validate_plan(plan, problem):
"""
验证计划是否在所有情况下都能达到目标
参数:
plan: 计划
problem: 非确定性规划问题
返回:
is_valid: 是否有效
"""
# 模拟所有可能的执行路径
def simulate(state, plan_index):
if plan_index >= len(plan):
return problem.is_goal(state)
action = plan[plan_index]
possible_states = problem.system.get_possible_next_states(state, action)
# 检查所有可能的结果
for next_state in possible_states:
if not simulate(next_state, plan_index + 1):
return False
return True
return simulate(problem.initial_state, 0)
4. 行为树¶
4.1 行为树表示¶
class BehaviorTree:
"""行为树"""
def __init__(self):
self.root = None
def set_root(self, node):
"""设置根节点"""
self.root = node
def execute(self, state):
"""执行行为树"""
if self.root is None:
return None
return self.root.execute(state)
class BTNode:
"""行为树节点基类"""
def __init__(self, name):
self.name = name
def execute(self, state):
"""执行节点"""
raise NotImplementedError
class SequenceNode(BTNode):
"""顺序节点:依次执行子节点"""
def __init__(self, name, children):
super().__init__(name)
self.children = children
def execute(self, state):
for child in self.children:
result = child.execute(state)
if result == 'failure':
return 'failure'
if result == 'running':
return 'running'
return 'success'
class SelectorNode(BTNode):
"""选择节点:选择一个子节点执行"""
def __init__(self, name, children):
super().__init__(name)
self.children = children
def execute(self, state):
for child in self.children:
result = child.execute(state)
if result == 'success':
return 'success'
if result == 'running':
return 'running'
return 'failure'
class ActionNode(BTNode):
"""动作节点:执行动作"""
def __init__(self, name, action):
super().__init__(name)
self.action = action
def execute(self, state):
# 执行动作
return 'success'
class ConditionNode(BTNode):
"""条件节点:检查条件"""
def __init__(self, name, condition):
super().__init__(name)
self.condition = condition
def execute(self, state):
if self.condition(state):
return 'success'
return 'failure'
# 示例
def create_navigation_behavior():
"""创建导航行为"""
# 条件:是否到达目标
at_goal = ConditionNode("At Goal", lambda s: s == (2, 2))
# 动作:移动
move_up = ActionNode("Move Up", 'up')
move_down = ActionNode("Move Down", 'down')
move_left = ActionNode("Move Left", 'left')
move_right = ActionNode("Move Right", 'right')
# 选择节点:选择移动方向
move_selector = SelectorNode("Move Selector", [
move_up, move_down, move_left, move_right
])
# 顺序节点:检查目标,然后移动
root = SequenceNode("Navigation", [
at_goal,
move_selector
])
return root
5. 实践练习¶
练习 1:实现 And/Or 图搜索¶
def and_or_search_exercise():
# 创建非确定性系统
system = NondeterministicGridWorld(3, 3, (2, 2), [(1, 1)])
# 创建规划问题
problem = NondeterministicPlanningProblem(system, (0, 0), {(2, 2)})
# 搜索解决方案
plan = and_or_search(problem, (0, 0))
if plan:
print("找到解决方案:", plan)
else:
print("无解决方案")
练习 2:实现确定化技术¶
def determinization_exercise():
# 创建非确定性系统
system = NondeterministicGridWorld(3, 3, (2, 2), [(1, 1)])
# 创建规划问题
problem = NondeterministicPlanningProblem(system, (0, 0), {(2, 2)})
# 最可能确定化
det_problem = most_likely_determinization(problem)
print("确定性规划问题创建成功")
print(f"初始状态: {det_problem.initial_state}")
print(f"目标状态: {det_problem.goal_states}")
练习 3:实现行为树¶
def behavior_tree_exercise():
# 创建行为树
bt = BehaviorTree()
root = create_navigation_behavior()
bt.set_root(root)
# 执行行为树
state = (0, 0)
result = bt.execute(state)
print(f"状态: {state}, 结果: {result}")
6. 常见问题¶
1. 计算复杂度高¶
问题:非确定性规划计算量大
解决方案: - 使用启发式搜索 - 使用确定化技术 - 使用近似方法
2. 解决方案不存在¶
问题:找不到强解决方案
解决方案: - 使用弱解决方案 - 使用循环解决方案 - 放宽约束
3. 行为树设计困难¶
问题:如何设计有效的行为树
解决方案: - 使用模块化设计 - 使用学习方法 - 使用自动合成
下一步¶
参考资源¶
- Acting, Planning, and Learning - Chapter 11-13
- Planning Algorithms - Chapter 7
- Behavior Trees in Robotics and AI