1. 依赖安装
pip install ai2thor
pip install numpy pillow opencv-python
pip install keyboard SpeechRecognition pyaudio

2. 验证安装
from ai2thor.controller import Controller

# Smoke test: launch a scene, issue one action, and report success.
controller = Controller(scene="FloorPlan1")
try:
    controller.step(action="MoveAhead")
    print("Success!")
finally:
    # Always shut the simulator down, even if the step above raised.
    controller.stop()
3. 手动键盘控制
from ai2thor.controller import Controller
import keyboard

# Movement/rotation keys, checked in this order; the first key found pressed
# wins for the current tick (same priority as the original if/elif chain).
KEY_ACTIONS = {
    'w': "MoveAhead",
    's': "MoveBack",
    'a': "MoveLeft",
    'd': "MoveRight",
    'q': "RotateLeft",
    'e': "RotateRight",
}

controller = Controller(
    scene="FloorPlan1",
    gridSize=0.25,
    rotateStepDegrees=45,
)

print("WASD控制移动,QE旋转,F抓取,ESC退出")
try:
    while True:
        # A no-op step refreshes the metadata that the pickup branch reads.
        event = controller.step(action="Pass")

        if keyboard.is_pressed('esc'):
            break

        pressed = next(
            (key for key in KEY_ACTIONS if keyboard.is_pressed(key)), None
        )
        if pressed is not None:
            controller.step(action=KEY_ACTIONS[pressed])
        elif keyboard.is_pressed('f'):
            # Only consider objects that are both in view and pickupable;
            # the first merely-visible object may be a wall, floor or counter.
            candidates = [
                obj for obj in event.metadata["objects"]
                if obj["visible"] and obj["pickupable"]
            ]
            if candidates:
                controller.step(
                    action="PickupObject",
                    objectId=candidates[0]["objectId"],
                    forceAction=True,
                )
finally:
    # Shut the simulator down on ESC and on any unexpected error.
    controller.stop()
4. 定位目标物体(以微波炉为例)
from ai2thor.controller import Controller

controller = Controller(
    scene="FloorPlan1",
    visibilityDistance=1.5,
    renderInstanceSegmentation=True,
)
try:
    # Look the target up by type in the metadata of the initial event.
    microwaves = [
        obj for obj in controller.last_event.metadata["objects"]
        if obj["objectType"] == "Microwave"
    ]
    if microwaves:
        target_id = microwaves[0]["objectId"]

        # Ask the simulator for agent poses from which the target can be
        # interacted with, then teleport to the first one.
        poses = controller.step(
            action="GetInteractablePoses",
            objectId=target_id,
        ).metadata["actionReturn"]
        if poses:
            pose = poses[0]
            controller.step(
                action="TeleportFull",
                position=dict(x=pose["x"], y=pose["y"], z=pose["z"]),
                rotation=dict(x=0, y=pose["rotation"], z=0),
                horizon=pose["horizon"],
                standing=pose["standing"],
            )

        # NOTE(review): a Microwave is presumably not a pickupable object
        # type, so the failure branch is the expected outcome here; use a
        # pickupable type such as "Mug" to see a successful pickup. Confirm
        # against the object-type table.
        event = controller.step(
            action="PickupObject",
            objectId=target_id,
        )
        if event.metadata["lastActionSuccess"]:
            print("抓取成功!")
        else:
            print("抓取失败:", event.metadata["errorMessage"])
    else:
        print("场景中没有微波炉")
finally:
    # Release the simulator even if one of the steps above raised.
    controller.stop()
5. 多房间导航
import random

from ai2thor.controller import Controller

# Start a fresh controller for this section so the snippet does not depend
# on a controller that an earlier section has already stopped.
controller = Controller(scene="FloorPlan201")
try:
    event = controller.step(action="GetReachablePositions")
    reachable_positions = event.metadata["actionReturn"]

    # random.choice raises IndexError on an empty sequence, so only move
    # when the simulator actually reported reachable positions.
    if reachable_positions:
        target_pos = random.choice(reachable_positions)
        # The target comes from the reachable set, so no forceAction is
        # needed to teleport there.
        controller.step(
            action="Teleport",
            position=target_pos,
        )
finally:
    controller.stop()
6. 视觉辅助抓取
from ai2thor.controller import Controller

# Self-contained setup; renderInstanceSegmentation is enabled so that the
# segmentation frame is rendered for each event.
controller = Controller(
    scene="FloorPlan1",
    renderInstanceSegmentation=True,
)
try:
    event = controller.step(action="Pass")

    # Object attributes (objectType, objectId, ...) are read from the
    # per-object metadata dicts; the segmentation frame is an image and
    # cannot be indexed by these keys.
    mug = next(
        (obj for obj in event.metadata["objects"]
         if obj["objectType"] == "Mug"),
        None,
    )
    if mug is not None:
        # Move to a pose from which the mug is interactable rather than
        # onto the mug's own centre point.
        poses = controller.step(
            action="GetInteractablePoses",
            objectId=mug["objectId"],
        ).metadata["actionReturn"]
        if poses:
            pose = poses[0]
            controller.step(
                action="TeleportFull",
                position=dict(x=pose["x"], y=pose["y"], z=pose["z"]),
                rotation=dict(x=0, y=pose["rotation"], z=0),
                horizon=pose["horizon"],
                standing=pose["standing"],
            )
        controller.step(action="PickupObject", objectId=mug["objectId"])
finally:
    controller.stop()
7. 语音控制
import speech_recognition as sr

r = sr.Recognizer()

# Hold the microphone only while recording; recognition happens afterwards.
with sr.Microphone() as source:
    print("请说指令:")
    audio = r.listen(source)

# `command` is always defined after this block: the recognised text on
# success, or an empty string when recognition fails.
try:
    command = r.recognize_google(audio, language='zh-CN')
except sr.UnknownValueError:
    # The audio could not be understood as speech.
    command = ""
    print("无法识别语音")
except sr.RequestError as err:
    # The recognition service could not be reached or rejected the request.
    command = ""
    print("语音识别服务请求失败:", err)