
Video walkthrough:

DQN Plays 2048 in Practice | Part 3: Optimizing the Network, Using the GPU, and Improving the Env Reward

1. The reward only captures local merges: currently the reward is just the score gained when tiles merge, so it reflects only the immediate payoff of each move and gives no extra incentive for reaching 2048. Without a strong reward signal for actually reaching 2048, the agent may never treat it as an important goal.

2. Training underutilizes the hardware: no GPU acceleration is used, so everything defaults to the CPU and runs slowly.

The code changes are as follows:

In the step function, add a large bonus when a 2048 tile appears, and append the board's maximum tile (max_tile) to the state, which grows the input dimension from 16 to 17:

if 2048 in self.board:
    reward += 10000
    done = True
state = self.board.flatten()
max_tile = np.max(self.board)
state = np.append(state, max_tile)
return state, reward, done
input_size = 17
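
As a quick sanity check (a minimal sketch of my own, assuming the Game2048 class from the complete code below), you can confirm that the state is now 17-dimensional and that a 2048 tile triggers the bonus and ends the episode:

env = Game2048()
state = env.reset()
print(state.shape)        # (17,) — 16 board cells plus max_tile

env.board[0, 0] = 2048    # plant a 2048 tile by hand, purely for testing
state, reward, done = env.step(0)  # action 0 = move left
print(reward, done)       # reward includes the +10000 bonus, done is True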

Check whether a GPU is available on the system; if so, use it for computation, otherwise fall back to the CPU.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
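
As a quick check (my own addition, not from the original post), you can print which device was picked, and the GPU model when CUDA is available:

print(device)
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))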

In train, after creating the model instances, move them to the chosen device (GPU or CPU) with .to(device):

model = DQN(input_size, output_size).to(device)
target_model = DQN(input_size, output_size).to(device)
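
To confirm the move took effect, one option (an illustrative check of my own) is to look at the device of the model's parameters:

print(next(model.parameters()).device)   # prints cuda:0 when a GPU is in use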

During training and inference, move the input data (states, actions, rewards, etc.) to the same device:

state = torch.FloatTensor(state).unsqueeze(0).to(device)
next_state = torch.FloatTensor(next_state).unsqueeze(0).to(device)
states = torch.FloatTensor(states).to(device)
actions = torch.LongTensor(actions).to(device)
rewards = torch.FloatTensor(rewards).to(device)
next_states = torch.FloatTensor(next_states).to(device)
dones = torch.FloatTensor(dones).to(device)

Before converting state and next_state to NumPy arrays with .numpy(), move them to the CPU with .cpu():

replay_buffer.add(state.cpu().squeeze(0).numpy(), action, reward, next_state.cpu().squeeze(0).numpy(), done)

Without this change you will hit: TypeError: can't convert cuda:0 device type tensor to numpy
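
The error is easy to reproduce in isolation; here is a minimal sketch (assuming a CUDA device is available):

t = torch.ones(4, device="cuda")
# t.numpy()              # TypeError: can't convert cuda:0 device type tensor to numpy
arr = t.cpu().numpy()    # moving the tensor to the CPU first works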

The complete code is as follows:

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.table import Table

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2048 game environment
class Game2048:
    def __init__(self):
        self.board = np.zeros((4, 4), dtype=int)
        self.add_random_tile()
        self.add_random_tile()

    def add_random_tile(self):
        empty_cells = np.argwhere(self.board == 0)
        if len(empty_cells) > 0:
            index = random.choice(empty_cells)
            self.board[index[0], index[1]] = 2 if random.random() < 0.9 else 4

    def move_left(self):
        reward = 0
        new_board = np.copy(self.board)
        for row in range(4):
            line = new_board[row]
            non_zero = line[line != 0]
            merged = []
            i = 0
            while i < len(non_zero):
                if i + 1 < len(non_zero) and non_zero[i] == non_zero[i + 1]:
                    merged.append(2 * non_zero[i])
                    reward += 2 * non_zero[i]
                    i += 2
                else:
                    merged.append(non_zero[i])
                    i += 1
            new_board[row] = np.pad(merged, (0, 4 - len(merged)), 'constant')
        if not np.array_equal(new_board, self.board):
            self.board = new_board
            self.add_random_tile()
        return reward

    def move_right(self):
        self.board = np.fliplr(self.board)
        reward = self.move_left()
        self.board = np.fliplr(self.board)
        return reward

    def move_up(self):
        self.board = self.board.T
        reward = self.move_left()
        self.board = self.board.T
        return reward

    def move_down(self):
        self.board = self.board.T
        reward = self.move_right()
        self.board = self.board.T
        return reward

    def step(self, action):
        if action == 0:
            reward = self.move_left()
        elif action == 1:
            reward = self.move_right()
        elif action == 2:
            reward = self.move_up()
        elif action == 3:
            reward = self.move_down()
        # Game over when no cell is empty and no adjacent cells match
        done = not np.any(self.board == 0) \
            and all([np.all(self.board[:, i] != self.board[:, i + 1]) for i in range(3)]) \
            and all([np.all(self.board[i, :] != self.board[i + 1, :]) for i in range(3)])
        # Extra reward signal for actually reaching 2048
        if 2048 in self.board:
            reward += 10000
            done = True
        state = self.board.flatten()
        max_tile = np.max(self.board)
        state = np.append(state, max_tile)
        return state, reward, done

    def reset(self):
        self.board = np.zeros((4, 4), dtype=int)
        self.add_random_tile()
        self.add_random_tile()
        state = self.board.flatten()
        max_tile = np.max(self.board)
        state = np.append(state, max_tile)
        return state

# Deep Q-network
class DQN(nn.Module):
    def __init__(self, input_size, output_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Experience replay buffer
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        return np.array(states), np.array(actions), np.array(rewards), np.array(next_states), np.array(dones)

    def __len__(self):
        return len(self.buffer)

# Board visualization
def visualize_board(board, ax):
    ax.clear()
    table = Table(ax, bbox=[0, 0, 1, 1])
    nrows, ncols = board.shape
    width, height = 1.0 / ncols, 1.0 / nrows
    # Color map from low tiles (white) to high tiles (red)
    cmap = mcolors.LinearSegmentedColormap.from_list("", ["white", "yellow", "orange", "red"])
    for (i, j), val in np.ndenumerate(board):
        color = cmap(np.log2(val + 1) / np.log2(2048 + 1)) if val > 0 else "white"
        table.add_cell(i, j, width, height, text=val if val > 0 else "",
                       loc='center', facecolor=color)
    ax.add_table(table)
    ax.set_axis_off()
    plt.draw()
    plt.pause(0.1)

# Training loop
def train():
    env = Game2048()
    input_size = 17
    output_size = 4
    model = DQN(input_size, output_size).to(device)
    target_model = DQN(input_size, output_size).to(device)
    target_model.load_state_dict(model.state_dict())
    target_model.eval()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    replay_buffer = ReplayBuffer(capacity=10000)
    batch_size = 32
    gamma = 0.99
    epsilon = 1.0
    epsilon_decay = 0.995
    epsilon_min = 0.01
    update_target_freq = 10
    num_episodes = 1000
    fig, ax = plt.subplots()
    for episode in range(num_episodes):
        state = env.reset()
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        done = False
        total_reward = 0
        while not done:
            visualize_board(env.board, ax)
            # Epsilon-greedy action selection
            if random.random() < epsilon:
                action = random.randint(0, output_size - 1)
            else:
                q_values = model(state)
                action = torch.argmax(q_values, dim=1).item()
            next_state, reward, done = env.step(action)
            next_state = torch.FloatTensor(next_state).unsqueeze(0).to(device)
            # Store CPU copies in the replay buffer
            replay_buffer.add(state.cpu().squeeze(0).numpy(), action, reward, next_state.cpu().squeeze(0).numpy(), done)
            if len(replay_buffer) >= batch_size:
                states, actions, rewards, next_states, dones = replay_buffer.sample(batch_size)
                states = torch.FloatTensor(states).to(device)
                actions = torch.LongTensor(actions).to(device)
                rewards = torch.FloatTensor(rewards).to(device)
                next_states = torch.FloatTensor(next_states).to(device)
                dones = torch.FloatTensor(dones).to(device)
                q_values = model(states)
                q_values = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
                next_q_values = target_model(next_states)
                next_q_values = next_q_values.max(1)[0]
                target_q_values = rewards + gamma * (1 - dones) * next_q_values
                loss = criterion(q_values, target_q_values)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            state = next_state
            total_reward += reward
        if episode % update_target_freq == 0:
            target_model.load_state_dict(model.state_dict())
        epsilon = max(epsilon * epsilon_decay, epsilon_min)
        print(f"Episode {episode}: Total Reward = {total_reward}, Epsilon = {epsilon}")
    plt.close()

if __name__ == "__main__":
    train()
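
One small follow-up (a sketch of my own, not part of the original code): gradients are not needed when the trained model is only picking actions, so the forward pass can be wrapped in torch.no_grad(). A hypothetical greedy-evaluation helper, with a step cap because the environment allows moves that leave the board unchanged:

def evaluate(model, num_episodes=5, max_steps=2000):
    env = Game2048()
    for _ in range(num_episodes):
        state = torch.FloatTensor(env.reset()).unsqueeze(0).to(device)
        done = False
        total_reward = 0
        steps = 0
        while not done and steps < max_steps:
            with torch.no_grad():  # inference only, no autograd graph
                action = torch.argmax(model(state), dim=1).item()
            next_state, reward, done = env.step(action)
            state = torch.FloatTensor(next_state).unsqueeze(0).to(device)
            total_reward += reward
            steps += 1
        print(f"Evaluation reward: {total_reward}")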
