当前位置：首页 > news >正文

6.4 note

news 2025/9/1 15:21:59

cpu vs gpu编程

以下是GPU和CPU编程区别的简洁通俗总结：

- 编程模型：CPU是顺序执行，适合复杂逻辑和串行任务；GPU是并行编程，适合大规模数据并行计算。
- 编程语言和工具：CPU用C、C++等通用语言；GPU有CUDA、OpenCL等专门语言及对应开发工具。
- 内存管理：CPU内存管理由系统和编译器自动完成；GPU需程序员手动管理内存分配、传输和释放。
- 性能优化：CPU优化注重算法、代码和缓存；GPU侧重于提高并行度和减少内存访问延迟。
- 适用场景：CPU用于复杂逻辑处理、串行计算等，如操作系统等；GPU用于图形渲染、深度学习等高度并行的计算任务。

构造矩阵 lc2392

class Solution {
private:
vector<int> empty = {};
// 返回每个数字(-1)所在的序号，可以是行或列, 如果为空则无效
vector<int> topoSort(int k, vector<vector<int>>& conditions)
{
// 构建一个图: 范围1-k，这里故意留一个0空着，后续计算无需额外-1
vector<vector<int>> g(k+1);
// 入度: 范围1-k，这里故意留一个0空着，后续计算无需额外-1
// vector<int> inDegrees(k+1);
int inDegrees[k+1];
memset(inDegrees, 0, sizeof(inDegrees));
for (vector<int>& condition : conditions)
{
g[condition[0]].push_back(condition[1]);
++inDegrees[condition[1]];
}

// 拓扑排序使用队列
queue<int> q;
// 插入入度为0的节点
for (int i = 1; i <= k; ++i)
{
if (inDegrees[i] == 0)
{
q.push(i);
}
}
// 计算找到接点的序号：即优先级顺序
int seq = 0;
// 返回结果
vector<int> res(k);
while (q.size() > 0)
{
int curr = q.front();
q.pop();
res[curr-1] = seq++;
for (int next : g[curr])
{
if (--inDegrees[next] == 0)
{
q.push(next);
}
}
}

// 找不到则返回空
return seq == k ? res : empty;
}

public:
vector<vector<int>> buildMatrix(int k, vector<vector<int>>& rowConditions, vector<vector<int>>& colConditions) {
vector<int> rows = topoSort(k, rowConditions);
vector<int> cols = topoSort(k, colConditions);
// 一旦发现有空的情况（即无法满足条件），就可以直接返回空
if (rows.size() == 0 || cols.size() == 0)
{
return {};
}

vector<vector<int>> res(k, vector<int>(k));
for (int i = 0; i < k; ++i)
{
// 这里需要额外+1，因为编号是 0~k-1
res[rows[i]][cols[i]] = i+1;
}

return res;
}
};

双指针➕找规律

要改变的最小次数就等于中值，可以通过两端做差再除以2得到最小的变化次数，

然后迭代下去即可，具体实参看代码，时间复杂度O(n)

代码：

class Solution {
public:
int minOperations(int n) {
int start = 0;
int end = n-1;
int res = 0;
while(start<end){ //len长奇偶都无关
res+=((end*2-1)-(start*2-1))/2;
start++;
end--;
}
return res;
}
};

算法

1. 计算机编程语言是人类和计算机交流的语言工具。

2. 计算机的本质是状态机，内存里存储的所有数据构成了当前的状态；

3. 数据结构相当于告诉计算机怎么来记录当前的状态，算法相当于告诉计算机怎么根据当前的状态计算出下一个状态。

4. 空间复杂度就是你需要的必需存储的状态最多有多少；

5. 时间复杂度就是从初始状态到达最终状态中间需要多少步，这个和算法强相关，是算法关注的事情；

6. 程序在计算过程中，将问题层层分解之后，会有大量的中间过程数据，这个本质上就是记录问题的中间状态；中间过程数据就是由你设计的数据结构来承载。

cpp性能优化

单线程下, 智能指针作为参数传递的时候没有使用引用 每次都拷贝构造造成了额外的性能开销。

只要是个类型变量都可以传参，例如share_ptr

错误贪心 lc3458

class Solution {

public:

bool maxSubstringLength(string s, int k) {

unordered_map<char,int> hash;

for(auto c:s)

hash[c]++;

int cnt=0;

for(auto& [a,b]:hash)

{

if(b==1) cnt++;

}

if(cnt>=k) return true;

else return false;

}

};

正确贪心：

class Solution {
public:
bool maxSubstringLength(string s, int K) {
int n = s.size();

vector<int> pos[26];
for (int i = 0; i < n; i++) {
int c = s[i] - 'a';
pos[c].push_back(i);
}

typedef pair<int, int> pii;
vector<pii> vec;
// 枚举每一种子串
for (int i = 0; i < 26; i++) if (!pos[i].empty()) {
// 一开始先用这种字母的范围作为子串的范围
int l = pos[i][0], r = pos[i].back();
bool flag = true;
while (flag) {
flag = false;
// 检查子串里是否出现了其它字母
for (int j = 0; j < 26; j++) {
int cnt = upper_bound(pos[j].begin(), pos[j].end(), r) - lower_bound(pos[j].begin(), pos[j].end(), l);
if (cnt > 0 && cnt < pos[j].size()) {
// 有一种字母没有被完全包含，用它的范围更新子串的范围
l = min(l, pos[j][0]);
r = max(r, pos[j].back());
flag = true;
}
}
}
// 得到了这种子串里的最短子串
if (l > 0 || r < n - 1) vec.push_back({r, l});
}

// leetcode 435. 无重叠区间
sort(vec.begin(), vec.end());
int R = -1, cnt = 0;
for (pii p : vec) if (p.second > R) {
R = p.first;
cnt++;
}
return cnt >= K;
}
};

查看全文

http://www.dtcms.com/a/229992.html