瑞莎星睿(Radxa Orion O6) 基于 Android OS 使用 NPU 的图片模糊查找 APP 开发
安谋科技、此芯科技与瑞莎计算机共同推出"星睿O6"开发套件,专为AI PC、边缘计算及机器人等应用场景设计。该套件创新性地整合了Arm®v9 CPU核心、Arm Immortalis™ GPU以及安谋科技自主研发的"周易"NPU。
在Android操作系统环境下,开发者可利用这套开发套件,通过原生JNI方式对开源项目进行优化改造,实现基于NPU加速的CLIP技术,从而提升图片模糊搜索的性能表现。
🔍 Search local images with natural language on Android, powered by OpenAI's CLIP model. / 在 Android 上用自然语言搜索本地图片 (基于 OpenAI 的 CLIP 模型)
https://github.com/greyovo/PicQuery
下面我们开始改造之路:
- 导入项目到 AndroidStudio
- 导入 O6 NPU的 native lib 库
- 新增 JNI C++ 代码
- 改造项目 kotlin 代码
- 编译
- 上板执行!
下面给出具体的操作细节:
- O6 NPU 的 native lib 库是在 Android 镜像的如下位置
.
└── vendor
    ├── include
    │   └── npu
    │       ├── kmd
    │       │   ├── armchina_aipu.h
    │       │   └── tcb.h
    │       └── standard_api.h
    └── lib64
        └── libaipudrv.so
这里还需要注意一点:我们还需要把 libc++ 的库一并打包进 APK 的 jniLibs 中。因为系统里提供的 libaipudrv.so 是动态链接的,并不自带 libc++ 这个 C++ 运行库,缺少它的话运行时会报错。
vendor/lib64/libc++.so
- 主要是用到的 API 可以参考 CIX NPU 开发指导手册
首先使用方法和 Linux 端的使用基本一致,只是需要根据 JNI 的方式做一些符合 JNI 要求的修改即可。
大家也可以参考我这边的代码来组织自己的code,这边应该可以说是通用的。
/*
 * Load the AIPU model binary passed from Java, initialize the NPU driver
 * context, query the partition/cluster/core topology, load the graph, and
 * cache input/output tensor descriptors plus output buffers in file-scope
 * globals (ctx, graph_id, input_desc, output_desc, output_data, ...) for
 * later use by processNpuInference.
 *
 * @param model      Java byte[] holding the compiled .cix/.bin model blob.
 * @param model_size Size of the model blob in bytes.
 * @return 0 on success (errors are currently logged but non-fatal, matching
 *         the original best-effort behavior).
 */
extern "C" JNIEXPORT jint JNICALL
Java_me_grey_picquery_NpuInference_preprocessNpuInference(JNIEnv *env, jobject thiz,
                                                          jbyteArray model, jint model_size)
{
    initTestBench(&opt);

    /* Copy the model out of the Java heap into a plain C buffer so the
     * driver can consume it after the JNI array is released. */
    jbyte *model_bin = env->GetByteArrayElements(model, NULL);
    char *buffer1 = new char[model_size];
    memcpy(buffer1, model_bin, model_size);
    opt.model_bin = buffer1;
    opt.bin_size = model_size;
    LOGE("[TEST INFO] preprocessNpuInference\n");

    memset(&sim_glb_config, 0, sizeof(sim_glb_config));
    memset(&sim_job_config, 0, sizeof(sim_job_config));
    memset(&mem_dump_config, 0, sizeof(mem_dump_config));
    mem_dump_config.dump_dir = opt.dump_dir;

    /* BUG FIX: the original discarded the status of aipu_init_context()
     * and then tested a stale `ret`; capture the return value first. */
    ret = aipu_init_context(&ctx);
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        LOGE("[TEST ERROR] AIPU_init_ctx: %s\n", msg);
    }

    ret = aipu_config_global(ctx, AIPU_CONFIG_TYPE_SIMULATION, &sim_glb_config);
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        LOGE("[TEST ERROR] AIPU_config_simulation: %s\n", msg);
    }

    /* Enumerate the NPU topology once (part_cnt != 0 means already done). */
    if (part_cnt == 0) {
        ret = aipu_get_partition_count(ctx, &part_cnt);
        if (ret != AIPU_STATUS_SUCCESS) {
            aipu_get_error_message(ctx, ret, &msg);
            LOGE("aipu_get_partition_count: %s \n", msg);
        }
        for (uint32_t i = 0; i < part_cnt; i++) {
            ret = aipu_get_cluster_count(ctx, i, &cluster_cnt);
            if (ret != AIPU_STATUS_SUCCESS) {
                aipu_get_error_message(ctx, ret, &msg);
                LOGE("aipu_get_cluster_count: %s \n", msg);
            }
            for (uint32_t j = 0; j < cluster_cnt; j++) {
                ret = aipu_get_core_count(ctx, i, j, &core_cnt);
                if (ret != AIPU_STATUS_SUCCESS) {
                    aipu_get_error_message(ctx, ret, &msg);
                    LOGE("aipu_get_core_count: %s \n", msg);
                }
                LOGE("[TEST INFO] <part_idx, cluster_idx, core_cnt> = <%u, %u, %u>\n",
                     i, j, core_cnt);
            }
        }
    }

    ret = aipu_load_graph_helper(ctx, opt.model_bin, opt.bin_size, &graph_id);
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        LOGE("[TEST ERROR] AIPU_load_graph_helper: %s\n", msg);
    }
    LOGE("[TEST INFO] AIPU load graph successfully.\n");

    /* Cache input tensor descriptors. */
    ret = aipu_get_tensor_count(ctx, graph_id, AIPU_TENSOR_TYPE_INPUT, &input_cnt);
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        LOGE("[TEST ERROR] INPUT: aipu_get_tensor_count: %s\n", msg);
    }
    LOGE("[TEST INFO] INPUT: aipu_get_tensor_count success: input_cnt = %d\n", input_cnt);
    for (uint32_t i = 0; i < input_cnt; i++) {
        aipu_tensor_desc_t desc;
        ret = aipu_get_tensor_descriptor(ctx, graph_id, AIPU_TENSOR_TYPE_INPUT, i, &desc);
        if (ret != AIPU_STATUS_SUCCESS) {
            aipu_get_error_message(ctx, ret, &msg);
            LOGE("[TEST ERROR] INPUT: aipu_get_tensor_descriptor: %s\n", msg);
        }
        LOGE("[TEST INFO] INPUT[%d]: desc.size: %u\n", i, desc.size);
        LOGE("[TEST INFO] INPUT[%d]: desc.scale: %f\n", i, desc.scale);
        LOGE("[TEST INFO] INPUT[%d]: desc.zero_point: %f\n", i, desc.zero_point);
        LOGE("[TEST INFO] INPUT[%d]: desc.data_type: %u\n", i, desc.data_type);
        LOGE("[TEST INFO] INPUT[%d]: desc.id: %u\n", i, desc.id);
        input_desc.push_back(desc);
    }

    /* Cache output tensor descriptors. */
    ret = aipu_get_tensor_count(ctx, graph_id, AIPU_TENSOR_TYPE_OUTPUT, &output_cnt);
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        fprintf(stderr, "[TEST ERROR] aipu_get_tensor_count: %s\n", msg);
    }
    LOGE("[TEST INFO] OUTPUT: aipu_get_tensor_count success: output_cnt = %d\n", output_cnt);
    for (uint32_t i = 0; i < output_cnt; i++) {
        aipu_tensor_desc_t desc;
        /* BUG FIX: the original dropped this call's status and tested a
         * stale `ret` from the previous API call. */
        ret = aipu_get_tensor_descriptor(ctx, graph_id, AIPU_TENSOR_TYPE_OUTPUT, i, &desc);
        if (ret != AIPU_STATUS_SUCCESS) {
            aipu_get_error_message(ctx, ret, &msg);
            LOGE("[TEST ERROR] aipu_get_tensor_descriptor: %s\n", msg);
        }
        LOGE("[TEST INFO] OUTPUT[%d]: desc.size: %u\n", i, desc.size);
        LOGE("[TEST INFO] OUTPUT[%d]: desc.scale: %f\n", i, desc.scale);
        LOGE("[TEST INFO] OUTPUT[%d]: desc.zero_point: %f\n", i, desc.zero_point);
        LOGE("[TEST INFO] OUTPUT[%d]: desc.data_type: %u\n", i, desc.data_type);
        LOGE("[TEST INFO] OUTPUT[%d]: desc.id: %u\n", i, desc.id);
        output_desc.push_back(desc);
    }

    /* Pre-allocate one host buffer per output tensor; freed at teardown. */
    for (uint32_t i = 0; i < output_cnt; i++) {
        char *output = new char[output_desc[i].size];
        output_data.push_back(output);
    }

    env->ReleaseByteArrayElements(model, model_bin, 0);
    delete[] buffer1;
    /* BUG FIX: opt.model_bin pointed at the buffer just freed above; null
     * it out so later code cannot dereference a dangling pointer. */
    opt.model_bin = nullptr;
    return 0;
}
/*
 * Run one NPU inference: copy the preprocessed input from Java, create a
 * high-QoS job on the graph loaded by preprocessNpuInference, load the input
 * tensor, run the job, and copy output tensor 0 back into the Java `output`
 * byte[].
 *
 * @param inputBin    Java int[] holding the (already preprocessed) input data.
 * @param inputLength Number of ints in inputBin.
 * @param output      Java byte[] that receives output tensor 0
 *                    (must be at least output_desc[0].size bytes).
 * @return 0 on success (errors are logged but non-fatal, as in the original).
 */
extern "C" JNIEXPORT jint JNICALL
Java_me_grey_picquery_NpuInference_processNpuInference(JNIEnv *env, jobject thiz,
                                                       jintArray inputBin, jint inputLength,
                                                       /* BUG FIX: `output` was accidentally
                                                        * commented out along with the unused
                                                        * golden-output parameters, leaving the
                                                        * signature uncompilable. */
                                                       jbyteArray output)
{
    jint *inputData = env->GetIntArrayElements(inputBin, NULL);
    jbyte *outputData = env->GetByteArrayElements(output, NULL);

    /* Copy the Java ints into a driver-owned buffer (malloc'd, freed below). */
    void *voidInputData = malloc(inputLength * sizeof(jint));
    if (voidInputData != nullptr) {
        memcpy(voidInputData, inputData, inputLength * sizeof(jint));
    }
    opt.inputs.push_back(voidInputData);
    opt.inputs_size.push_back(inputLength * sizeof(int));
    LOGE("[TEST INFO] NpuInference void* type inputLength= %lu \n",
         inputLength * sizeof(int));

    LOGE("[TEST INFO] do npu inference now\n");
    create_job_cfg.partition_id = 0;
    create_job_cfg.qos_level = AIPU_JOB_QOS_HIGH;
    ret = aipu_create_job(ctx, graph_id, &job_id, &create_job_cfg);
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        LOGE("[TEST ERROR] aipu_create_job: %s\n", msg);
    }
    LOGE("[TEST INFO] aipu_create_job success\n");

    /* BUG FIX: the original dropped aipu_config_job()'s status and tested a
     * stale `ret`; capture it before checking. */
    ret = aipu_config_job(ctx, job_id, AIPU_CONFIG_TYPE_SIMULATION, &sim_job_config);
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        LOGE("[TEST ERROR] aipu_config_job: %s\n", msg);
    }
    LOGE("[TEST INFO] set job simulation config success\n");

    if (opt.inputs.size() != input_cnt) {
        LOGE("[TEST WARN] input file count (%u) != input tensor count (%u)\n",
             (uint32_t)opt.inputs.size(), input_cnt);
    }
    for (uint32_t i = 0; i < min((uint32_t)opt.inputs.size(), input_cnt); i++) {
        if (input_desc[i].size > opt.inputs_size[i]) {
            LOGE("[TEST INFO] input file %s len 0x%x < input tensor %u size 0x%x\n",
                 opt.input_files[i].c_str(), opt.inputs_size[i], i, input_desc[i].size);
        }
        ret = aipu_load_tensor(ctx, job_id, i, opt.inputs[i]);
        if (ret != AIPU_STATUS_SUCCESS) {
            aipu_get_error_message(ctx, ret, &msg);
            LOGE("[TEST ERROR] aipu_load_tensor: %s\n", msg);
        }
        LOGE("[TEST INFO] load input tensor %d from (%u/%u)\n", i, i + 1, input_cnt);
    }

    /* BUG FIX: `gettimeofday(&timestart, ...)` was garbled to `×tart`, and
     * aipu_finish_job()'s status was discarded before the `ret` check. */
    gettimeofday(&timestart, NULL);
    ret = aipu_finish_job(ctx, job_id, -1);   /* -1: block until completion */
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        LOGE("[TEST ERROR] aipu_finish_job: %s\n", msg);
        pass = -1;
    }
    LOGE("[TEST INFO] aipu_finish_job success\n");
    gettimeofday(&timeend, NULL);

    /* Drop the per-call input entries again; guard against underflow in case
     * the counts ever disagree (the original popped blindly input_cnt times). */
    for (uint32_t i = 0; i < input_cnt && !opt.inputs.empty(); i++) {
        opt.inputs.pop_back();
        opt.inputs_size.pop_back();
    }

    for (uint32_t i = 0; i < output_cnt; i++) {
        ret = aipu_get_tensor(ctx, job_id, AIPU_TENSOR_TYPE_OUTPUT, i, output_data[i]);
        if (ret != AIPU_STATUS_SUCCESS) {
            aipu_get_error_message(ctx, ret, &msg);
            LOGE("[TEST ERROR] aipu_get_tensor: %s\n", msg);
        }
        LOGE("[TEST INFO] get output tensor %u success (%u/%u)\n", i, i + 1, output_cnt);
    }

    LOGE("[TEST INFO] output_desc[0].size 0x%x\n", output_desc[0].size);
    LOGE("[TEST INFO] npu post process\n");
    /* Copy output tensor 0 into the caller's Java byte[]. Assumes the Kotlin
     * side sized `output` to at least output_desc[0].size — TODO confirm. */
    memcpy(outputData, output_data[0], output_desc[0].size);

    ret = aipu_clean_job(ctx, job_id);
    if (ret != AIPU_STATUS_SUCCESS) {
        aipu_get_error_message(ctx, ret, &msg);
        LOGE("[TEST ERROR] AIPU_clean_job: %s\n", msg);
    }
    LOGE("[TEST INFO] aipu_clean_job success\n");

    /* Release JNI arrays (mode 0: copy back and free the native copy). */
    env->ReleaseIntArrayElements(inputBin, inputData, 0);
    env->ReleaseByteArrayElements(output, outputData, 0);
    /* BUG FIX: voidInputData came from malloc(), so it must be released with
     * free() — the original used delete[], which is undefined behavior. */
    free(voidInputData);
    return 0;
}
CMakeLists.txt 也可以参考我的代码:
# Build the JNI bridge as a shared library loadable via System.loadLibrary("npu_inference").
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

add_library(npu_inference SHARED
    ${CMAKE_CURRENT_SOURCE_DIR}/npu_inference.cpp)

# BUG FIX: the original had the target and library names fused together
# ("npu_inferenceaipudrvandroidlog"), which is not valid CMake.
#   aipudrv - CIX/Arm China NPU user-mode driver (vendor/lib64/libaipudrv.so)
#   android - Android NDK native API
#   log     - Android logging (__android_log_print used by LOGE)
target_link_libraries(npu_inference
    aipudrv
    android
    log)