5. 快速入门k230 AI推理流程#
本章介绍了K230 AI推理的完整流程,让大家对基于K230的AI推理过程有个大概印象,它包括视频采集,图像预处理,模型推理、后处理、显示等过程。
视频输入:VI(Video Input),视频采集
视频输出:VO(Video Output),显示
本章包含了K230 AI推理流程的两种实现:基于OpenCV(c++)的AI推理、基于ulab(MicroPython)的AI推理,分别对应两个子章节,具体实现详子章节介绍。
详细代码实现:k230_AI_Demo_Code_Flow_Introduction
5.1 基于OpenCV(C++)的AI推理流程#
基于OpenCV(C++)的K230 AI推理,简要介绍了使用c++语言实现的视频采集,图像预处理(OpenCV),模型推理、后处理、显示等过程。
5.1.1 视频采集#
视频采集:(视频输入,VI)与摄像头相关,本小节简要介绍基于c++的摄像头设置、摄像头启动、从摄像头中获取一帧数据、摄像头停止的整体流程;详细介绍见K230_VICAP_API参考.md、K230_VICAP_SENSOR_参数分区参考.md、K230_Camera_Sensor适配指南.md。
1. 设置摄像头属性:
设置的sensor两路输出;
一路输出用于显示,输出大小设置1080p,图像格式为PIXEL_FORMAT_YVU_PLANAR_420,直接绑定到vo;
另一路输出用于AI计算,输出大小720p,图像格式为PIXEL_FORMAT_BGR_888_PLANAR(实际为rgb,chw,uint8);
创建摄像头(sensor)输出缓存(VB):用于存放摄像头两路输出
//vi_vo.h
/***************************unfixed:不同AI Demo可能需要修改***********************/
#define SENSOR_CHANNEL (3) // 通道数
#define SENSOR_HEIGHT (720) // sensor ch1输出高度,AI输入
#define SENSOR_WIDTH (1280) // sensor ch1输出宽度,AI输入
#define ISP_CHN0_WIDTH (1920) // sensor ch0输出宽度,vo
#define ISP_CHN0_HEIGHT (1080) // sensor ch0输出高度,vo
/*****************************************************************************/
/***************************fixed:无需修改***********************************/
memset(&config, 0, sizeof(config));
config.max_pool_cnt = 64;
//VB for YUV420SP output
config.comm_pool[0].blk_cnt = 5;
config.comm_pool[0].mode = VB_REMAP_MODE_NOCACHE;
config.comm_pool[0].blk_size = VICAP_ALIGN_UP((ISP_CHN0_WIDTH * ISP_CHN0_HEIGHT * 3 / 2), VICAP_ALIGN_1K);
//VB for RGB888 output
config.comm_pool[1].blk_cnt = 5;
config.comm_pool[1].mode = VB_REMAP_MODE_NOCACHE;
config.comm_pool[1].blk_size = VICAP_ALIGN_UP((SENSOR_HEIGHT * SENSOR_WIDTH * 3 ), VICAP_ALIGN_1K);
ret = kd_mpi_vb_set_config(&config);
if (ret) {
printf("vb_set_config failed ret:%d\n", ret);
return ret;
}
/*****************************************************************************/
设置摄像头属性:设置sensor_type;一般无需换摄像头,无需修改。
//vi_vo.h
/***************************fixed:无需修改***********************************/
vicap_dev = VICAP_DEV_ID_0;
ret = kd_mpi_vicap_get_sensor_info(sensor_type, &sensor_info);
if (ret) {
printf("sample_vicap, the sensor type not supported!\n");
return ret;
}
......
dev_attr.cpature_frame = 0;
memcpy(&dev_attr.sensor_info, &sensor_info, sizeof(k_vicap_sensor_info));
ret = kd_mpi_vicap_set_dev_attr(vicap_dev, dev_attr);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_set_dev_attr failed.\n");
return ret;
}
/*****************************************************************************/
设置摄像头通道0属性:设置摄像头通道0分辨率为1080p,格式为PIXEL_FORMAT_YVU_PLANAR_420;并将摄像头通道0绑定到显示;一般只需要关注chn_attr.out_win.width
、chn_attr.out_win.height
、chn_attr.pix_format
即可,其它不用修改。
//vi_vo.h
/***************************unfixed:不同AI Demo可能需要修改***********************/
#define ISP_CHN0_WIDTH (1920) // sensor ch0输出宽度,vo
#define ISP_CHN0_HEIGHT (1080) // sensor ch0输出高度,vo
/*****************************************************************************/
/***************************unfixed:不同AI Demo可能需要修改******************/
//set chn0 output yuv420sp
......
chn_attr.out_win.width = ISP_CHN0_WIDTH;
chn_attr.out_win.height = ISP_CHN0_HEIGHT;
......
chn_attr.chn_enable = K_TRUE;
chn_attr.pix_format = PIXEL_FORMAT_YVU_PLANAR_420;
/*****************************************************************************/
......
/***************************fixed:无需修改***********************************/
chn_attr.buffer_num = VICAP_MAX_FRAME_COUNT; //at least 3 buffers for isp
chn_attr.buffer_size = config.comm_pool[0].blk_size;
vicap_chn = VICAP_CHN_ID_0;
printf("sample_vicap ...kd_mpi_vicap_set_chn_attr, buffer_size[%d]\n", chn_attr.buffer_size);
ret = kd_mpi_vicap_set_chn_attr(vicap_dev, vicap_chn, chn_attr);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_set_chn_attr failed.\n");
return ret;
}
//bind vicap chn 0 to vo
vicap_mpp_chn.mod_id = K_ID_VI;
vicap_mpp_chn.dev_id = vicap_dev;
vicap_mpp_chn.chn_id = vicap_chn;
vo_mpp_chn.mod_id = K_ID_VO;
vo_mpp_chn.dev_id = K_VO_DISPLAY_DEV_ID;
vo_mpp_chn.chn_id = K_VO_DISPLAY_CHN_ID1;
sample_vicap_bind_vo(vicap_mpp_chn, vo_mpp_chn);
printf("sample_vicap ...dwc_dsi_init\n");
/*****************************************************************************/
**设置摄像头通道1属性:**设置通道1分辨率为720p,格式为PIXEL_FORMAT_BGR_888_PLANAR。
//vi_vo.h
/***************************unfixed:不同AI Demo可能需要修改***********************/
#define SENSOR_CHANNEL (3) // 通道数
#define SENSOR_HEIGHT (720) // sensor ch1输出高度,AI输入
#define SENSOR_WIDTH (1280) // sensor ch1输出宽度,AI输入
/*****************************************************************************/
......
/***************************unfixed:不同AI Demo可能需要修改******************/
//set chn1 output rgb888p
....
chn_attr.out_win.width = SENSOR_WIDTH ;
chn_attr.out_win.height = SENSOR_HEIGHT;
......
chn_attr.chn_enable = K_TRUE;
chn_attr.pix_format = PIXEL_FORMAT_BGR_888_PLANAR;
/*****************************************************************************/
/***************************fixed:无需修改***********************************/
chn_attr.buffer_num = VICAP_MAX_FRAME_COUNT;//at least 3 buffers for isp
chn_attr.buffer_size = config.comm_pool[1].blk_size;
printf("sample_vicap ...kd_mpi_vicap_set_chn_attr, buffer_size[%d]\n", chn_attr.buffer_size);
ret = kd_mpi_vicap_set_chn_attr(vicap_dev, VICAP_CHN_ID_1, chn_attr);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_set_chn_attr failed.\n");
return ret;
}
/*****************************************************************************/
设置初始化、并启动摄像头:
//vi_vo.h
/***************************fixed:无需修改***********************************/
ret = kd_mpi_vicap_init(vicap_dev);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_init failed.\n");
// goto err_exit;
}
printf("sample_vicap ...kd_mpi_vicap_start_stream\n");
ret = kd_mpi_vicap_start_stream(vicap_dev);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_init failed.\n");
// goto err_exit;
}
/*****************************************************************************/
使用示例:
//main.cc
vivcap_start();
2. 获取摄像头图像:
摄像头数据临时地址:创建vaddr,临时存放摄像头最新数据,它与摄像头通道1大小相同。
// main.cc
/***************************fixed:无需修改***********************************/
// alloc memory for sensor
size_t paddr = 0;
void *vaddr = nullptr;
size_t size = SENSOR_CHANNEL * SENSOR_HEIGHT * SENSOR_WIDTH;
int ret = kd_mpi_sys_mmz_alloc_cached(&paddr, &vaddr, "allocate", "anonymous", size);
if (ret)
{
std::cerr << "physical_memory_block::allocate failed: ret = " << ret << ", errno = " << strerror(errno) << std::endl;
std::abort();
}
/*****************************************************************************/
读取最新帧:
dump:从摄像头通道1读取一帧图像,即从VB中dump一帧数据到dump_info
映射:将dump_info对应DDR地址(物理地址)映射到当前系统地址(虚拟地址)进行访问
转cv::Mat:将摄像头数据转换为cv::Mat,sensor(rgb,chw)->cv::Mat(bgr,hwc);将摄像头数据转换为为cv::Mat不是必须的,这里只是为了以大家比较熟悉的方式(cv::Mat)进行讲解。
//vi_vo.h
/***************************fixed:无需修改***********************************/
//VB for RGB888 output
config.comm_pool[1].blk_cnt = 5;
config.comm_pool[1].mode = VB_REMAP_MODE_NOCACHE;
config.comm_pool[1].blk_size = VICAP_ALIGN_UP((SENSOR_HEIGHT * SENSOR_WIDTH * 3 ), VICAP_ALIGN_1K);
/*****************************************************************************/
//main.cc
while (!isp_stop)
{
cv::Mat ori_img;
//sensor to cv::Mat
{
/***************************fixed:无需修改***********************************/
//从摄像头通道1读取一帧图像,即从VB中dump一帧数据到dump_info
memset(&dump_info, 0 , sizeof(k_video_frame_info));
ret = kd_mpi_vicap_dump_frame(vicap_dev, VICAP_CHN_ID_1, VICAP_DUMP_YUV, &dump_info, 1000);
if (ret) {
printf("sample_vicap...kd_mpi_vicap_dump_frame failed.\n");
continue;
}
//将dump_info对应DDR地址(物理地址)映射到当前系统(虚拟地址)进行访问
//vbvaddr是实时改变的,因此我们最好把最新数据拷贝到【固定地址】vaddr,以便其它部分进行访问
auto vbvaddr = kd_mpi_sys_mmap_cached(dump_info.v_frame.phys_addr[0], size);
memcpy(vaddr, (void *)vbvaddr, SENSOR_HEIGHT * SENSOR_WIDTH * 3);
kd_mpi_sys_munmap(vbvaddr, size);
/*****************************************************************************/
//将摄像头数据转换为为cv::Mat,sensor(rgb,chw)->cv::Mat(bgr,hwc)
cv::Mat image_r = cv::Mat(SENSOR_HEIGHT,SENSOR_WIDTH, CV_8UC1, vaddr);
cv::Mat image_g = cv::Mat(SENSOR_HEIGHT,SENSOR_WIDTH, CV_8UC1, vaddr+SENSOR_HEIGHT*SENSOR_WIDTH);
cv::Mat image_b = cv::Mat(SENSOR_HEIGHT,SENSOR_WIDTH, CV_8UC1, vaddr+2*SENSOR_HEIGHT*SENSOR_WIDTH);
std::vector<cv::Mat> color_vec(3);
color_vec.clear();
color_vec.push_back(image_b);
color_vec.push_back(image_g);
color_vec.push_back(image_r);
cv::merge(color_vec, ori_img);
}
//使用当前帧数据
......
......
{
/***************************fixed:无需修改***********************************/
// 释放sensor当前帧
ret = kd_mpi_vicap_dump_release(vicap_dev, VICAP_CHN_ID_1, &dump_info);
if (ret) {
printf("sample_vicap...kd_mpi_vicap_dump_release failed.\n");
}
/*****************************************************************************/
}
}
3. 停止摄像头:
//vi_vo.h
/***************************fixed:无需修改***********************************/
int vivcap_stop()
{
// 摄像头停止
printf("sample_vicap ...kd_mpi_vicap_stop_stream\n");
int ret = kd_mpi_vicap_stop_stream(vicap_dev);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_init failed.\n");
return ret;
}
// 摄像头资源释放
ret = kd_mpi_vicap_deinit(vicap_dev);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_deinit failed.\n");
return ret;
}
kd_mpi_vo_disable_video_layer(K_VO_LAYER1);
vicap_mpp_chn.mod_id = K_ID_VI;
vicap_mpp_chn.dev_id = vicap_dev;
vicap_mpp_chn.chn_id = vicap_chn;
vo_mpp_chn.mod_id = K_ID_VO;
vo_mpp_chn.dev_id = K_VO_DISPLAY_DEV_ID;
vo_mpp_chn.chn_id = K_VO_DISPLAY_CHN_ID1;
// vi vo解绑
sample_vicap_unbind_vo(vicap_mpp_chn, vo_mpp_chn);
/*Allow one frame time for the VO to release the VB block*/
k_u32 display_ms = 1000 / 33;
usleep(1000 * display_ms);
// 退出vb
ret = kd_mpi_vb_exit();
if (ret) {
printf("sample_vicap, kd_mpi_vb_exit failed.\n");
return ret;
}
return 0;
}
/*****************************************************************************/
只需在main.cc中调用:
//main.cc
vivcap_start();
......
vivcap_stop();
5.1.2 预处理#
对当前帧数据进行resize处理。
//main.cc
while (!isp_stop)
{
cv::Mat ori_img; //ori:uint8,chw,rgb
//sensor to cv::Mat
{
......
}
/***************************unfixed:不同AI Demo可能需要修改******************/
// pre_process,cv::Mat((1280,720),bgr,hwc)->kmodel((224,224),rgb,hwc)
cv::Mat pre_process_img;
{
cv::Mat rgb_img;
cv::cvtColor(ori_img, rgb_img, cv::COLOR_BGR2RGB);
cv::resize(rgb_img, pre_process_img, cv::Size(kmodel_input_width, kmodel_input_height), cv::INTER_LINEAR);
}
/*****************************************************************************/
//已在ai_base.cc中给出通用实现,并且将在第6章给出开源代码
/***************************fixed:无需修改***********************************/
// set kmodel input
{
runtime_tensor tensor0 = kmodel_interp.input_tensor(0).expect("cannot get input tensor");
auto in_buf = tensor0.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_write).unwrap().buffer();
memcpy(reinterpret_cast<unsigned char *>(in_buf.data()), pre_process_img.data,sizeof(uint8_t)* kmodel_input_height * kmodel_input_width * 3);
hrt::sync(tensor0, sync_op_t::sync_write_back, true).expect("sync write_back failed");
}
/*****************************************************************************/
......
}
5.1.3 模型推理#
设置好模型输入后,进行模型推理,得到模型推理结果。
//main.cc
string kmodel_path = argv[1];
cout<<kmodel_path<<endl;
float cls_thresh=0.5;
//已在ai_base.cc中给出通用实现,并且将在第6章给出开源代码
/***************************fixed:无需修改***********************************/
// kmodel解释器,从kmodel文件构建,负责模型的加载、输入输出设置和推理
interpreter kmodel_interp;
// load model
std::ifstream ifs(kmodel_path, std::ios::binary);
kmodel_interp.load_model(ifs).expect("Invalid kmodel");
// inputs init
for (size_t i = 0; i < kmodel_interp.inputs_size(); i++)
{
auto desc = kmodel_interp.input_desc(i);
auto shape = kmodel_interp.input_shape(i);
auto tensor = host_runtime_tensor::create(desc.datatype, shape, hrt::pool_shared).expect("cannot create input tensor");
kmodel_interp.input_tensor(i, tensor).expect("cannot set input tensor");
}
auto shape0 = kmodel_interp.input_shape(0); //nhwc
int kmodel_input_height = shape0[1];
int kmodel_input_width = shape0[2];
// outputs init
for (size_t i = 0; i < kmodel_interp.outputs_size(); i++)
{
auto desc = kmodel_interp.output_desc(i);
auto shape = kmodel_interp.output_shape(i);
auto tensor = host_runtime_tensor::create(desc.datatype, shape, hrt::pool_shared).expect("cannot create output tensor");
kmodel_interp.output_tensor(i, tensor).expect("cannot set output tensor");
}
/*****************************************************************************/
while (!isp_stop)
{
cv::Mat ori_img;
//sensor to cv::Mat
{
......
}
// pre_process
cv::Mat pre_process_img;
{
......
}
// set kmodel input
{
......
}
//已在ai_base.cc中给出通用实现,并且将在第6章给出开源代码
/***************************fixed:无需修改***********************************/
// kmodel run
kmodel_interp.run().expect("error occurred in running model");
// get kmodel output
vector<float *> k_outputs;
{
for (int i = 0; i < kmodel_interp.outputs_size(); i++)
{
auto out = kmodel_interp.output_tensor(i).expect("cannot get output tensor");
auto buf = out.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_read).unwrap().buffer();
float *p_out = reinterpret_cast<float *>(buf.data());
k_outputs.push_back(p_out);
}
}
/*****************************************************************************/
}
5.1.4 后处理#
对模型结果进行后处理,并结果放到results中。
//main.cc
vector<cls_res> results;
while (!isp_stop)
{
cv::Mat ori_img;
//sensor to cv::Mat
{
......
}
// pre_process
cv::Mat pre_process_img;
{
......
}
// set kmodel input
{
......
}
// kmodel run
......
// get kmodel output
vector<float *> k_outputs;
{
......
}
//post process
results.clear();
{
/***************************unfixed:不同AI Demo可能需要修改******************/
float* output0 = k_outputs[0];
//softmax
float sum = 0.0;
for (int i = 0; i < labels.size(); i++){
sum += exp(output0[i]);
}
int max_index;
for (int i = 0; i < labels.size(); i++)
{
output0[i] = exp(output0[i]) / sum;
}
max_index = std::max_element(output0,output0+labels.size()) - output0;
cls_res b;
if (output0[max_index] >= cls_thresh)
{
b.label = labels[max_index];
b.score = output0[max_index];
results.push_back(b);
}
/*****************************************************************************/
}
}
5.1.5 显示#
显示(视频输出,VO)与display相关,本小节简要介绍基于c++的显示设置、显示叠加的整体流程;详细介绍参见K230_视频输出_API参考.md
显示设置:设置显示大小,格式
显示叠加:显示由2个图层构成,其中下边的图层(原图图层)直接显示摄像头输出,上边的图层(osd图层)用于画框、画点,写文字等。
1. 显示设置:设置显示大小,格式。
//vi_vo.h
/***************************fixed:无需修改***********************************/
static k_s32 sample_connector_init(void)
{
......
k_connector_type connector_type = LT9611_MIPI_4LAN_1920X1080_30FPS;
......
}
static k_s32 vo_layer_vdss_bind_vo_config(void)
{
......
sample_connector_init();
// config lyaer
info.act_size.width = ISP_CHN0_WIDTH;//ISP_CHN0_HEIGHT;//1080;//640;//1080;
info.act_size.height = ISP_CHN0_HEIGHT;//ISP_CHN0_WIDTH;//1920;//480;//1920;
info.format = PIXEL_FORMAT_YVU_PLANAR_420;
......
}
/*****************************************************************************/
2. 显示叠加:由于摄像头和显示的通道进行了绑定,我们无法对vo进行直接操作,因此采用叠加的方式进行显示。
原图图层:由于摄像头(vi)通道0绑定了显示(vo)的通道1;随着摄像头的启动,摄像头通道0的数据会自动流到vo的通道1。
//vi_vo.h
......
/***************************fixed:无需修改***********************************/
//bind vicap chn 0 to vo
vicap_mpp_chn.mod_id = K_ID_VI;
vicap_mpp_chn.dev_id = vicap_dev;
vicap_mpp_chn.chn_id = vicap_chn;
vo_mpp_chn.mod_id = K_ID_VO;
vo_mpp_chn.dev_id = K_VO_DISPLAY_DEV_ID;
vo_mpp_chn.chn_id = K_VO_DISPLAY_CHN_ID1;
sample_vicap_bind_vo(vicap_mpp_chn, vo_mpp_chn);
printf("sample_vicap ...dwc_dsi_init\n");
/*****************************************************************************/
......
osd图层:cv::Mat上画框、画点、写文字之后,将数据插入vo对应通道。
// vi_vo.h
#define osd_id K_VO_OSD3
/***************************unfixed:不同AI Demo可能需要修改******************/
#define ISP_CHN0_WIDTH (1920) // sensor ch0输出宽度,vo
#define ISP_CHN0_HEIGHT (1080) // sensor ch0输出高度,vo
#define osd_width (1920)
#define osd_height (1080)
/*****************************************************************************/
.....
/***************************fixed:无需修改***********************************/
k_vb_blk_handle vo_insert_frame(k_video_frame_info *vf_info, void **pic_vaddr)
{
k_u64 phys_addr = 0;
k_u32 *virt_addr;
k_vb_blk_handle handle;
k_s32 size;
if (vf_info == NULL)
return K_FALSE;
if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_ABGR_8888 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_ARGB_8888)
size = vf_info->v_frame.height * vf_info->v_frame.width * 4;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_RGB_565 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_BGR_565)
size = vf_info->v_frame.height * vf_info->v_frame.width * 2;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_ABGR_4444 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_ARGB_4444)
size = vf_info->v_frame.height * vf_info->v_frame.width * 2;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_RGB_888 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_BGR_888)
size = vf_info->v_frame.height * vf_info->v_frame.width * 3;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_ARGB_1555 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_ABGR_1555)
size = vf_info->v_frame.height * vf_info->v_frame.width * 2;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_YVU_PLANAR_420)
size = vf_info->v_frame.height * vf_info->v_frame.width * 3 / 2;
size = size + 4096; // 强制4K ,后边得删了
printf("vb block size is %x \n", size);
handle = kd_mpi_vb_get_block(g_pool_id, size, NULL);
if (handle == VB_INVALID_HANDLE)
{
printf("%s get vb block error\n", __func__);
return K_FAILED;
}
phys_addr = kd_mpi_vb_handle_to_phyaddr(handle);
if (phys_addr == 0)
{
printf("%s get phys addr error\n", __func__);
return K_FAILED;
}
virt_addr = (k_u32 *)kd_mpi_sys_mmap(phys_addr, size);
// virt_addr = (k_u32 *)kd_mpi_sys_mmap_cached(phys_addr, size);
if (virt_addr == NULL)
{
printf("%s mmap error\n", __func__);
return K_FAILED;
}
vf_info->mod_id = K_ID_VO;
vf_info->pool_id = g_pool_id;
vf_info->v_frame.phys_addr[0] = phys_addr;
if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_YVU_PLANAR_420)
vf_info->v_frame.phys_addr[1] = phys_addr + (vf_info->v_frame.height * vf_info->v_frame.stride[0]);
*pic_vaddr = virt_addr;
printf("phys_addr is %lx g_pool_id is %d \n", phys_addr, g_pool_id);
return handle;
}
/*****************************************************************************/
使用示例:
// main.cc
/***************************fixed:无需修改***********************************/
// osd create
k_video_frame_info vf_info;
void *pic_vaddr = NULL;
memset(&vf_info, 0, sizeof(vf_info));
vf_info.v_frame.width = osd_width;
vf_info.v_frame.height = osd_height;
vf_info.v_frame.stride[0] = osd_width;
vf_info.v_frame.pixel_format = PIXEL_FORMAT_ARGB_8888;
block = vo_insert_frame(&vf_info, &pic_vaddr);
/*****************************************************************************/
while (!isp_stop)
{
cv::Mat ori_img;
// sensor to cv::Mat
// pre_process
// set kmodel input
// kmodel run
// get kmodel output
// post process
results.clear();
{
......
}
// draw result to vo
{
// draw osd
{
cv::Mat osd_frame(osd_height, osd_width, CV_8UC4, cv::Scalar(0, 0, 0, 0));
/***************************unfixed:不同AI Demo可能需要修改******************/
{
//draw cls
double fontsize = (osd_frame.cols * osd_frame.rows * 1.0) / (1100 * 1200);
for(int i = 0; i < results.size(); i++)
{
std::string text = "class: " + results[i].label + ", score: " + std::to_string(round(results[i].score * 100) / 100.0).substr(0, 4);
cv::putText(osd_frame, text, cv::Point(1, 40), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(255, 255, 255, 0), 2);
std::cout << text << std::endl;
}
}
/*************************************************************************/
/***************************fixed:无需修改***********************************/
memcpy(pic_vaddr, osd_frame.data, osd_width * osd_height * 4);
}
// insert osd to vo
{
kd_mpi_vo_chn_insert_frame(osd_id+3, &vf_info);
printf("kd_mpi_vo_chn_insert_frame success \n");
}
/*****************************************************************************/
}
......
}
5.1.6 完整代码#
具体怎么操作运行,请参考第6章,这里着重介绍代码流程。
vi_vo.h
/* Copyright (c) 2023, Canaan Bright Sight Co., Ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <chrono>
#include <fstream>
#include <iostream>
#include <thread>
#include <atomic>
#include "mpi_sys_api.h"
/* vicap */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>
#include "k_module.h"
#include "k_type.h"
#include "k_vb_comm.h"
#include "k_video_comm.h"
#include "k_sys_comm.h"
#include "mpi_vb_api.h"
#include "mpi_vicap_api.h"
#include "mpi_isp_api.h"
#include "mpi_sys_api.h"
#include "k_vo_comm.h"
#include "mpi_vo_api.h"
#include "vo_test_case.h"
#include "k_connector_comm.h"
#include "mpi_connector_api.h"
#include "k_autoconf_comm.h"
/***************************unfixed:不同AI Demo可能需要修改*******************/
#if defined(CONFIG_BOARD_K230_CANMV)
#define SENSOR_CHANNEL (3) // 通道数
#define SENSOR_HEIGHT (720) // sensor ch1输出高度,AI输入
#define SENSOR_WIDTH (1280) // sensor ch1输出宽度,AI输入
#define ISP_CHN0_WIDTH (1920) // sensor ch0输出宽度,vo
#define ISP_CHN0_HEIGHT (1080) // sensor ch0输出高度,vo
#define vicap_install_osd (1)
#define osd_id K_VO_OSD3
#define osd_width (1920)
#define osd_height (1080)
#else
#define SENSOR_CHANNEL (3)
#define SENSOR_HEIGHT (1280)
#define SENSOR_WIDTH (720)
#define ISP_CHN0_WIDTH (1088)
#define ISP_CHN0_HEIGHT (1920)
#define vicap_install_osd (1)
#define osd_id K_VO_OSD3
#define osd_width (1080)
#define osd_height (1920)
#endif
/*****************************************************************************/
/***************************fixed:无需修改***********************************/
k_vb_config config;
k_vicap_dev vicap_dev;
k_vicap_chn vicap_chn;
k_vicap_dev_attr dev_attr;
k_vicap_chn_attr chn_attr;
k_vicap_sensor_info sensor_info;
k_vicap_sensor_type sensor_type;
k_mpp_chn vicap_mpp_chn;
k_mpp_chn vo_mpp_chn;
k_video_frame_info dump_info;
k_vo_draw_frame vo_frame = (k_vo_draw_frame) {
1,
16,
16,
128,
128,
1
};
static k_vb_blk_handle block;
k_u32 g_pool_id;
int vo_creat_layer_test(k_vo_layer chn_id, layer_info *info)
{
k_vo_video_layer_attr attr;
// check layer
if ((chn_id >= K_MAX_VO_LAYER_NUM) || ((info->func & K_VO_SCALER_ENABLE) && (chn_id != K_VO_LAYER0))
|| ((info->func != 0) && (chn_id == K_VO_LAYER2)))
{
printf("input layer num failed \n");
return -1 ;
}
// check scaler
// set offset
attr.display_rect = info->offset;
// set act
attr.img_size = info->act_size;
// sget size
info->size = info->act_size.height * info->act_size.width * 3 / 2;
//set pixel format
attr.pixel_format = info->format;
if (info->format != PIXEL_FORMAT_YVU_PLANAR_420)
{
printf("input pix format failed \n");
return -1;
}
// set stride
attr.stride = (info->act_size.width / 8 - 1) + ((info->act_size.height - 1) << 16);
// set function
attr.func = info->func;
// set scaler attr
attr.scaler_attr = info->attr;
// set video layer atrr
kd_mpi_vo_set_video_layer_attr(chn_id, &attr);
// enable layer
kd_mpi_vo_enable_video_layer(chn_id);
return 0;
}
k_vb_blk_handle vo_insert_frame(k_video_frame_info *vf_info, void **pic_vaddr)
{
k_u64 phys_addr = 0;
k_u32 *virt_addr;
k_vb_blk_handle handle;
k_s32 size;
if (vf_info == NULL)
return K_FALSE;
if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_ABGR_8888 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_ARGB_8888)
size = vf_info->v_frame.height * vf_info->v_frame.width * 4;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_RGB_565 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_BGR_565)
size = vf_info->v_frame.height * vf_info->v_frame.width * 2;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_ABGR_4444 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_ARGB_4444)
size = vf_info->v_frame.height * vf_info->v_frame.width * 2;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_RGB_888 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_BGR_888)
size = vf_info->v_frame.height * vf_info->v_frame.width * 3;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_ARGB_1555 || vf_info->v_frame.pixel_format == PIXEL_FORMAT_ABGR_1555)
size = vf_info->v_frame.height * vf_info->v_frame.width * 2;
else if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_YVU_PLANAR_420)
size = vf_info->v_frame.height * vf_info->v_frame.width * 3 / 2;
size = size + 4096; // 强制4K ,后边得删了
printf("vb block size is %x \n", size);
handle = kd_mpi_vb_get_block(g_pool_id, size, NULL);
if (handle == VB_INVALID_HANDLE)
{
printf("%s get vb block error\n", __func__);
return K_FAILED;
}
phys_addr = kd_mpi_vb_handle_to_phyaddr(handle);
if (phys_addr == 0)
{
printf("%s get phys addr error\n", __func__);
return K_FAILED;
}
virt_addr = (k_u32 *)kd_mpi_sys_mmap(phys_addr, size);
// virt_addr = (k_u32 *)kd_mpi_sys_mmap_cached(phys_addr, size);
if (virt_addr == NULL)
{
printf("%s mmap error\n", __func__);
return K_FAILED;
}
vf_info->mod_id = K_ID_VO;
vf_info->pool_id = g_pool_id;
vf_info->v_frame.phys_addr[0] = phys_addr;
if (vf_info->v_frame.pixel_format == PIXEL_FORMAT_YVU_PLANAR_420)
vf_info->v_frame.phys_addr[1] = phys_addr + (vf_info->v_frame.height * vf_info->v_frame.stride[0]);
*pic_vaddr = virt_addr;
printf("phys_addr is %lx g_pool_id is %d \n", phys_addr, g_pool_id);
return handle;
}
k_u32 vo_creat_osd_test(k_vo_osd osd, osd_info *info)
{
k_vo_video_osd_attr attr;
// set attr
attr.global_alptha = info->global_alptha;
if (info->format == PIXEL_FORMAT_ABGR_8888 || info->format == PIXEL_FORMAT_ARGB_8888)
{
info->size = info->act_size.width * info->act_size.height * 4;
info->stride = info->act_size.width * 4 / 8;
}
else if (info->format == PIXEL_FORMAT_RGB_565 || info->format == PIXEL_FORMAT_BGR_565)
{
info->size = info->act_size.width * info->act_size.height * 2;
info->stride = info->act_size.width * 2 / 8;
}
else if (info->format == PIXEL_FORMAT_RGB_888 || info->format == PIXEL_FORMAT_BGR_888)
{
info->size = info->act_size.width * info->act_size.height * 3;
info->stride = info->act_size.width * 3 / 8;
}
else if(info->format == PIXEL_FORMAT_ARGB_4444 || info->format == PIXEL_FORMAT_ABGR_4444)
{
info->size = info->act_size.width * info->act_size.height * 2;
info->stride = info->act_size.width * 2 / 8;
}
else if(info->format == PIXEL_FORMAT_ARGB_1555 || info->format == PIXEL_FORMAT_ABGR_1555)
{
info->size = info->act_size.width * info->act_size.height * 2;
info->stride = info->act_size.width * 2 / 8;
}
else
{
printf("set osd pixel format failed \n");
}
attr.stride = info->stride;
attr.pixel_format = info->format;
attr.display_rect = info->offset;
attr.img_size = info->act_size;
kd_mpi_vo_set_video_osd_attr(osd, &attr);
kd_mpi_vo_osd_enable(osd);
return 0;
}
void sample_vicap_install_osd(void)
{
osd_info osd;
osd.act_size.width = osd_width ;
osd.act_size.height = osd_height;
osd.offset.x = 0;
osd.offset.y = 0;
osd.global_alptha = 0xff;
// osd.global_alptha = 0x32;
osd.format = PIXEL_FORMAT_ARGB_8888;//PIXEL_FORMAT_ARGB_4444; //PIXEL_FORMAT_ARGB_1555;//PIXEL_FORMAT_ARGB_8888;
vo_creat_osd_test(osd_id, &osd);
}
void vo_osd_release_block(void)
{
if(vicap_install_osd == 1)
{
kd_mpi_vo_osd_disable(osd_id);
kd_mpi_vb_release_block(block);
}
}
static k_s32 sample_connector_init(void)
{
k_u32 ret = 0;
k_s32 connector_fd;
#if defined(CONFIG_BOARD_K230_CANMV)
k_connector_type connector_type = LT9611_MIPI_4LAN_1920X1080_30FPS;// HX8377_V2_MIPI_4LAN_1080X1920_30FPS;
#else
k_connector_type connector_type = HX8377_V2_MIPI_4LAN_1080X1920_30FPS;
#endif
k_connector_info connector_info;
memset(&connector_info, 0, sizeof(k_connector_info));
//connector get sensor info
ret = kd_mpi_get_connector_info(connector_type, &connector_info);
if (ret) {
printf("sample_vicap, the sensor type not supported!\n");
return ret;
}
connector_fd = kd_mpi_connector_open(connector_info.connector_name);
if (connector_fd < 0) {
printf("%s, connector open failed.\n", __func__);
return K_ERR_VO_NOTREADY;
}
// set connect power
kd_mpi_connector_power_set(connector_fd, K_TRUE);
// connector init
kd_mpi_connector_init(connector_fd, connector_info);
return 0;
}
static k_s32 vo_layer_vdss_bind_vo_config(void)
{
layer_info info;
k_vo_layer chn_id = K_VO_LAYER1;
memset(&info, 0, sizeof(info));
sample_connector_init();
// config lyaer
info.act_size.width = ISP_CHN0_WIDTH;//ISP_CHN0_HEIGHT;//1080;//640;//1080;
info.act_size.height = ISP_CHN0_HEIGHT;//ISP_CHN0_WIDTH;//1920;//480;//1920;
info.format = PIXEL_FORMAT_YVU_PLANAR_420;
info.func = 0;//K_ROTATION_180;////K_ROTATION_90;
info.global_alptha = 0xff;
info.offset.x = 0;//(1080-w)/2,
info.offset.y = 0;//(1920-h)/2;
vo_creat_layer_test(chn_id, &info);
if(vicap_install_osd == 1)
sample_vicap_install_osd();
//exit ;
return 0;
}
static void sample_vicap_bind_vo(k_mpp_chn vicap_mpp_chn, k_mpp_chn vo_mpp_chn)
{
k_s32 ret;
ret = kd_mpi_sys_bind(&vicap_mpp_chn, &vo_mpp_chn);
if (ret) {
printf("kd_mpi_sys_unbind failed:0x%x\n", ret);
}
return;
}
static void sample_vicap_unbind_vo(k_mpp_chn vicap_mpp_chn, k_mpp_chn vo_mpp_chn)
{
k_s32 ret;
ret = kd_mpi_sys_unbind(&vicap_mpp_chn, &vo_mpp_chn);
if (ret) {
printf("kd_mpi_sys_unbind failed:0x%x\n", ret);
}
return;
}
int vivcap_start()
{
k_s32 ret = 0;
k_u32 pool_id;
k_vb_pool_config pool_config;
printf("sample_vicap ...\n");
#if defined(CONFIG_BOARD_K230_CANMV)
sensor_type = OV_OV5647_MIPI_CSI0_1920X1080_30FPS_10BIT_LINEAR;
kd_mpi_vicap_set_mclk(VICAP_MCLK0, VICAP_PLL0_CLK_DIV4, 16, 1);
#else
sensor_type = IMX335_MIPI_2LANE_RAW12_2592X1944_30FPS_LINEAR;
#endif
vicap_dev = VICAP_DEV_ID_0;
memset(&config, 0, sizeof(config));
config.max_pool_cnt = 64;
//VB for YUV420SP output
config.comm_pool[0].blk_cnt = 5;
config.comm_pool[0].mode = VB_REMAP_MODE_NOCACHE;
config.comm_pool[0].blk_size = VICAP_ALIGN_UP((ISP_CHN0_WIDTH * ISP_CHN0_HEIGHT * 3 / 2), VICAP_ALIGN_1K);
//VB for RGB888 output
config.comm_pool[1].blk_cnt = 5;
config.comm_pool[1].mode = VB_REMAP_MODE_NOCACHE;
config.comm_pool[1].blk_size = VICAP_ALIGN_UP((SENSOR_HEIGHT * SENSOR_WIDTH * 3 ), VICAP_ALIGN_1K);
ret = kd_mpi_vb_set_config(&config);
if (ret) {
printf("vb_set_config failed ret:%d\n", ret);
return ret;
}
k_vb_supplement_config supplement_config;
memset(&supplement_config, 0, sizeof(supplement_config));
supplement_config.supplement_config |= VB_SUPPLEMENT_JPEG_MASK;
ret = kd_mpi_vb_set_supplement_config(&supplement_config);
if (ret) {
printf("vb_set_supplement_config failed ret:%d\n", ret);
return ret;
}
ret = kd_mpi_vb_init();
if (ret) {
printf("vb_init failed ret:%d\n", ret);
return ret;
}
printf("sample_vicap ...kd_mpi_vicap_get_sensor_info\n");
// dwc_dsi_init();
vo_layer_vdss_bind_vo_config();
if(vicap_install_osd == 1)
{
memset(&pool_config, 0, sizeof(pool_config));
pool_config.blk_size = VICAP_ALIGN_UP((osd_width * osd_height * 4 * 2), VICAP_ALIGN_1K);
pool_config.blk_cnt = 4;
pool_config.mode = VB_REMAP_MODE_NOCACHE;
pool_id = kd_mpi_vb_create_pool(&pool_config); // osd0 - 3 argb 320 x 240
g_pool_id = pool_id;
printf("--------aa--------------g_pool_id is %d pool_id is %d \n",g_pool_id, pool_id);
}
memset(&sensor_info, 0, sizeof(k_vicap_sensor_info));
ret = kd_mpi_vicap_get_sensor_info(sensor_type, &sensor_info);
if (ret) {
printf("sample_vicap, the sensor type not supported!\n");
return ret;
}
memset(&dev_attr, 0, sizeof(k_vicap_dev_attr));
dev_attr.acq_win.h_start = 0;
dev_attr.acq_win.v_start = 0;
#if defined (CONFIG_BOARD_K230_CANMV)
dev_attr.acq_win.width = ISP_CHN0_WIDTH;
dev_attr.acq_win.height = ISP_CHN0_HEIGHT;
#else
dev_attr.acq_win.width = 2592;//SENSOR_HEIGHT;
dev_attr.acq_win.height = 1944;//SENSOR_WIDTH;
#endif
dev_attr.mode = VICAP_WORK_ONLINE_MODE;
dev_attr.pipe_ctrl.data = 0xFFFFFFFF;
dev_attr.pipe_ctrl.bits.af_enable = 0;
dev_attr.pipe_ctrl.bits.ahdr_enable = 0;
dev_attr.cpature_frame = 0;
memcpy(&dev_attr.sensor_info, &sensor_info, sizeof(k_vicap_sensor_info));
ret = kd_mpi_vicap_set_dev_attr(vicap_dev, dev_attr);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_set_dev_attr failed.\n");
return ret;
}
memset(&chn_attr, 0, sizeof(k_vicap_chn_attr));
//set chn0 output yuv420sp
chn_attr.out_win.h_start = 0;
chn_attr.out_win.v_start = 0;
chn_attr.out_win.width = ISP_CHN0_WIDTH;
chn_attr.out_win.height = ISP_CHN0_HEIGHT;
#if defined(CONFIG_BOARD_K230_CANMV)
chn_attr.crop_win = dev_attr.acq_win;
#else
// chn_attr.crop_win = dev_attr.acq_win;
chn_attr.crop_win.h_start = 768;
chn_attr.crop_win.v_start = 16;
chn_attr.crop_win.width = ISP_CHN0_WIDTH;
chn_attr.crop_win.height = ISP_CHN0_HEIGHT;
#endif
chn_attr.scale_win = chn_attr.out_win;
chn_attr.crop_enable = K_FALSE;
chn_attr.scale_enable = K_FALSE;
// chn_attr.dw_enable = K_FALSE;
chn_attr.chn_enable = K_TRUE;
chn_attr.pix_format = PIXEL_FORMAT_YVU_PLANAR_420;
chn_attr.buffer_num = VICAP_MAX_FRAME_COUNT;//at least 3 buffers for isp
chn_attr.buffer_size = config.comm_pool[0].blk_size;
vicap_chn = VICAP_CHN_ID_0;
printf("sample_vicap ...kd_mpi_vicap_set_chn_attr, buffer_size[%d]\n", chn_attr.buffer_size);
ret = kd_mpi_vicap_set_chn_attr(vicap_dev, vicap_chn, chn_attr);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_set_chn_attr failed.\n");
return ret;
}
//bind vicap chn 0 to vo
vicap_mpp_chn.mod_id = K_ID_VI;
vicap_mpp_chn.dev_id = vicap_dev;
vicap_mpp_chn.chn_id = vicap_chn;
vo_mpp_chn.mod_id = K_ID_VO;
vo_mpp_chn.dev_id = K_VO_DISPLAY_DEV_ID;
vo_mpp_chn.chn_id = K_VO_DISPLAY_CHN_ID1;
sample_vicap_bind_vo(vicap_mpp_chn, vo_mpp_chn);
printf("sample_vicap ...dwc_dsi_init\n");
//set chn1 output rgb888p
chn_attr.out_win.h_start = 0;
chn_attr.out_win.v_start = 0;
chn_attr.out_win.width = SENSOR_WIDTH ;
chn_attr.out_win.height = SENSOR_HEIGHT;
// chn_attr.crop_win = dev_attr.acq_win;
#if defined(CONFIG_BOARD_K230_CANMV)
chn_attr.crop_win = dev_attr.acq_win;
#else
chn_attr.crop_win.h_start = 768;
chn_attr.crop_win.v_start = 16;
chn_attr.crop_win.width = ISP_CHN0_WIDTH;
chn_attr.crop_win.height = ISP_CHN0_HEIGHT;
#endif
chn_attr.scale_win = chn_attr.out_win;
chn_attr.crop_enable = K_FALSE;
chn_attr.scale_enable = K_FALSE;
// chn_attr.dw_enable = K_FALSE;
chn_attr.chn_enable = K_TRUE;
chn_attr.pix_format = PIXEL_FORMAT_BGR_888_PLANAR;
chn_attr.buffer_num = VICAP_MAX_FRAME_COUNT;//at least 3 buffers for isp
chn_attr.buffer_size = config.comm_pool[1].blk_size;
printf("sample_vicap ...kd_mpi_vicap_set_chn_attr, buffer_size[%d]\n", chn_attr.buffer_size);
ret = kd_mpi_vicap_set_chn_attr(vicap_dev, VICAP_CHN_ID_1, chn_attr);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_set_chn_attr failed.\n");
return ret;
}
printf("sample_vicap ...kd_mpi_vicap_init\n");
ret = kd_mpi_vicap_init(vicap_dev);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_init failed.\n");
// goto err_exit;
}
printf("sample_vicap ...kd_mpi_vicap_start_stream\n");
ret = kd_mpi_vicap_start_stream(vicap_dev);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_init failed.\n");
// goto err_exit;
}
return ret;
}
int vivcap_stop()
{
printf("sample_vicap ...kd_mpi_vicap_stop_stream\n");
int ret = kd_mpi_vicap_stop_stream(vicap_dev);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_init failed.\n");
return ret;
}
ret = kd_mpi_vicap_deinit(vicap_dev);
if (ret) {
printf("sample_vicap, kd_mpi_vicap_deinit failed.\n");
return ret;
}
kd_mpi_vo_disable_video_layer(K_VO_LAYER1);
vicap_mpp_chn.mod_id = K_ID_VI;
vicap_mpp_chn.dev_id = vicap_dev;
vicap_mpp_chn.chn_id = vicap_chn;
vo_mpp_chn.mod_id = K_ID_VO;
vo_mpp_chn.dev_id = K_VO_DISPLAY_DEV_ID;
vo_mpp_chn.chn_id = K_VO_DISPLAY_CHN_ID1;
sample_vicap_unbind_vo(vicap_mpp_chn, vo_mpp_chn);
/*Allow one frame time for the VO to release the VB block*/
k_u32 display_ms = 1000 / 33;
usleep(1000 * display_ms);
ret = kd_mpi_vb_exit();
if (ret) {
printf("sample_vicap, kd_mpi_vb_exit failed.\n");
return ret;
}
return 0;
}
void yuv_rotate_90(char *des, char *src,int width,int height)
{
int n = 0;
int hw = width>>1;
int hh = height>>1;
int size = width * height;
int hsize = size>>2;
int pos = 0;
for(int i = width-1;i >= 0;i--)
{
pos = 0;
for(int j= 0;j < height;j++)
{
des[n++]= src[pos+i];
pos += width;
}
}
}
/****************************************************************************/
main.cc
/* Copyright (c) 2023, Canaan Bright Sight Co., Ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <iostream>
#include <thread>
#include <map>
#include <nncase/runtime/runtime_tensor.h>
#include <nncase/runtime/interpreter.h>
#include <nncase/runtime/runtime_op_utility.h>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include "vi_vo.h"
using namespace nncase;
using namespace nncase::runtime;
using cv::Mat;
using std::cerr;
using std::cout;
using std::endl;
using namespace std;
auto cache = cv::Mat::zeros(1, 1, CV_32FC1);
/**
* @brief 分类结果结构
*/
typedef struct cls_res
{
float score;//分类分数
string label;//分类标签结果
}cls_res;
std::atomic<bool> isp_stop(false);
void video_proc_cls(char *argv[])
{
/***************************fixed:无需修改***********************************/
vivcap_start();
// osd set
k_video_frame_info vf_info;
void *pic_vaddr = NULL;
memset(&vf_info, 0, sizeof(vf_info));
vf_info.v_frame.width = osd_width;
vf_info.v_frame.height = osd_height;
vf_info.v_frame.stride[0] = osd_width;
vf_info.v_frame.pixel_format = PIXEL_FORMAT_ARGB_8888;
block = vo_insert_frame(&vf_info, &pic_vaddr);
// alloc memory for sensor
size_t paddr = 0;
void *vaddr = nullptr;
size_t size = SENSOR_CHANNEL * SENSOR_HEIGHT * SENSOR_WIDTH;
int ret = kd_mpi_sys_mmz_alloc_cached(&paddr, &vaddr, "allocate", "anonymous", size);
if (ret)
{
std::cerr << "physical_memory_block::allocate failed: ret = " << ret << ", errno = " << strerror(errno) << std::endl;
std::abort();
}
/*****************************************************************************/
string kmodel_path = argv[1];
cout<<"kmodel_path : "<<kmodel_path<<endl;
float cls_thresh=0.5;
/***************************fixed:无需修改***********************************/
// 我们已经把相关实现封装到ai_base.cc,这里只是为了介绍起来比较简单
interpreter kmodel_interp;
// load model
std::ifstream ifs(kmodel_path, std::ios::binary);
kmodel_interp.load_model(ifs).expect("Invalid kmodel");
// inputs init
for (size_t i = 0; i < kmodel_interp.inputs_size(); i++)
{
auto desc = kmodel_interp.input_desc(i);
auto shape = kmodel_interp.input_shape(i);
auto tensor = host_runtime_tensor::create(desc.datatype, shape, hrt::pool_shared).expect("cannot create input tensor");
kmodel_interp.input_tensor(i, tensor).expect("cannot set input tensor");
}
auto shape0 = kmodel_interp.input_shape(0); //nhwc
int kmodel_input_height = shape0[1];
int kmodel_input_width = shape0[2];
// outputs init
for (size_t i = 0; i < kmodel_interp.outputs_size(); i++)
{
auto desc = kmodel_interp.output_desc(i);
auto shape = kmodel_interp.output_shape(i);
auto tensor = host_runtime_tensor::create(desc.datatype, shape, hrt::pool_shared).expect("cannot create output tensor");
kmodel_interp.output_tensor(i, tensor).expect("cannot set output tensor");
}
/*****************************************************************************/
vector<cls_res> results;
std::vector<std::string> labels = {"bocai","changqiezi","huluobo","xihongshi","xilanhua"};
while (!isp_stop)
{
cv::Mat ori_img;
//sensor to cv::Mat
{
/***************************fixed:无需修改***********************************/
//从摄像头读取一帧图像
memset(&dump_info, 0 , sizeof(k_video_frame_info));
ret = kd_mpi_vicap_dump_frame(vicap_dev, VICAP_CHN_ID_1, VICAP_DUMP_YUV, &dump_info, 1000);
if (ret) {
printf("sample_vicap...kd_mpi_vicap_dump_frame failed.\n");
continue;
}
//将摄像头当前帧对应DDR地址映射到当前系统进行访问
auto vbvaddr = kd_mpi_sys_mmap_cached(dump_info.v_frame.phys_addr[0], size);
memcpy(vaddr, (void *)vbvaddr, SENSOR_HEIGHT * SENSOR_WIDTH * 3);
kd_mpi_sys_munmap(vbvaddr, size);
/*****************************************************************************/
//将摄像头数据转换为为cv::Mat,sensor(rgb,chw)->cv::Mat(bgr,hwc)
cv::Mat image_r = cv::Mat(SENSOR_HEIGHT,SENSOR_WIDTH, CV_8UC1, vaddr);
cv::Mat image_g = cv::Mat(SENSOR_HEIGHT,SENSOR_WIDTH, CV_8UC1, vaddr+SENSOR_HEIGHT*SENSOR_WIDTH);
cv::Mat image_b = cv::Mat(SENSOR_HEIGHT,SENSOR_WIDTH, CV_8UC1, vaddr+2*SENSOR_HEIGHT*SENSOR_WIDTH);
std::vector<cv::Mat> color_vec(3);
color_vec.clear();
color_vec.push_back(image_b);
color_vec.push_back(image_g);
color_vec.push_back(image_r);
cv::merge(color_vec, ori_img);
}
/***************************unfixed:不同AI Demo可能需要修改******************/
// pre_process
cv::Mat pre_process_img;
{
cv::Mat rgb_img;
cv::cvtColor(ori_img, rgb_img, cv::COLOR_BGR2RGB);
cv::resize(rgb_img, pre_process_img, cv::Size(kmodel_input_width, kmodel_input_height), cv::INTER_LINEAR);
}
/*****************************************************************************/
/***************************fixed:无需修改***********************************/
// set kmodel input
{
runtime_tensor tensor0 = kmodel_interp.input_tensor(0).expect("cannot get input tensor");
auto in_buf = tensor0.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_write).unwrap().buffer();
memcpy(reinterpret_cast<unsigned char *>(in_buf.data()), pre_process_img.data,sizeof(uint8_t)* kmodel_input_height * kmodel_input_width * 3);
hrt::sync(tensor0, sync_op_t::sync_write_back, true).expect("sync write_back failed");
}
// kmodel run
kmodel_interp.run().expect("error occurred in running model");
// get kmodel output
vector<float *> k_outputs;
{
for (int i = 0; i < kmodel_interp.outputs_size(); i++)
{
auto out = kmodel_interp.output_tensor(i).expect("cannot get output tensor");
auto buf = out.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_read).unwrap().buffer();
float *p_out = reinterpret_cast<float *>(buf.data());
k_outputs.push_back(p_out);
}
}
/***************************fixed:无需修改***********************************/
/***************************unfixed:不同AI Demo可能需要修改******************/
//post process
results.clear();
{
float* output0 = k_outputs[0];
float sum = 0.0;
for (int i = 0; i < labels.size(); i++){
sum += exp(output0[i]);
}
int max_index;
for (int i = 0; i < labels.size(); i++)
{
output0[i] = exp(output0[i]) / sum;
}
max_index = std::max_element(output0,output0+labels.size()) - output0;
cls_res b;
if (output0[max_index] >= cls_thresh)
{
b.label = labels[max_index];
b.score = output0[max_index];
results.push_back(b);
}
}
/*****************************************************************************/
// draw result to vo
{
{
cv::Mat osd_frame(osd_height, osd_width, CV_8UC4, cv::Scalar(0, 0, 0, 0));
{
/***************************unfixed:不同AI Demo可能需要修改******************/
//draw cls
double fontsize = (osd_frame.cols * osd_frame.rows * 1.0) / (1100 * 1200);
for(int i = 0; i < results.size(); i++)
{
std::string text = "class: " + results[i].label + ", score: " + std::to_string(round(results[i].score * 100) / 100.0).substr(0, 4);
cv::putText(osd_frame, text, cv::Point(1, 40), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(255, 255, 255, 0), 2);
std::cout << text << std::endl;
}
/*****************************************************************************/
}
/***************************fixed:无需修改***********************************/
memcpy(pic_vaddr, osd_frame.data, osd_width * osd_height * 4);
}
// insert osd to vo
{
kd_mpi_vo_chn_insert_frame(osd_id+3, &vf_info); //K_VO_OSD0
printf("kd_mpi_vo_chn_insert_frame success \n");
}
}
{
// 释放sensor当前帧
ret = kd_mpi_vicap_dump_release(vicap_dev, VICAP_CHN_ID_1, &dump_info);
if (ret) {
printf("sample_vicap...kd_mpi_vicap_dump_release failed.\n");
}
}
/*****************************************************************************/
}
/***************************fixed:无需修改***********************************/
vo_osd_release_block();
vivcap_stop();
// free memory
ret = kd_mpi_sys_mmz_free(paddr, vaddr);
if (ret)
{
std::cerr << "free failed: ret = " << ret << ", errno = " << strerror(errno) << std::endl;
std::abort();
}
/*****************************************************************************/
}
int main(int argc, char *argv[])
{
std::cout << "case " << argv[0] << " built at " << __DATE__ << " " << __TIME__ << std::endl;
if (argc != 2)
{
cout << "模型推理时传参说明:"
<< "<kmodel_path>" << endl
<< "Options:" << endl
<< " kmodel_path Kmodel的路径\n"
<< "\n"
<< endl;
return -1;
}
/***************************fixed:无需修改***********************************/
std::thread thread_isp(video_proc_cls, argv);
while (getchar() != 'q')
{
usleep(10000);
}
isp_stop = true;
thread_isp.join();
/*****************************************************************************/
return 0;
}
5.2 基于ulab(MicroPython)的AI推理流程#
基于ulab(MicroPython)的K230 AI推理,简要介绍了使用MicroPython语言实现的视频采集,图像预处理(ulab),模型推理、后处理(ulab)、显示等过程。
5.2.1 视频采集#
视频采集:(视频输入,VI)与摄像头相关,本小节简要介绍基于Micropython的摄像头设置、摄像头启动、从摄像头中获取一帧数据、摄像头停止的整体流程;详细介绍见K230_CanMV_Camera模块API手册.md、camera采集示例、K230_CanMV_Media模块API手册.md、media使用示例。
创建摄像头(sensor)输出缓存(VB):已经封装到c++底层实现中,在Micropython实现时无需关心。
设置的sensor两路输出;
一路输出用于显示,输出大小设置1080p,图像格式为PIXEL_FORMAT_YVU_PLANAR_420,直接绑定到vo;
另一路输出用于AI计算,输出大小(224,224),图像格式为PIXEL_FORMAT_RGB_888_PLANAR;
from media.camera import * #摄像头模块
#***************************unfixed:不同AI Demo可能需要修改******************
#显示分辨率
DISPLAY_WIDTH = ALIGN_UP(1920, 16)
DISPLAY_HEIGHT = 1080
#AI分辨率
OUT_RGB888P_WIDTH = ALIGN_UP(224, 16)
OUT_RGB888P_HEIGH = 224
#*****************************************************************************
#***************************fixed:无需修改***********************************
#for camera
def camera_init(dev_id):
# 设置摄像头类型
camera.sensor_init(dev_id, CAM_DEFAULT_SENSOR)
# (1)设置显示输出
# 设置指定设备id的chn0的输出宽高
camera.set_outsize(dev_id, CAM_CHN_ID_0, DISPLAY_WIDTH, DISPLAY_HEIGHT)
# 设置指定设备id的chn0的输出格式为yuv420sp
camera.set_outfmt(dev_id, CAM_CHN_ID_0, PIXEL_FORMAT_YUV_SEMIPLANAR_420)
# (2)设置AI输出
# 设置指定设备id的chn2的输出宽高
camera.set_outsize(dev_id, CAM_CHN_ID_2, OUT_RGB888P_WIDTH, OUT_RGB888P_HEIGH)
# 设置指定设备id的chn2的输出格式为rgb88planar
camera.set_outfmt(dev_id, CAM_CHN_ID_2, PIXEL_FORMAT_RGB_888_PLANAR)
def camera_start(dev_id):
# 启动sensor
camera.start_stream(dev_id)
def camera_read(dev_id):
# 读取指定设备chn2的一帧图像,即获取一帧AI原图
with ScopedTiming("camera_read",debug_mode >0):
rgb888p_img = camera.capture_image(dev_id, CAM_CHN_ID_2)
return rgb888p_img
def camera_release_image(dev_id,rgb888p_img):
# 释放指定设备chn2一帧图像
with ScopedTiming("camera_release_image",debug_mode >0):
camera.release_image(dev_id, CAM_CHN_ID_2, rgb888p_img)
def camera_stop(dev_id):
# 释放sensor
camera.stop_stream(dev_id)
#*****************************************************************************
将摄像头通道0绑定到显示:
#***************************fixed:无需修改***********************************
# for media
def media_init():
# meida初始化
......
# 将摄像头通道0绑定到显示
global media_source, media_sink
media_source = media_device(CAMERA_MOD_ID, CAM_DEV_ID_0, CAM_CHN_ID_0)
media_sink = media_device(DISPLAY_MOD_ID, DISPLAY_DEV_ID, DISPLAY_CHN_VIDEO1)
media.create_link(media_source, media_sink)
......
#*****************************************************************************
使用示例:
from media.camera import *
import os,sys
......
#***************************fixed:无需修改***********************************
# 初始化摄像头
camera_init(CAM_DEV_ID_0)
......
try:
media_init()
# 启动摄像头
camera_start(CAM_DEV_ID_0)
while True:
os.exitpoint()
with ScopedTiming("total",debug_mode > 0):
# 从摄像头通道2拿取一帧图像,用于喂给AI
rgb888p_img = camera_read(CAM_DEV_ID_0)
......
# 释放当前帧
camera_release_image(CAM_DEV_ID_0,rgb888p_img)
......
except KeyboardInterrupt as e:
print("user stop: ", e)
except BaseException as e:
sys.print_exception(e)
finally:
camera_stop(CAM_DEV_ID_0)
......
media_deinit()
#*****************************************************************************
5.2.2 预处理#
from media.camera import *
import os,sys
......
try:
......
while True:
os.exitpoint()
with ScopedTiming("total",debug_mode > 0):
# 从摄像头通道2拿取一帧图像,用于喂给AI
rgb888p_img = camera_read(CAM_DEV_ID_0)
if rgb888p_img.format() == image.RGBP888:
#******************unfixed:不同AI Demo可能需要修改******************
# rgb888(uint8,chw,rgb)->kmodel input(uint8,hwc,rgb)
# pre_process : chw -> hwc
ori_img_numpy = rgb888p_img.to_numpy_ref()
ori_img_copy = ori_img_numpy.copy()
shape=ori_img_copy.shape
img_tmp = ori_img_copy.reshape((shape[0], shape[1] * shape[2]))
img_tmp_trans = img_tmp.transpose()
img_res=img_tmp_trans.copy()
img_hwc=img_res.reshape((1,shape[1],shape[2],shape[0]))
input_tensor = nn.from_numpy(img_hwc)
#********************************************************************
# set kmodel input
kpu.set_input_tensor(0, input_tensor)
......
# 释放当前帧
camera_release_image(CAM_DEV_ID_0,rgb888p_img)
......
except KeyboardInterrupt as e:
print("user stop: ", e)
except BaseException as e:
sys.print_exception(e)
finally:
......
注:对于该分类模型来说,需要先拿到原图,再将原图resize到(224,224)大小,之后再经过其它预处理后喂给模型;但是由于对于micropython来说,resize不支持,故我们将喂给AI的摄像头数据分辨率设置为(224,224);当然除了micropython,我们在底层封装了可以调用的resize操作,后续会进行介绍。
5.2.3 模型推理#
from media.camera import *
import os,sys
......
try:
......
while True:
os.exitpoint()
with ScopedTiming("total",debug_mode > 0):
# 从摄像头通道2拿取一帧图像,用于喂给AI
rgb888p_img = camera_read(CAM_DEV_ID_0)
if rgb888p_img.format() == image.RGBP888:
......
# pre_process
#************************fixed:无需修改*****************************
# kmodel run
kpu.run()
# get output
results = []
for i in range(kpu.outputs_size()):
output_tensor = kpu.get_output_tensor(i)
result = output_tensor.to_numpy()
del output_tensor
results.append(result)
#********************************************************************
......
# 释放当前帧
camera_release_image(CAM_DEV_ID_0,rgb888p_img)
......
except KeyboardInterrupt as e:
print("user stop: ", e)
except BaseException as e:
sys.print_exception(e)
finally:
......
注:kmodel模块部分,kmodel的input_tensor、output_tensor都是mmz申请的内存,需要del手动释放
5.2.4 后处理#
对模型结果进行后处理,并结果放到results中。
from media.camera import *
import os,sys
......
try:
......
while True:
os.exitpoint()
with ScopedTiming("total",debug_mode > 0):
# 从摄像头通道2拿取一帧图像,用于喂给AI
rgb888p_img = camera_read(CAM_DEV_ID_0)
if rgb888p_img.format() == image.RGBP888:
......
# pre_process
# kmodel run
kpu.run()
# get output
results = []
......
#******************unfixed:不同AI Demo可能需要修改******************
# post_process
softmax_res=softmax(results[0][0])
res_idx=np.argmax(softmax_res)
if softmax_res[res_idx]>confidence_threshold:
cls_idx=res_idx
print("classification result:")
print(labels[res_idx])
print("score",softmax_res[res_idx])
else:
cls_idx=-1
#********************************************************************
......
# 释放当前帧
camera_release_image(CAM_DEV_ID_0,rgb888p_img)
......
except KeyboardInterrupt as e:
print("user stop: ", e)
except BaseException as e:
sys.print_exception(e)
finally:
......
5.2.4 显示#
显示(视频输出,VO)与display相关,本小节简要介绍基于Micropython的显示设置、资源释放、显示叠加的整体流程;详细介绍参见K230_CanMV_Display模块API手册.md、显示示例。
显示设置:设置显示大小,格式
显示叠加:显示由2个图层构成,其中下边的图层(原图图层)直接显示摄像头输出,上边的图层(osd图层)用于画框、画点,写文字等。
资源释放:释放显示相关资源
1. 显示设置、资源释放:
global draw_img,osd_img #for display
#***************************fixed:无需修改***********************************
# for display
def display_init():
# hdmi显示初始化
display.init(LT9611_1920X1080_30FPS)
display.set_plane(0, 0, DISPLAY_WIDTH, DISPLAY_HEIGHT, PIXEL_FORMAT_YVU_PLANAR_420, DISPLAY_MIRROR_NONE, DISPLAY_CHN_VIDEO1)
def display_deinit():
# 释放显示资源
display.deinit()
#*****************************************************************************
2. 显示叠加:由于摄像头和显示的通道进行了绑定,我们无法对vo进行直接操作,因此采用叠加的方式进行显示。
原图图层:由于摄像头(vi)通道0绑定了显示(vo)的通道1;随着摄像头的启动,摄像头通道0的数据会自动流到vo的通道1。
# classification.py
#***************************fixed:无需修改***********************************
# for media
def media_init():
# meida初始化
......
# 将摄像头通道0绑定到显示
global media_source, media_sink
media_source = media_device(CAMERA_MOD_ID, CAM_DEV_ID_0, CAM_CHN_ID_0)
media_sink = media_device(DISPLAY_MOD_ID, DISPLAY_DEV_ID, DISPLAY_CHN_VIDEO1)
media.create_link(media_source, media_sink)
......
#*****************************************************************************
osd图层:Image上画框、画点、写文字之后,将数据拷贝到osd_img,以进行显示。
#classification.py
global draw_img,osd_img #for display
global buffer,media_source,media_sink #for media
#***************************fixed:无需修改***********************************
# for media
def media_init():
# meida初始化
......
global buffer, draw_img, osd_img
# 在VB上创建显示大小的内存,用于存放osd
buffer = media.request_buffer(4 * DISPLAY_WIDTH * DISPLAY_HEIGHT)
# 用于画框
draw_img = image.Image(DISPLAY_WIDTH, DISPLAY_HEIGHT, image.ARGB8888)
# 用于拷贝画框结果,防止画框过程中发生buffer搬运
osd_img = image.Image(DISPLAY_WIDTH, DISPLAY_HEIGHT, image.ARGB8888, poolid=buffer.pool_id, alloc=image.ALLOC_VB,
phyaddr=buffer.phys_addr, virtaddr=buffer.virt_addr)
pass
#*****************************************************************************
def display_draw(label):
# hdmi写文字
with ScopedTiming("display_draw",debug_mode >0):
global draw_img,osd_img
#******************unfixed:不同AI Demo可能需要修改******************
if label:
#********************************************************************
draw_img.clear()
#******************unfixed:不同AI Demo可能需要修改******************
draw_img.draw_string(5,5,label,scale=5,color=(255,0,255,0))
#********************************************************************
draw_img.copy_to(osd_img)
display.show_image(osd_img, 0, 0, DISPLAY_CHN_OSD3)
else:
draw_img.clear()
draw_img.copy_to(osd_img)
display.show_image(osd_img, 0, 0, DISPLAY_CHN_OSD3)
使用示例:
#classification.py
from media.display import * #显示模块
from media.media import * #软件抽象模块,主要封装媒体数据链路以及媒体缓冲区
......
# 摄像头初始化
camera_init(CAM_DEV_ID_0)
# 显示初始化
display_init()
try:
media_init()
# 启动摄像头
camera_start(CAM_DEV_ID_0)
while True:
......
#******************unfixed:不同AI Demo可能需要修改******************
# draw result
if cls_idx>=0:
display_draw(labels[res_idx])
else:
display_draw(None)
#********************************************************************
......
except KeyboardInterrupt as e:
print("user stop: ", e)
except BaseException as e:
sys.print_exception(e)
finally:
camera_stop(CAM_DEV_ID_0)
display_deinit()
......
media_deinit()
5.2.5 资源释放#
大核内存包括两个部分,一个是系统内存,一个是 GC 内存,前者主要用来给模型还有系统内的一些功能使用,包括摄像头和屏幕的缓冲区、kmodel及其输入输出都来自这里(mmz,用del释放);后者是解析器层面申请的内存,可以给代码的变量使用(用gc.collect()释放)。
1. gc资源释放:
#classification.py
import gc
import os
def func_a():
a = []
for i in range(10000):
a.append(i)
func_a()
print(gc.mem_free() / 1024 / 1024) #stack mem
print(gc.mem_alloc() / 1024 / 1024)
gc.collect()
print(gc.mem_alloc() / 1024 / 1024)
2.系统内存释放:
kmodel模块部分,kmodel的input_tensor、output_tensor,及其本身都是mmz申请的内存,需要手动释放
#classification.py
del input_tensor
del output_tensor
del kpu
nn.shrink_memory_pool() #以免漏掉某个del,遍历所有kmodel相关内存,并释放
摄像头、显示及其VB释放:
#***************************fixed:无需修改***********************************
#classification.py
def camera_release_image(dev_id,rgb888p_img):
# 释放一帧图像
with ScopedTiming("camera_release_image",debug_mode >0):
camera.release_image(dev_id, CAM_CHN_ID_2, rgb888p_img)
def camera_stop(dev_id):
# 停止camera
camera.stop_stream(dev_id)
def display_deinit():
# 释放显示资源
display.deinit()
def media_deinit():
# meida资源释放
os.exitpoint(os.EXITPOINT_ENABLE_SLEEP)
time.sleep_ms(100)
if 'buffer' in globals():
global buffer
media.release_buffer(buffer)
if 'media_source' in globals() and 'media_sink' in globals():
global media_source, media_sink
media.destroy_link(media_source, media_sink)
media.buffer_deinit()
#*****************************************************************************
5.2.6 完整代码#
具体怎么操作运行,请参考第8章,这里着重介绍代码流程。
import ulab.numpy as np #类似python numpy操作,但也会有一些接口不同
import nncase_runtime as nn #nncase运行模块,封装了kpu(kmodel推理)和ai2d(图片预处理加速)操作
from media.camera import * #摄像头模块
from media.display import * #显示模块
from media.media import * #软件抽象模块,主要封装媒体数据链路以及媒体缓冲区
import aidemo #aidemo模块,封装ai demo相关后处理、画图操作
import image #图像模块,主要用于读取、图像绘制元素(框、点等)等操作
import time #时间统计
import gc #垃圾回收模块
import os, sys #操作系统接口模块
#***************************unfixed:不同AI Demo可能需要修改******************
#显示分辨率
DISPLAY_WIDTH = ALIGN_UP(1920, 16)
DISPLAY_HEIGHT = 1080
#AI分辨率
OUT_RGB888P_WIDTH = ALIGN_UP(224, 16)
OUT_RGB888P_HEIGH = 224
#*****************************************************************************
debug_mode=0
#***************************fixed:无需修改***********************************
class ScopedTiming:
def __init__(self, info="", enable_profile=True):
self.info = info
self.enable_profile = enable_profile
def __enter__(self):
if self.enable_profile:
self.start_time = time.time_ns()
return self
def __exit__(self, exc_type, exc_value, traceback):
if self.enable_profile:
elapsed_time = time.time_ns() - self.start_time
print(f"{self.info} took {elapsed_time / 1000000:.2f} ms")
global draw_img,osd_img #for display
global buffer,media_source,media_sink #for media
# for display
def display_init():
# hdmi显示初始化
display.init(LT9611_1920X1080_30FPS)
display.set_plane(0, 0, DISPLAY_WIDTH, DISPLAY_HEIGHT, PIXEL_FORMAT_YVU_PLANAR_420, DISPLAY_MIRROR_NONE, DISPLAY_CHN_VIDEO1)
def display_deinit():
# 释放显示资源
display.deinit()
#*****************************************************************************
def display_draw(label):
# hdmi写文字
with ScopedTiming("display_draw",debug_mode >0):
global draw_img,osd_img
#******************unfixed:不同AI Demo可能需要修改******************
if label:
#********************************************************************
draw_img.clear()
#******************unfixed:不同AI Demo可能需要修改******************
draw_img.draw_string(5,5,label,scale=5,color=(255,0,255,0))
#*****************************************************************
draw_img.copy_to(osd_img)
display.show_image(osd_img, 0, 0, DISPLAY_CHN_OSD3)
else:
draw_img.clear()
draw_img.copy_to(osd_img)
display.show_image(osd_img, 0, 0, DISPLAY_CHN_OSD3)
#***************************fixed:无需修改***********************************
#for camera
def camera_init(dev_id):
# 设置摄像头类型
camera.sensor_init(dev_id, CAM_DEFAULT_SENSOR)
# (1)设置显示输出
# 设置指定设备id的chn0的输出宽高
camera.set_outsize(dev_id, CAM_CHN_ID_0, DISPLAY_WIDTH, DISPLAY_HEIGHT)
# 设置指定设备id的chn0的输出格式为yuv420sp
camera.set_outfmt(dev_id, CAM_CHN_ID_0, PIXEL_FORMAT_YUV_SEMIPLANAR_420)
# (2)设置AI输出
# 设置指定设备id的chn2的输出宽高
camera.set_outsize(dev_id, CAM_CHN_ID_2, OUT_RGB888P_WIDTH, OUT_RGB888P_HEIGH)
# 设置指定设备id的chn2的输出格式为rgb88planar
camera.set_outfmt(dev_id, CAM_CHN_ID_2, PIXEL_FORMAT_RGB_888_PLANAR)
def camera_start(dev_id):
# camera启动
camera.start_stream(dev_id)
def camera_read(dev_id):
# 读取一帧图像
with ScopedTiming("camera_read",debug_mode >0):
rgb888p_img = camera.capture_image(dev_id, CAM_CHN_ID_2)
return rgb888p_img
def camera_release_image(dev_id,rgb888p_img):
# 释放一帧图像
with ScopedTiming("camera_release_image",debug_mode >0):
camera.release_image(dev_id, CAM_CHN_ID_2, rgb888p_img)
def camera_stop(dev_id):
# 停止camera
camera.stop_stream(dev_id)
#for media
def media_init():
# meida初始化
config = k_vb_config()
config.max_pool_cnt = 1
config.comm_pool[0].blk_size = 4 * DISPLAY_WIDTH * DISPLAY_HEIGHT
config.comm_pool[0].blk_cnt = 1
config.comm_pool[0].mode = VB_REMAP_MODE_NOCACHE
media.buffer_config(config)
global media_source, media_sink
media_source = media_device(CAMERA_MOD_ID, CAM_DEV_ID_0, CAM_CHN_ID_0)
media_sink = media_device(DISPLAY_MOD_ID, DISPLAY_DEV_ID, DISPLAY_CHN_VIDEO1)
media.create_link(media_source, media_sink)
# 初始化媒体buffer
media.buffer_init()
global buffer, draw_img, osd_img
buffer = media.request_buffer(4 * DISPLAY_WIDTH * DISPLAY_HEIGHT)
# 用于画框
draw_img = image.Image(DISPLAY_WIDTH, DISPLAY_HEIGHT, image.ARGB8888)
# 用于拷贝画框结果,防止画框过程中发生buffer搬运
osd_img = image.Image(DISPLAY_WIDTH, DISPLAY_HEIGHT, image.ARGB8888, poolid=buffer.pool_id, alloc=image.ALLOC_VB,
phyaddr=buffer.phys_addr, virtaddr=buffer.virt_addr)
def media_deinit():
# meida资源释放
os.exitpoint(os.EXITPOINT_ENABLE_SLEEP)
time.sleep_ms(100)
if 'buffer' in globals():
global buffer
media.release_buffer(buffer)
if 'media_source' in globals() and 'media_sink' in globals():
global media_source, media_sink
media.destroy_link(media_source, media_sink)
media.buffer_deinit()
#**************************************************************************
#***************************unfixed:不同AI Demo可能需要修改******************
# 任务后处理
def softmax(x):
exp_x = np.exp(x - np.max(x))
return exp_x / np.sum(exp_x)
#**************************************************************************
def classification():
print("start")
#***************************unfixed:不同AI Demo可能需要修改******************
# 初始化参数
kmodel_file = '/sdcard/k230_classify.kmodel'
labels = ["bocai","changqiezi","huluobo","xihongshi","xilanhua"]
confidence_threshold = 0.6
num_classes= 5
cls_idx=-1
#***************************************************************************
#***************************fixed:无需修改***********************************
# 加载kmodel
kpu = nn.kpu()
kpu.load_kmodel(kmodel_file)
# 摄像头初始化
camera_init(CAM_DEV_ID_0)
# 显示初始化
display_init()
try:
media_init()
# 启动摄像头
camera_start(CAM_DEV_ID_0)
while True:
os.exitpoint()
with ScopedTiming("total",debug_mode > 0):
# 从摄像头拿取一帧数据
rgb888p_img = camera_read(CAM_DEV_ID_0)
#***************************************************************************
# for rgb888planar
if rgb888p_img.format() == image.RGBP888:
#******************unfixed:不同AI Demo可能需要修改*****************
# rgb888(uint8,chw,rgb)->kmodel input(uint8,hwc,rgb)
# pre_process : chw -> hwc
ori_img_numpy = rgb888p_img.to_numpy_ref()
ori_img_copy = ori_img_numpy.copy()
shape=ori_img_copy.shape
img_tmp = ori_img_copy.reshape((shape[0], shape[1] * shape[2]))
img_tmp_trans = img_tmp.transpose()
img_res=img_tmp_trans.copy()
img_hwc=img_res.reshape((1,shape[1],shape[2],shape[0]))
input_tensor = nn.from_numpy(img_hwc)
#*****************************************************************
#************************fixed:无需修改*****************************
# set kmodel input
kpu.set_input_tensor(0, input_tensor)
# kmodel run
kpu.run()
# get output
results = []
for i in range(kpu.outputs_size()):
output_tensor = kpu.get_output_tensor(i)
result = output_tensor.to_numpy()
del output_tensor
results.append(result)
#*****************************************************************
#******************unfixed:不同AI Demo可能需要修改******************
# post process
softmax_res=softmax(results[0][0])
res_idx=np.argmax(softmax_res)
if softmax_res[res_idx]>confidence_threshold:
cls_idx=res_idx
print("classification result:")
print(labels[res_idx])
print("score",softmax_res[res_idx])
else:
cls_idx=-1
# draw result
if cls_idx>=0:
display_draw(labels[res_idx])
else:
display_draw(None)
#******************************************************************
del input_tensor
#************************fixed:无需修改*****************************
# release image
camera_release_image(CAM_DEV_ID_0,rgb888p_img)
# release gc
gc.collect()
nn.shrink_memory_pool()
#********************************************************************
#************************fixed:无需修改*****************************
except KeyboardInterrupt as e:
print("user stop: ", e)
except BaseException as e:
sys.print_exception(e)
finally:
camera_stop(CAM_DEV_ID_0)
display_deinit()
#******************************************************************
del kpu #根据实际名称修改
#************************fixed:无需修改*****************************
gc.collect()
nn.shrink_memory_pool()
media_deinit()
#******************************************************************
print("end")
return 0
if __name__=="__main__":
#************************fixed:无需修改*****************************
os.exitpoint(os.EXITPOINT_ENABLE)
nn.shrink_memory_pool()
#******************************************************************
classification()