摘要:该算法包含表格边界框检测、表格分割和表格方向识别三个部分。首先,ppyoloe-plus-x 对边界框进行预测,并对置信度较高的表格边界框(box)进行裁剪。裁剪后的单个表格实例会送入到DBNet中进行语义分割,分割结果通过opencv轮廓处理获得表格关键点(Point)。
百度网盘AI大赛-表格检测的第2名方案。
该算法包含表格边界框检测、表格分割和表格方向识别三个部分,首先,ppyoloe-plus-x 对边界框进行预测,并对置信度较高的表格边界框(box)进行裁剪。裁剪后的单个表格实例会送入到DBNet中进行语义分割,分割结果通过opencv轮廓处理获得表格关键点(Point)。之后,我们根据DBNet计算的关键点在裁剪后的单个表格实例上绘制表格边界。最后,PP-LCNet结合表格边界先验和表格实例图像,对表格的方向进行预测,并根据之前定义的几何轮廓点与语义轮廓点的对应关系,将几何轮廓点映射为语义轮廓点。
本文使用C# OpenCvSharp DNN 实现百度网盘AI大赛-表格检测第2名方案第一部分-表格边界框检测。
Model Properties
date:2024-10-28T13:52:42.181333
description:Ultralytics YOLO11l model trained on coco.yaml
author:Ultralytics
version:8.3.23
task:detect
license:AGPL-3.0 License (https://ultralytics.com/license)
docs:https://docs.ultralytics.com
stride:32
batch:1
imgsz:[928, 928]
names:{0:'table'}
Inputs
name:images
tensor:Float[1, 3, 928, 928]
Outputs
name:output0
tensor:Float[1, 5, 17661]
frmMain.cs
using OpenCvSharp;using System;
using System.Collections.Generic;
using System.Drawing;
using System.Windows.Forms;
namespace OpenCvSharp_DNN_Demo
{
    /// <summary>
    /// Main window of the demo. Lets the user pick an image, runs the table
    /// bounding-box detector on it, draws the detections and saves one crop
    /// per detection for the next pipeline stage.
    /// </summary>
    public partial class frmMain : Form
    {
        public frmMain()
        {
            InitializeComponent();
        }

        // Detector instance; created once in Form1_Load.
        YoloDet obj_detector;
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
        // Path of the image that button2_Click runs detection on.
        string image_path = "";
        // Timestamps taken right before / after inference.
        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;
        // Most recently opened image; replaced (and the old one released) on each open.
        Mat image;

        /// <summary>
        /// Opens a file dialog, shows the chosen image in pictureBox1 and
        /// remembers its path for detection.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // OpenFileDialog is IDisposable; release it once the path has been read.
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                pictureBox1.Image = null;
                pictureBox2.Image = null;
                textBox1.Text = "";
                image_path = ofd.FileName;
            }

            pictureBox1.Image = new Bitmap(image_path);

            // Release the native buffer of the previously opened image before replacing it.
            image?.Dispose();
            image = new Mat(image_path);
        }

        /// <summary>
        /// Loads the ONNX detector and presets a default test image path.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            string obj_model_path = "model/yolo_obj_Det.onnx";
            obj_detector = new YoloDet(obj_model_path);
            image_path = "test_img/real5.jpg";
        }

        /// <summary>
        /// Runs detection on the current image, draws every detected box with
        /// its own score, writes each crop to "&lt;index&gt;.jpg" and reports
        /// the inference time.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "")
            {
                return;
            }

            textBox1.Text = "检测中,请稍等……";
            // Let the status text repaint before the blocking inference call.
            Application.DoEvents();

            using (Mat src = new Mat(image_path))
            {
                // The preset default path (or a deleted file) yields an empty Mat;
                // stop here instead of passing it to the detector.
                if (src.Empty())
                {
                    textBox1.Text = "图片读取失败:" + image_path;
                    return;
                }

                dt1 = DateTime.Now;
                List<Bbox> result = obj_detector.infer(src);
                dt2 = DateTime.Now;

                // Draw on a copy so the crops below come from the clean source image.
                using (Mat draw_img = src.Clone())
                {
                    for (int i = 0; i < result.Count; i++)
                    {
                        Rect r = Rect.FromLTRB(result[i].xmin, result[i].ymin, result[i].xmax, result[i].ymax);

                        // Label each box with its OWN score (the original always printed result[0]).
                        Cv2.PutText(draw_img, $"table:{result[i].score:P0}", new OpenCvSharp.Point(r.TopLeft.X, r.TopLeft.Y - 10), HersheyFonts.HersheySimplex, 8, Scalar.Red, 8);
                        Cv2.Rectangle(draw_img, r, Scalar.Red, thickness: 8);

                        // Crop and save as input for the next detection stage.
                        // A degenerate box would give an empty Mat that ImWrite cannot encode.
                        if (r.Width > 0 && r.Height > 0)
                        {
                            using (Mat crop_img = new Mat(src, r))
                            {
                                Cv2.ImWrite(i + ".jpg", crop_img);
                            }
                        }
                    }

                    // The stream is intentionally left open: GDI+ needs it for the Bitmap's lifetime.
                    pictureBox2.Image = new Bitmap(draw_img.ToMemoryStream());
                }
            }

            textBox1.Text = "推理耗时:" + (dt2 - dt1).TotalMilliseconds + "ms";
        }

        /// <summary>Shows the result image via Common.ShowNormalImg.</summary>
        private void pictureBox2_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox2.Image);
        }

        /// <summary>Shows the source image via Common.ShowNormalImg.</summary>
        private void pictureBox1_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox1.Image);
        }
    }
}
YoloDet.cs
using OpenCvSharp;using OpenCvSharp.Dnn;
using System;
using System.Collections.Generic;
using System.Linq;
namespace OpenCvSharp_DNN_Demo
{
    /// <summary>
    /// Wraps an ONNX YOLO detection network loaded through OpenCvSharp DNN and
    /// turns its raw output into a list of <c>Bbox</c> in source-image pixels.
    /// </summary>
    internal class YoloDet
    {
        Net model;
        // Network input size; only element 0 is passed to Common.ResizePad.
        int[] resize_shape = new int[2] { 928, 928 };

        /// <summary>Loads the ONNX model from <paramref name="model_path"/>.</summary>
        public YoloDet(string model_path)
        {
            model = CvDnn.ReadNetFromOnnx(model_path);
        }

        /// <summary>
        /// Runs the detector on <paramref name="srcimg"/>.
        /// </summary>
        /// <param name="srcimg">Source image; must be non-null and non-empty.</param>
        /// <param name="score">Minimum score for a candidate to be kept; also passed to NMS as its score threshold.</param>
        /// <param name="nms_threshold">Overlap threshold passed to NMS (previously hard-coded to 0.4).</param>
        /// <returns>Boxes kept by NMS, clamped to the source image; empty when nothing passes the threshold.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="srcimg"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="srcimg"/> is empty.</exception>
        /// <exception cref="InvalidOperationException">The output tensor has fewer than 5 channels.</exception>
        public unsafe List<Bbox> infer(Mat srcimg, float score = 0.4f, float nms_threshold = 0.4f)
        {
            if (srcimg == null)
            {
                throw new ArgumentNullException(nameof(srcimg));
            }
            if (srcimg.Empty())
            {
                throw new ArgumentException("Source image is empty.", nameof(srcimg));
            }

            int ori_h = srcimg.Rows;
            int ori_w = srcimg.Cols;

            // ---- img_preprocess ----
            int new_w = 0;
            int new_h = 0;
            int left = 0;
            int top = 0;
            // NOTE(review): ResizePad is defined elsewhere; new_w/new_h are presumably the
            // resized size before padding and left/top the padding offsets - confirm.
            Mat img = Common.ResizePad(srcimg, resize_shape[0], ref new_w, ref new_h, ref left, ref top);
            Mat[] outs = new Mat[1] { new Mat() };

            try
            {
                img.ConvertTo(img, MatType.CV_32FC3, 1.0 / 255.0);

                // Run the network and read the result.
                using (Mat blob = CvDnn.BlobFromImage(img))
                {
                    model.SetInput(blob);
                    string[] outBlobNames = model.GetUnconnectedOutLayersNames().ToArray();
                    model.Forward(outs, outBlobNames);
                }

                // ---- img_postprocess ----
                float x_factor = (float)ori_w / new_w;
                float y_factor = (float)ori_h / new_h;

                List<Rect> boxes = new List<Rect>();
                List<float> scores = new List<float>();

                // Output is [1, channels, rows] (e.g. [1, 5, 17661]); take the sizes from
                // the tensor instead of hard-coding them so other input sizes also work.
                Mat output = outs[0];
                int channels = output.Size(1);
                int rows = output.Size(2);
                if (channels < 5)
                {
                    throw new InvalidOperationException("Unexpected model output: expected at least 5 channels, got " + channels + ".");
                }

                // Index the channel-major buffer directly: value (c, i) is data[c * rows + i].
                // This avoids the transpose copy; like the original, it assumes a contiguous
                // float32 buffer.
                float* data = (float*)output.Data.ToPointer();

                for (int i = 0; i < rows; i++)
                {
                    // Channel 4 is read as the score, as in the original code.
                    float max_score = data[4 * rows + i];
                    if (max_score < score)
                    {
                        continue;
                    }

                    float x = data[i];
                    float y = data[rows + i];
                    float w = data[2 * rows + i];
                    float h = data[3 * rows + i];

                    // Undo the padding offset and the resize, then clamp BOTH corners.
                    // The original clamped only the top-left corner and kept the full
                    // width/height, which shifted and enlarged boxes touching the border.
                    int xmin = Math.Max((int)((x - w / 2 - left) * x_factor), 0);
                    int ymin = Math.Max((int)((y - h / 2 - top) * y_factor), 0);
                    int xmax = Math.Min((int)((x + w / 2 - left) * x_factor), ori_w - 1);
                    int ymax = Math.Min((int)((y + h / 2 - top) * y_factor), ori_h - 1);

                    // Skip boxes that collapse to nothing after clamping.
                    if (xmax <= xmin || ymax <= ymin)
                    {
                        continue;
                    }

                    boxes.Add(new Rect(xmin, ymin, xmax - xmin, ymax - ymin));
                    scores.Add(max_score);
                }

                List<Bbox> bboxes = new List<Bbox>();
                if (boxes.Count == 0)
                {
                    // Nothing to suppress; skip the native NMS call.
                    return bboxes;
                }

                int[] indices;
                CvDnn.NMSBoxes(boxes, scores, score, nms_threshold, out indices);

                foreach (int ind in indices)
                {
                    Rect b = boxes[ind];
                    bboxes.Add(new Bbox(b.X, b.Y, Math.Min(b.X + b.Width, ori_w - 1), Math.Min(b.Y + b.Height, ori_h - 1), scores[ind]));
                }
                return bboxes;
            }
            finally
            {
                // Release native buffers; these were leaked on every call before.
                foreach (Mat m in outs)
                {
                    m?.Dispose();
                }
                // Do not dispose the caller's image in case ResizePad returned it unchanged.
                if (!ReferenceEquals(img, srcimg))
                {
                    img.Dispose();
                }
            }
        }
    }
}
/*
Model Properties
date:2024-10-28T13:52:42.181333
description:Ultralytics YOLO11l model trained on coco.yaml
author:Ultralytics
version:8.3.23
task:detect
license:AGPL-3.0 License (https://ultralytics.com/license)
docs:https://docs.ultralytics.com
stride:32
batch:1
imgsz:[928, 928]
names:{0:'table'}
Inputs
name:images
tensor:Float[1, 3, 928, 928]
Outputs
name:output0
tensor:Float[1, 5, 17661]
*/
https://aistudio.baidu.com/projectdetail/5398861?searchKeyword=表格检测大赛&searchTab=ALL
来源:opendotnet
免责声明:本站系转载,并不代表本网赞同其观点和对其真实性负责。如涉及作品内容、版权和其它问题,请在30日内与本站联系,我们将在第一时间删除内容!