实战 | 红酒瓶标签曲面展平+文字识别(附源码)
来源:食品安全资讯 /
时间:2026-01-12
点击上方“小白学视觉”,选择加“星标”或“置顶”
重磅干货,第一时间送达
背景介绍
本文的目标是让计算机从一张简单的照片中读取一瓶红酒上标签文字的内容。因为酒瓶标签上的文本在圆柱体上是扭曲的,我们无法直接提取并识别字符,所以一般都会将曲面标签展平之后再做识别,以提升准确率。
第一部分:传统方法提取标签
以上图为例,先尝试使用传统图像处理方法提取标签轮廓。 【1】转为灰度图 + 自适应二值化
【2】高斯滤波平滑 + 固定阈值二值化
def build_model(self, Config):
    """Build the U-Net that predicts a single-channel mask from an image.

    The network takes a 256x256 three-channel input, divides it by 255,
    runs five contraction stages (16, 32, 64, 128 and 256 filters), then
    four expansive stages (128, 64, 32 and 16 filters) that each upsample
    with a transposed convolution and concatenate the matching contraction
    output. A final 1x1 convolution with a sigmoid activation produces the
    one-channel output.

    Args:
        Config: Mapping that supplies the dropout rate of every stage under
            the keys ``contraction_1_dropout`` .. ``contraction_5_dropout``
            and ``expansive_1_dropout`` .. ``expansive_4_dropout``.

    Returns:
        The assembled ``tf.keras.Model``. It is not compiled here.
    """
    layers = tf.keras.layers

    def conv_block(tensor, filters, dropout_rate):
        # Conv(3x3, relu) -> Dropout -> Conv(3x3, relu), shared by all stages.
        tensor = layers.Conv2D(
            filters, (3, 3), activation='relu',
            kernel_initializer='he_normal', padding='same')(tensor)
        tensor = layers.Dropout(dropout_rate)(tensor)
        tensor = layers.Conv2D(
            filters, (3, 3), activation='relu',
            kernel_initializer='he_normal', padding='same')(tensor)
        return tensor

    inputs = layers.Input((256, 256, 3))
    s = layers.Lambda(lambda x: x / 255)(inputs)

    # Contraction path
    c1 = conv_block(s, 16, Config['contraction_1_dropout'])
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = conv_block(p1, 32, Config['contraction_2_dropout'])
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = conv_block(p2, 64, Config['contraction_3_dropout'])
    p3 = layers.MaxPooling2D((2, 2))(c3)

    c4 = conv_block(p3, 128, Config['contraction_4_dropout'])
    p4 = layers.MaxPooling2D(pool_size=(2, 2))(c4)

    c5 = conv_block(p4, 256, Config['contraction_5_dropout'])

    # Expansive path
    u6 = layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
    u6 = layers.concatenate([u6, c4])
    c6 = conv_block(u6, 128, Config['expansive_1_dropout'])

    u7 = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
    u7 = layers.concatenate([u7, c3])
    c7 = conv_block(u7, 64, Config['expansive_2_dropout'])

    u8 = layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c7)
    u8 = layers.concatenate([u8, c2])
    c8 = conv_block(u8, 32, Config['expansive_3_dropout'])

    u9 = layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c8)
    u9 = layers.concatenate([u9, c1], axis=3)
    c9 = conv_block(u9, 16, Config['expansive_4_dropout'])

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)

    model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
    return model
# mask is the U-net output image
# src is the source image
# self is the parent class labelVision

# Expand the single-channel mask to three channels.
mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
# cv2.resize takes (width, height), hence shape[1] before shape[0].
mask = cv2.resize(mask, (src.shape[1], src.shape[0]))
# binary transform: round every value to the nearest integer
# (presumably mask values lie in [0, 1], giving 0/1 - confirm against caller)
mask = np.round(mask)
r_src, r_mask = self.align_vertically(src, mask)
源码下载:
https://github.com/AntoninLeroy/wine_label_reader_toolkit
下载1:OpenCV-Contrib扩展模块中文版教程 在「小白学视觉」公众号后台回复:扩展模块中文教程,即可下载全网第一份OpenCV扩展模块教程中文版,涵盖扩展模块安装、SFM算法、立体视觉、目标跟踪、生物视觉、超分辨率处理等二十多章内容。 下载2:Python视觉实战项目52讲 在「小白学视觉」公众号后台回复:Python视觉实战项目,即可下载包括图像分割、口罩检测、车道线检测、车辆计数、添加眼线、车牌识别、字符识别、情绪检测、文本内容提取、面部识别等31个视觉实战项目,助力快速学习计算机视觉。 下载3:OpenCV实战项目20讲 在「小白学视觉」公众号后台回复:OpenCV实战项目20讲,即可下载20个基于OpenCV实现的实战项目,实现OpenCV学习进阶。 交流群
欢迎加入公众号读者群一起和同行交流,目前有SLAM、三维视觉、传感器、自动驾驶、计算摄影、检测、分割、识别、医学影像、GAN、算法竞赛等微信群(以后会逐渐细分),请扫描下面微信号加群,备注:“昵称+学校/公司+研究方向”,例如:“张三 + 上海交大 + 视觉SLAM”。请按照格式备注,否则不予通过。添加成功后会根据研究方向邀请进入相关微信群。请勿在群内发送广告,否则会请出群,谢谢理解~
上一篇: 基于有源RFID的电器开关识别和控制电路
下一篇: 拍下二维码,信息随身看
