
Chinese CAPTCHA Recognition with a YOLO-Inspired Approach: Recognizing Chinese-Character CAPTCHAs

奶瓶 · July 7, 2021

Overview:
1. A hands-on Chinese-character CAPTCHA recognition project, to consolidate deep-learning knowledge.

Environment:
1. Windows 10, tensorflow-gpu 1.8, GPU: 1060Ti.

2. The CAPTCHA to recognize is a 100x30-pixel image made up of four Chinese characters; each character's position varies randomly within a certain range, and four random interference lines are drawn. A sample looks like this:

3. Required files: 3500.txt, a text file of the 3,500 most common Chinese characters, and font files (I use two font files here); see the layout sketch below.
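For orientation, the files referenced throughout the post are laid out roughly like this (my own sketch; the font file names are placeholders, and note that network_1.py as written saves its checkpoint to ./model/ while the later scripts load it from ./model_step1/):

./3500.txt           # the 3,500 most common Chinese characters
./fonts/             # the two font files (placeholder names, e.g. font_a.ttf, font_b.ttf)
./born_data.py       # CAPTCHA generation
./network_1.py       # trains the localization network
./chineseCode.py     # loads both trained models and recognizes CAPTCHAs
./model_step1/       # checkpoint of the localization network
./model_step2/       # checkpoint of the recognition network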

Approach:
1. I first tried a YOLOv3-style approach: a single network that localizes and recognizes every character at once. It can be done, but accuracy is only around 82%; once the improvements work out I will write a separate post about it.

2. The approach in this post is to train two networks separately: the first localizes each character, i.e. predicts each character's center coordinates; the second is trained on top of the first to recognize each cropped character. Tested this way, accuracy ends up above 99.2% (see the pipeline sketch below).
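To make the two-stage idea concrete, here is a minimal inference sketch of my own, assuming predict_centers and classify_patch are callables that wrap the two trained networks and charset is the string read from 3500.txt (all three names are placeholders; the actual loading and inference code appears later in the post):

import cv2

def recognize(img_bgr, predict_centers, classify_patch, charset):
    # Preprocess exactly as in training: grayscale -> inverted binarization -> scale to 0/1.
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
    data = binary / 255                              # 30x100 matrix of 0s and 1s

    text = ''
    for cx, cy in predict_centers(data):             # network 1: four normalized (x, y) centers
        x = min(max(round(cx * 100 - 10), 0), 80)    # center -> clamped top-left corner
        y = min(max(round(cy * 25 - 10), 0), 9)
        patch = data[y:y + 20, x:x + 20]             # 20x20 crop around the character
        text += charset[classify_patch(patch)]       # network 2: index into the 3,500-character list
    return text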

Rough sketches of the two networks:

Data generation:
1. CAPTCHAs are generated with the Pillow library, via an ImageChar class;

2. rand_img_label() generates training data for either of the two networks depending on the step argument; see the code comments for details.

3. rand_img_test() is designed for evaluating the accuracy of the two networks later on; it returns a CAPTCHA image and the corresponding four-character string.

Code: born_data.py

from PIL import Image,ImageFont,ImageDraw
import random
import os
import numpy as np
import cv2
 
class ImageChar():
	"""
	1. Read 3500.txt (the 3,500 most common Chinese characters) and pick characters from it at random.
	2. Font files are stored under ./fonts/; a font is picked at random and used to render the characters.
	3. Draw a given number of random interference lines.
	4. Environment: Mac, Python 3.5
	"""
	def __init__(self, color=(0,0,0),size=(100,30),
		fontlist=['./fonts/'+i for i in os.listdir('./fonts/') if not i =='.DS_Store'],
		fontsize=20,
		num_word=4):# number of characters in the CAPTCHA (the image width grows with it)

		self.num_word=num_word
		self.color=color
		self.fontsize=fontsize

		self.fontlist=fontlist

		if self.num_word==4:
			self.size=size
		else:
			self.size=((self.fontsize+5)*self.num_word,40)

		# pick a font at random; randint(0,2) returns 0, 1 or 2, so subtract 1 from the length
		self.fontpath=self.fontlist[random.randint(0,len(self.fontlist)-1)]

		self.chinese=open('3500.txt','r').read()

		self.font=ImageFont.truetype(self.fontpath, self.fontsize)
		
	# randomly generate a string of num_word Chinese characters
	def rand_chinese(self):
		chinese_str=''
		chinese_num=[]
		for i in range(self.num_word):
			temp=random.randint(0,3499)
			chinese_str=chinese_str+self.chinese[temp]
			chinese_num.append(temp)
		return chinese_str,chinese_num
	
	# random endpoint for an interference line (mode 0: anywhere; 1: near the left edge; 2: near the right edge)
	def rand_line_points(self,mode=0):
		width,height=self.size
		if mode==0:
			return (random.randint(0, width), random.randint(0, height))
		elif mode==1:
			return (random.randint(0,6),random.randint(0, height))
		elif mode==2:
			return (random.randint(width-6,width),random.randint(0, height))
 
	# generate a random CAPTCHA and return it with its four-character string (used for testing)
	def rand_img_test(self,num_lines=4):
		width,height=self.size
		gap=5
		start=0
 
		# the CAPTCHA image, with noise
		self.img1 = Image.new('RGB',self.size,(255,255,255))
		self.draw1=ImageDraw.Draw(self.img1)
 
		# draw the interference lines
		for i in range(num_lines//2):
			self.draw1.line([self.rand_line_points(),self.rand_line_points()],(0,0,0))
		for i in range(num_lines//2):
			self.draw1.line([self.rand_line_points(1),self.rand_line_points(2)],(0,0,0))
 
		words,chinese_num=self.rand_chinese()
		# draw the characters
		for i in range(len(words)):
			x=start+(self.fontsize+gap)*i+random.randint(0,gap)
			y=random.randint(0,height-self.fontsize-gap)
			self.draw1.text((x,y),words[i],fill=(0,0,0),font=self.font)
		return self.img1,words
 
	# generate one image; depending on step, produce training data for network 1 or network 2
	def rand_img_label(self,num_lines=4,step=1):
		width,height=self.size
		gap=5
		start=0
 
		self.img1 = Image.new('RGB',self.size,(255,255,255))
		self.draw1=ImageDraw.Draw(self.img1)
 
		# draw the interference lines
		for i in range(num_lines//2):
			self.draw1.line([self.rand_line_points(),self.rand_line_points()],(0,0,0))
		for i in range(num_lines//2):
			self.draw1.line([self.rand_line_points(1),self.rand_line_points(2)],(0,0,0))
 
		words,chinese_num=self.rand_chinese()
		label_list=[]
		# draw the characters
		for i in range(len(words)):
			x=start+(self.fontsize+gap)*i+random.randint(0,gap)
			y=random.randint(0,height-self.fontsize-gap)
 
			if step == 1:# label for network 1: the character's center coordinates
				temp_list=[0]*2
				temp_list[0]=(x+10)/100# center x of the character; divide by 100 to normalize to 0~1, which makes training easier
				temp_list[1]=(y+14)/25# center y of the character; divide by 25, also to make training easier
			else:# label for network 2: the character's one-hot vector
				temp_list=[0]*3500
				temp_list[chinese_num[i]]=1
			
			label_list.append(temp_list)
			self.draw1.text((x,y),words[i],fill=(0,0,0),font=self.font)
 
		return self.img1,label_list
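A minimal usage sketch of the class above (my own example; it assumes ./fonts/ and 3500.txt are in place):

ic = ImageChar()
img, centers = ic.rand_img_label(step=1)   # centers: four normalized (x, y) pairs, for network 1
img2, onehots = ic.rand_img_label(step=2)  # onehots: four one-hot vectors of length 3500, for network 2
img3, words = ic.rand_img_test()           # words: the ground-truth four-character string
img3.show()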

Training the first network: predicting character center coordinates
1. Prepare the data and preprocess the inputs:

(1) Convert the image to grayscale;

(2) Binarize it with inverted thresholding, so the characters become white and the background black;

(3) Divide the whole image matrix by 255, turning the input into a matrix of 0s and 1s.

Code: born_data.py

def prepare_data():
	img_char=ImageChar()
	images=[]
	labels=[]
	for i in range(50000):
		chinese_img_PIL,label_list=img_char.rand_img_label()
		np_img=np.asarray(chinese_img_PIL)
		np_img = cv2.cvtColor(np_img,cv2.COLOR_BGR2GRAY)
		ret,np_img = cv2.threshold(np_img,127,255,cv2.THRESH_BINARY_INV)
		np_img=np_img/255
		images.append(np_img.tolist())
		labels.append(label_list)
		if i % 200==0:
			print(i,end='\r')
	labels=np.array(labels)
	np.save('trainLab0.npy',labels)
	images=np.array(images)
	np.save('trainImg0.npy',images)
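A quick sanity check of my own on the saved arrays (the shapes follow from the code above):

x_image = np.load('trainImg0.npy')   # (50000, 30, 100): binarized images with values 0 or 1
y_label = np.load('trainLab0.npy')   # (50000, 4, 2): normalized (x, y) center of each character
print(x_image.shape, y_label.shape)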

2. Basics of the first network:

(1) As in the network sketch above, the first three stages are convolution + pooling layers, followed by three fully connected layers;

(2) The first five layers use the ReLU activation; the final layer uses sigmoid so the outputs are squashed into 0~1;

(3) Batch normalization is applied between all layers;

(4) Training uses the momentum optimizer;

(5) The evaluation function accuracy() predicts the character coordinates of 1,000 CAPTCHAs; a prediction whose Euclidean distance from the label, sqrt((x-x0)^2 + (y-y0)^2), exceeds two pixels counts as a misrecognition:

# Evaluate training accuracy: a predicted point more than two pixels away from the label point counts as a failure.
def accuracy(sess,pre_loca,in_image,x_image,y_label,if_is_training):
	erro_count = 0
	for i in range(10):# predict 100 images at a time, ten times, 1,000 in total
		bt=random.randint(0,49999 - 100)
		min_x_image = x_image[bt:(bt+100),:,:]
		min_y_label = y_label[bt:(bt+100),:,:]
		loca_np = sess.run(pre_loca,feed_dict={in_image:min_x_image , if_is_training:True})
		m,n,l = loca_np.shape
		for j in range(m):
			for k in range(n):
				x =round(loca_np[j,k,0]*100)
				y =round(loca_np[j,k,1]*25)
				x0=round(min_y_label[j,k,0]*100)
				y0=round(min_y_label[j,k,1]*25)
				lo = ((x - x0)**2 + (y - y0)**2)**0.5# distance between predicted and label coordinates
				if lo>2:
					erro_count+=1
	if erro_count>20:
		return False, erro_count
	else:
		return True, erro_count

The full code: network_1.py

import tensorflow.contrib.slim as slim
import tensorflow as tf
import numpy as np
import random
import time

def cal_loss(pre_loca, lab_loca):
	loca_loss = tf.reduce_mean(tf.square(tf.subtract(pre_loca , lab_loca)))
	return loca_loss*100

def xavier_init(fan_in,fan_out,constant = 1):
	low = -constant * np.sqrt(6.0/(fan_in+fan_out))
	high = constant * np.sqrt(6.0/(fan_in+fan_out))
	return tf.random_uniform((fan_in,fan_out),minval = low,maxval = high,dtype = tf.float32)

def network(in_image,if_is_training):
	batch_norm_params={
		'is_training':if_is_training,
		'zero_debias_moving_mean':True,
		'decay':0.99,
		'epsilon':0.001,
		'scale':True,
		'updates_collections':None
	}

	with slim.arg_scope([slim.conv2d],activation_fn=tf.nn.relu,
		padding='SAME',
		weights_initializer=slim.xavier_initializer(),
		biases_initializer=tf.zeros_initializer(),
		normalizer_fn=slim.batch_norm,
		normalizer_params=batch_norm_params,
		weights_regularizer=slim.l2_regularizer(0.0005)):
		out_1=32
		out_2=64
		out_3=128

		net=slim.conv2d(in_image,num_outputs=out_2,kernel_size=[5,5],stride=1,scope='conv1')
		print('1_con:\t',net.get_shape())
		net=slim.max_pool2d(net,kernel_size=[2,2],stride=2,scope='pool1')
		print('1_pool:\t',net.get_shape())

		net=slim.conv2d(net,num_outputs=out_2,kernel_size=[5,5],stride=1,scope='conv2')
		print('2_con:\t',net.get_shape())
		net=slim.max_pool2d(net,kernel_size=[2,2],stride=2,scope='pool2')
		print('2_pool:\t',net.get_shape())

		net=slim.conv2d(net,num_outputs=out_3,kernel_size=[3,3],stride=1,scope='conv3_1')
		net=slim.conv2d(net,num_outputs=out_3,kernel_size=[3,3],stride=1,scope='conv3_2')
		print('3_con:\t',net.get_shape())
		net=slim.max_pool2d(net,kernel_size=[2,2],stride=2,scope='pool3')
		print('3_pool:\t',net.get_shape())

	# net = tf.reshape(net,shape=[-1,2*2*128])
	net=slim.flatten(net,scope='flatten')

	with slim.arg_scope([slim.fully_connected],
		activation_fn=tf.nn.relu,
		normalizer_fn=slim.batch_norm,
		normalizer_params=batch_norm_params):

		net=slim.fully_connected(net,1000,
			weights_initializer=slim.xavier_initializer(),
			biases_initializer=tf.zeros_initializer(),
			scope='fc_total')
		print('fc:\t',net.get_shape())

		pre_loca=slim.fully_connected(net,2000,
			weights_initializer=slim.xavier_initializer(),
			biases_initializer=tf.zeros_initializer(),
			scope='fc2_1')

		pre_loca=slim.fully_connected(pre_loca,8,
				activation_fn=tf.nn.sigmoid,
				# normalizer_fn=None,
				weights_initializer=slim.xavier_initializer(),
				biases_initializer=tf.zeros_initializer(),
				scope='fc2_2')

		pre_loca = tf.reshape(pre_loca, shape=[-1,4,2])
		return pre_loca
# Evaluate training accuracy: a predicted point more than two pixels away from the label point counts as a failure.
def accuracy(sess,pre_loca,in_image,x_image,y_label,if_is_training):
	erro_count = 0
	for i in range(10):# predict 100 images at a time, ten times, 1,000 in total
		bt=random.randint(0,49999 - 100)
		min_x_image = x_image[bt:(bt+100),:,:]
		min_y_label = y_label[bt:(bt+100),:,:]
		loca_np = sess.run(pre_loca,feed_dict={in_image:min_x_image , if_is_training:True})
		m,n,l = loca_np.shape
		for j in range(m):
			for k in range(n):
				x =round(loca_np[j,k,0]*100)
				y =round(loca_np[j,k,1]*25)
				x0=round(min_y_label[j,k,0]*100)
				y0=round(min_y_label[j,k,1]*25)
				lo = ((x - x0)**2 + (y - y0)**2)**0.5# distance between predicted and label coordinates
				if lo>2:
					erro_count+=1
	if erro_count>20:
		return False, erro_count
	else:
		return True, erro_count

def main():
	in_image= tf.placeholder(dtype=tf.float32, shape=[None,30,100], name='in_image')
	# lab_class=tf.placeholder(dtype=tf.float32, shape=[None,4,3500], name='lab_class')
	lab_loca=tf.placeholder(dtype=tf.float32, shape=[None,4,2], name='lab_loca')

	# used together with batch normalization: True while training, False at prediction time
	if_is_training=tf.placeholder(dtype=tf.bool,name='if_is_training')

	x_input = tf.reshape(in_image, shape=[-1,100,30,1], name='x_input')

	pre_loca=network(x_input,if_is_training)

	loca_loss=cal_loss(pre_loca,  lab_loca)

	# used together with batch normalization
	update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS)
	with tf.control_dependencies(update_ops):
		# train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
		train_op = tf.train.MomentumOptimizer(learning_rate=0.01,momentum=0.9,use_nesterov=True).minimize(loca_loss)

	model_saver=tf.train.Saver()
	tf.add_to_collection('pre_loca',pre_loca)

	x_image=np.load('trainImg0.npy')
	y_label=np.load('trainLab0.npy')

	batchs = 120
	with tf.Session() as sess:
		sess.run(tf.global_variables_initializer())
		while True:
			# enter the number of training steps, which makes it easy to pause and resume training
			command=input('input:\t')
			if command=='qq':
				break
			for i in range(int(command)):
				bt=random.randint(0,49999 - batchs)
				min_x_image = x_image[bt:(bt + batchs),:,:]
				min_y_label = y_label[bt:(bt + batchs),:,:]

				sess.run(train_op,feed_dict={in_image:min_x_image , lab_loca:min_y_label , if_is_training:True})

				if i%500==0:
					ret,erro_count = accuracy(sess,pre_loca,in_image,x_image,y_label,if_is_training)
					print('count: ',i, '\terro: ' , erro_count , '\t\taccuracy: ',erro_count/4000)
					if ret:
						break

		model_saver.save(sess,'./model/mymodel.ckpt')

if __name__=='__main__':
	main()

3. Train until the network, evaluated on 1,000 CAPTCHAs (4,000 characters), makes fewer than 20 localization errors.

Training the second network: recognizing the characters
1. Data preparation: the second network is trained on data generated on the fly, 30 CAPTCHAs at a time:

def generate_data():
	img_char = born_data.ImageChar()
	images=[]
	labels=[]
	for i in range(30):
		chinese_img_PIL,label_list=img_char.rand_img_label(step=2)
		np_img=np.asarray(chinese_img_PIL)
		np_img = cv2.cvtColor(np_img,cv2.COLOR_BGR2GRAY)
		ret,np_img = cv2.threshold(np_img,127,255,cv2.THRESH_BINARY_INV)
		np_img=np_img/255
		images.append(np_img.tolist())
		labels.append(label_list)

	return np.array(images),np.array(labels)

2. Load the first network's model and let it predict the character centers for the data produced by generate_data():

# Load the first network's model.
def load_model_1():
	graph_1=tf.Graph()
	sess_1=tf.Session(graph=graph_1)
	with graph_1.as_default():
		model_saver_1=tf.train.import_meta_graph("./model_step1/mymodel.ckpt.meta")
		model_saver_1.restore(sess_1,'./model_step1/mymodel.ckpt')
		y_loca=tf.get_collection('pre_loca')[0]
		x_1=graph_1.get_operation_by_name('in_image').outputs[0]
		if_is_training_1=graph_1.get_operation_by_name('if_is_training').outputs[0]

		return x_1 , sess_1 , if_is_training_1 ,y_loca

# Use the first model to predict the center coordinates of the input data.
def pre_model_1(x_1 , sess_1 , if_is_training_1 ,y_loca,in_image_1,y_label):
	loca_np=sess_1.run(y_loca,feed_dict={x_1:in_image_1,if_is_training_1:False})
	M,N,L=loca_np.shape
	x_image_2=[]
	y_label_2=[]
	for m in range(M):
		imgCutList=crop_image(in_image_1[m,:,:],loca_np[m,:,:])
		for im in range(len(imgCutList)):
			try:
				data_2=np.array(imgCutList[im]).reshape(400,)
			except Exception as e:
				print('imList reshape erro')
				continue
			x_image_2.append(data_2.tolist())
			y_label_2.append(y_label[m,im,:].tolist())

	return np.array(x_image_2), np.array(y_label_2)

3. As shown in the second network sketch, crop_image() uses the coordinates predicted by the first network to crop out the four characters, each as a 20x20-pixel patch (a small worked example follows the code below):

def crop_image(data,loca_np,imgshow=False):
	croped_img_list=[]

	loca_list=loca_np.tolist()
	if imgshow:
		img = data.copy()
	m,n=loca_np.shape
	for i in range(m):
		x = round(loca_list[i][0]*100-10)# convert the center x into the top-left x, for cropping
		y = round(loca_list[i][1]*25 -10)# convert the center y into the top-left y
		# the crop window may run past the image border, so clamp it
		if x<0:
			x = 0
		elif x>80:
			x=80
		if y<0:
			y=0
		elif y>9:
			y=9

		temp = data[y:y+20,x:x+20]# crop out the character
		croped_img_list.append(temp.tolist())
		if imgshow:
			img=cv2.rectangle(img*255,(x,y),(x+20,y+20),(255,0,0),1)
	if imgshow:
		img = Image.fromarray(img)
		img.show()
	# returns a list of 0~1 images
	return croped_img_list
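A quick worked example of the arithmetic above (numbers chosen by me for illustration): a predicted center of (0.55, 0.60) maps to a top-left corner of (round(0.55*100 - 10), round(0.60*25 - 10)) = (45, 5), so the crop is data[5:25, 45:65], a 20x20 patch.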

4. The evaluation function accuracy():

def accuracy(sess,pre_image,in_image,testImg,testLab,if_is_training):
	erro_count = 0

	for i in range(10):
		bt=random.randint(0,4999 - 100)
		x_image_2=testImg[bt:bt+100,:]
		y_label_2=testLab[bt:bt+100,:]
		pre_label = sess.run(pre_image,feed_dict={in_image:x_image_2 , if_is_training:False})
		M, N= pre_label.shape
		for m in range(M):
			x = np.argmax(pre_label[m,:])
			x0= np.argmax(y_label_2[m,:])
			if not x == x0:
				erro_count+=1
	if erro_count<=2:
		return True,erro_count
	else:
		return False,erro_count

5. The full code is as follows:


import tensorflow.contrib.slim as slim
import tensorflow as tf
import born_data
from PIL import Image
import numpy as np
import random
import time
import cv2
 
def generate_data():
	img_char = born_data.ImageChar()
	images=[]
	labels=[]
	for i in range(30):
		chinese_img_PIL,label_list=img_char.rand_img_label(step=2)
		np_img=np.asarray(chinese_img_PIL)
		np_img = cv2.cvtColor(np_img,cv2.COLOR_BGR2GRAY)
		ret,np_img = cv2.threshold(np_img,127,255,cv2.THRESH_BINARY_INV)
		np_img=np_img/255
		images.append(np_img.tolist())
		labels.append(label_list)
 
	return np.array(images),np.array(labels)
 
def crop_image(data,loca_np,imgshow=False):
	
	croped_img_list=[]
 
	loca_list=loca_np.tolist()
	if imgshow:
		img = data.copy()
	m,n=loca_np.shape
	for i in range(m):
		x = round(loca_list[i][0]*100-10)# convert the center x into the top-left x, for cropping
		y = round(loca_list[i][1]*25 -10)# convert the center y into the top-left y
		# the crop window may run past the image border, so clamp it
		if x<0:
			x = 0
		elif x>80:
			x=80
		if y<0:
			y=0
		elif y>9:
			y=9
		
		temp = data[y:y+20,x:x+20]# crop out the character
		croped_img_list.append(temp.tolist())
		if imgshow:
			img=cv2.rectangle(img*255,(x,y),(x+20,y+20),(255,0,0),1)
	if imgshow:
		img = Image.fromarray(img)
		img.show()
	# returns a list of 0~1 images
	return croped_img_list
 
def cal_loss(y_pre,y_label):  
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_label, logits=y_pre))
    # return -tf.reduce_sum(y_label*tf.log(y_pre))
    # return tf.reduce_mean(tf.square(y_label - y_pre))
    # return tf.reduce_mean(tf.pow(tf.subtract(y_pre,y_label),2))
 
def xavier_init(fan_in,fan_out,constant = 1):
    low = -constant * np.sqrt(6.0/(fan_in+fan_out))
    high = constant * np.sqrt(6.0/(fan_in+fan_out))
    return tf.random_uniform((fan_in,fan_out),minval = low,maxval = high,dtype = tf.float32)
 
 
def network(in_image,if_is_training):
	batch_norm_params={
		'is_training':if_is_training,
		'zero_debias_moving_mean':True,
		'decay':0.99,
		'epsilon':0.001,
		'scale':True,
		'updates_collections':None
	}
 
	with slim.arg_scope([slim.conv2d],activation_fn=tf.nn.relu,
		padding='SAME',
		weights_initializer=slim.xavier_initializer(),
		biases_initializer=tf.zeros_initializer(),
		normalizer_fn=slim.batch_norm,
		normalizer_params=batch_norm_params,
		weights_regularizer=slim.l2_regularizer(0.0005)):
		out_1=32
		out_2=64
		out_3=128
 
		net=slim.conv2d(in_image,num_outputs=out_2,kernel_size=[5,5],stride=1,scope='conv1')
		print('1_con:\t',net.get_shape())
		net=slim.max_pool2d(net,kernel_size=[2,2],stride=2,scope='pool1')
		print('1_pool:\t',net.get_shape())
 
		net=slim.conv2d(net,num_outputs=out_2,kernel_size=[5,5],stride=1,scope='conv2')
		print('2_con:\t',net.get_shape())
		net=slim.max_pool2d(net,kernel_size=[2,2],stride=2,scope='pool2')
		print('2_pool:\t',net.get_shape())
 
		net=slim.conv2d(net,num_outputs=out_3,kernel_size=[3,3],stride=1,scope='conv3_1')
		net=slim.conv2d(net,num_outputs=out_3,kernel_size=[3,3],stride=1,scope='conv3_2')
		print('3_con:\t',net.get_shape())
		net=slim.max_pool2d(net,kernel_size=[2,2],stride=2,scope='pool3')
		print('3_pool:\t',net.get_shape())
 
	# net = tf.reshape(net,shape=[-1,2*2*128])
	net=slim.flatten(net,scope='flatten')
 
	with slim.arg_scope([slim.fully_connected],
		activation_fn=tf.nn.relu,
		normalizer_fn=slim.batch_norm,
		normalizer_params=batch_norm_params):
 
		net=slim.fully_connected(net,3000,
			weights_initializer=slim.xavier_initializer(),
			biases_initializer=tf.zeros_initializer(),
			scope='fc1')		
		print('fc1:\t',net.get_shape())
	
		net=slim.fully_connected(net,9000,
			weights_initializer=slim.xavier_initializer(),
			biases_initializer=tf.zeros_initializer(),
			scope='fc2')		
		print('fc2:\t',net.get_shape())
 
		net=slim.fully_connected(net,3500,
				activation_fn=None,
				normalizer_fn=None,
				# weights_initializer=slim.xavier_initializer(),
				# biases_initializer=tf.zeros_initializer(),
				scope='fc3')
		print('soft:\t',net.get_shape())
 
		return net
 
def accuracy(sess,pre_image,in_image,testImg,testLab,if_is_training):
	erro_count = 0
	
	for i in range(10):
		bt=random.randint(0,4999 - 100)
		x_image_2=testImg[bt:bt+100,:]
		y_label_2=testLab[bt:bt+100,:]
		pre_label = sess.run(pre_image,feed_dict={in_image:x_image_2 , if_is_training:False})
		M, N= pre_label.shape
		for m in range(M):
			x = np.argmax(pre_label[m,:])
			x0= np.argmax(y_label_2[m,:])
			if not x == x0:
				erro_count+=1
	if erro_count<=2:
		return True,erro_count
	else:
		return False,erro_count
# Load the first network's model.
def load_model_1():
	graph_1=tf.Graph()
	sess_1=tf.Session(graph=graph_1)
	with graph_1.as_default():
		model_saver_1=tf.train.import_meta_graph("./model_step1/mymodel.ckpt.meta")
 
		model_saver_1.restore(sess_1,'./model_step1/mymodel.ckpt')
		y_loca=tf.get_collection('pre_loca')[0]
		x_1=graph_1.get_operation_by_name('in_image').outputs[0]
		if_is_training_1=graph_1.get_operation_by_name('if_is_training').outputs[0]
 
		return x_1 , sess_1 , if_is_training_1 ,y_loca
# Use the first model to predict the center coordinates of the input data.
def pre_model_1(x_1 , sess_1 , if_is_training_1 ,y_loca,in_image_1,y_label):
	loca_np=sess_1.run(y_loca,feed_dict={x_1:in_image_1,if_is_training_1:False})
	M,N,L=loca_np.shape
	x_image_2=[]
	y_label_2=[]
	for m in range(M):
		imgCutList=crop_image(in_image_1[m,:,:],loca_np[m,:,:])
		for im in range(len(imgCutList)):
			try:
				data_2=np.array(imgCutList[im]).reshape(400,)
			except Exception as e:
				print('imList reshape erro')
				continue
			x_image_2.append(data_2.tolist())
			y_label_2.append(y_label[m,im,:].tolist())
 
	return np.array(x_image_2), np.array(y_label_2)
 
def main():
	in_image= tf.placeholder(dtype=tf.float32, shape=[None,400], name='in_image')
	out_image=tf.placeholder(dtype=tf.float32, shape=[None,3500], name='out_image')
 
	# used together with batch normalization: True while training, False at prediction time
	if_is_training=tf.placeholder(dtype=tf.bool,name='if_is_training') 
 
	x_input = tf.reshape(in_image, shape=[-1,20,20,1], name='x_input')
 
	pre_image=network(x_input,if_is_training)
 
	# l2_loss = tf.add_n(tf.losses.get_regularization_losses())
	cost=cal_loss(pre_image,out_image)
	corr=tf.equal(tf.argmax(pre_image,1),tf.argmax(out_image,1))
	loss=tf.reduce_mean(tf.cast(corr,"float"))
 
	# used together with batch normalization
	update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS)
	with tf.control_dependencies(update_ops):
		# train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
		train_op = tf.train.MomentumOptimizer(learning_rate=0.01,momentum=0.9,use_nesterov=True).minimize(cost)
 
	model_saver=tf.train.Saver()
	tf.add_to_collection('pre_img',pre_image)
 
	testImg = np.load('testImg1.npy')
	testLab = np.load('testLab1.npy')
 
	
	x_1 , sess_1 , if_is_training_1 ,y_loca =load_model_1()
 
	with tf.Session() as sess:
		sess.run(tf.global_variables_initializer())
		while True:
			# enter the number of training steps, which makes it easy to pause and resume training
			command=input('input:\t')
			if command=='qq':
				break
			for i in range(int(command)):
				x_image_1,y_label = generate_data()
				x_image_2,y_label_2= pre_model_1(x_1 , sess_1 , if_is_training_1 ,y_loca,x_image_1,y_label)
				sess.run(train_op,feed_dict={in_image:x_image_2,out_image:y_label_2,if_is_training:True})
				
				if i%500==0:
					ret,erro_count = accuracy(sess,pre_image,in_image,testImg,testLab,if_is_training)
					print('count: ',i,'\taccuracy: ',erro_count)
					
		model_saver.save(sess,'./model_step2/mymodel.ckpt')
 
if __name__=='__main__':
	main()
 
	# x_1 , sess_1 , if_is_training_1 ,y_loca =load_model_1()
	# x_image_1,y_label = generate_data()
	# x_image_2,y_label_2= pre_model_1(x_1 , sess_1 , if_is_training_1 ,y_loca,x_image_1,y_label)
	# np.save('trainImg2',x_image_2)
	# np.save('trainLab2',y_label_2)

Loading both networks and recognizing CAPTCHAs
1. The predict() function recognizes a single CAPTCHA image.

2. The test() function measures the overall accuracy.

The full code: chineseCode.py

import tensorflow as tf
from PIL import Image
import numpy as np
import born_data
import cv2
import sys

class ChineseCodeRecognition():
    """docstring for ChineseCodeRecognition"""
    def __init__(self):
        self.w3500 = open('3500.txt','r').read()
        self.x_1,self.sess_1 , self.if_is_training_1 ,self.y_loca = self.load_model_1()
        self.x_2,self.sess_2 , self.if_is_training_2 ,self.y_class = self.load_model_2()

    def load_model_1(self):
        graph_1=tf.Graph()
        sess_1=tf.Session(graph=graph_1)
        with graph_1.as_default():
            model_saver_1=tf.train.import_meta_graph("./model_step1/mymodel.ckpt.meta")

            model_saver_1.restore(sess_1,'./model_step1/mymodel.ckpt')
            y_loca=tf.get_collection('pre_loca')[0]
            x_1=graph_1.get_operation_by_name('in_image').outputs[0]
            if_is_training_1=graph_1.get_operation_by_name('if_is_training').outputs[0]

            return x_1 , sess_1 , if_is_training_1 ,y_loca

    def load_model_2(self):
        graph_2=tf.Graph()
        sess_2=tf.Session(graph=graph_2)
        with graph_2.as_default():
            model_saver_2=tf.train.import_meta_graph("./model_step2/mymodel.ckpt.meta")
            model_saver_2.restore(sess_2,'./model_step2/mymodel.ckpt')
            y_class=tf.get_collection('pre_img')[0]

            x_2=graph_2.get_operation_by_name('in_image').outputs[0]
            if_is_training_2=graph_2.get_operation_by_name('if_is_training').outputs[0]

            return x_2 , sess_2 , if_is_training_2 ,y_class

    def readImage(self,filename):
        img=cv2.imread(filename)
        img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
        ret,data = cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)
        data=data/255
        return data

    def crop_image(self,data,loca_np,imgshow=False):

        croped_img_list=[]

        loca_list=loca_np.tolist()
        if imgshow:
            img = data.copy()
        m,n=loca_np.shape
        for i in range(m):
            x = round(loca_list[i][0]*100-10)
            y = round(loca_list[i][1]*25 -10)
            if x<0:
                x = 0
            elif x>80:
                x=80
            if y<0:
                y=0
            elif y>9:
                y=9
            temp = data[y:y+20,x:x+20]
            croped_img_list.append(temp.tolist())
            if imgshow:
                img=cv2.rectangle(img*255,(x,y),(x+20,y+20),(255,0,0),1)
        if imgshow:
            img = Image.fromarray(img)
            img.show()
        # returns a list of 0~1 images
        return croped_img_list

    # recognize a single CAPTCHA image
    def predict(self):
        while True:
            try:
                filename = input('input image filename:')
                if filename == 'qq':
                    break
                data = self.readImage(filename)
            except Exception as e:
                print('please check filename')
                continue

            in_image=data.reshape(1,30,100)
            loca_np=self.sess_1.run(self.y_loca,feed_dict={self.x_1:in_image,self.if_is_training_1:False})
            loca_np=loca_np.reshape(4,2)
            imgCutList=self.crop_image(data,loca_np,True)
            chineseCode=""
            for imList in imgCutList:
                data_2=np.array(imList).reshape(1,400)

                # data=tf.reshape(data, shape=[1,400])
                rel=self.sess_2.run(self.y_class,feed_dict={self.x_2:data_2,self.if_is_training_2:False})
                num=np.argmax(rel)
                chineseCode+=self.w3500[num]
            print(chineseCode)

    # measure accuracy over `times` CAPTCHAs
    def test(self,times):
        erro=0
        loss=0
        for i in range(times):
            i_chr=born_data.ImageChar()
            img_PIL,words=i_chr.rand_img_test()

            in_img = np.asarray(img_PIL)
            in_img = cv2.cvtColor(in_img,cv2.COLOR_BGR2GRAY)
            ret,in_img = cv2.threshold(in_img,127,255,cv2.THRESH_BINARY_INV)
            data=in_img/255
            in_image = data.reshape(1,30,100)

            loca_np=self.sess_1.run(self.y_loca,feed_dict={self.x_1:in_image,self.if_is_training_1:False})
            loca_np=loca_np.reshape(4,2)
            imgCutList=self.crop_image(data,loca_np)
            chineseCode=""
            for imList in imgCutList:
                try:
                    data_2=np.array(imList).reshape(1,400)
                except Exception as e:
                    loss+=1
                    continue
                rel=self.sess_2.run(self.y_class,feed_dict={self.x_2:data_2,self.if_is_training_2:False})
                num=np.argmax(rel)
                chineseCode+=self.w3500[num]
            if len(chineseCode)==4:
                if not chineseCode == words:
                    erro+=1
            print('\r',i,end='\r')

        print('erro: ',erro/times*100,'%','\tloss: ',loss)

if __name__=='__main__':
    ccr=ChineseCodeRecognition()
    ccr.test(10000)
    # ccr.predict()

Summary:
In the final test, ten runs of 10,000 CAPTCHAs each gave a misrecognition rate below 0.333% every time.
