文件读取:
图片数据:
图片三要素:
张量形状:
图片特征值处理:
案例:狗图片读取:
在图像的数字化表示当中,图片分为黑白和彩色两种。数字化表示一张图片时有三个要素,分别是图片的长、图片的宽、图片的颜色通道数。黑白图片的颜色通道数为1,只需要一个数字就可以表示一个像素位;而彩色图片不一样,它有三个颜色通道,分别为R、G、B,需要通过三个数字表示一个像素位。TensorFlow支持JPG、PNG图像格式,以及RGB、RGBA颜色空间。图像用与图像尺寸相同的(height × width × channel)张量表示。图像的所有像素存储在磁盘文件中,需要被加载到内存。
大尺寸图像输入会占用大量系统内存。训练CNN本身需要大量时间,加载大文件会进一步增加训练时间,而且大尺寸图像也难以放入多数系统的GPU显存。大尺寸图像还包含大量无关的本征属性信息,会影响模型的泛化能力。因此最好在预处理阶段完成图像操作,如缩小、裁剪、缩放、灰度调整等。图像加载后,还可以进行翻转、扭曲,使输入网络的训练信息多样化,缓解过拟合。Python常用的图像处理框架有PIL、OpenCV;TensorFlow也提供了部分图像处理方法。
tf.image.resize_images:将图片缩放到指定大小(在TensorFlow 2.x中对应 tf.image.resize,旧接口保留在 tf.compat.v1.image.resize_images)
import os

import tensorflow as tf


def read_picture():
    """Read the JPEG files under ``./dog`` and print one resized batch.

    Builds a TF1-style queue pipeline: a filename queue over every entry of
    ``./dog``, a whole-file reader, JPEG decoding, resizing to 200x200, and a
    batching queue of 100 images. One evaluation of the pipeline is run in a
    session and the results are printed.

    Eager execution is disabled as a side effect, because queue runners and
    sessions only work in graph mode.

    :return: None
    """
    tf.compat.v1.disable_eager_execution()

    # 1. Build the filename queue.
    filename_list = os.listdir("./dog")
    # Prefix every file name with the directory path.
    file_list = [os.path.join("./dog/", i) for i in filename_list]
    print("file_list: ", file_list)
    print("filename_list: ", filename_list)
    file_queue = tf.compat.v1.train.string_input_producer(file_list)

    # 2. Read and decode.
    # WholeFileReader yields (file name, full file contents) per read.
    reader = tf.compat.v1.WholeFileReader()
    key, value = reader.read(file_queue)
    print("key: ", key)
    print("value: ", value)

    # Force three output channels so that grayscale JPEGs also match the
    # static [200, 200, 3] shape declared below; without this a single
    # one-channel file would break the batching queue at run time.
    image_decoded = tf.image.decode_jpeg(value, channels=3)
    print("image_decoded: ", image_decoded)

    # Bring every image to the same spatial size.
    image_resized = tf.compat.v1.image.resize_images(image_decoded, [200, 200])
    print("image_resized_before: ", image_resized)

    # Batching needs a fully defined static shape.
    image_resized.set_shape([200, 200, 3])
    print("image_resized_after: ", image_resized)

    # 3. Batching queue.
    image_batch = tf.compat.v1.train.batch(
        [image_resized], batch_size=100, num_threads=2, capacity=100
    )
    print("image_batch: ", image_batch)

    with tf.compat.v1.Session() as sess:
        # The coordinator lets us stop and join the queue-runner threads.
        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.start_queue_runners(sess=sess, coord=coord)
        try:
            filename, sample, image, n_image = sess.run(
                [key, value, image_resized, image_batch]
            )
            print("filename: ", filename)
            print("sample: ", sample)
            print("image: ", image)
            print("n_image: ", n_image)
        finally:
            # Always shut the queue threads down, even if sess.run failed.
            coord.request_stop()
            coord.join(threads)

    return None


if __name__ == "__main__":
    # Example 1: read the dog pictures.
    read_picture()
二进制文件读取:
import os

import tensorflow as tf


class Cifar:
    """Reader for fixed-length binary records under ``./cifar-10-batches-bin``.

    Each record is treated as ``label`` bytes of label followed by ``image``
    bytes of pixel data (``height * width * channel``).
    """

    def __init__(self):
        """Set the image dimensions and the derived per-record byte counts."""
        # Image dimensions.
        self.height = 32
        self.width = 32
        self.channel = 3

        # Byte counts: pixel payload, label, and the whole record.
        self.image = self.height * self.width * self.channel
        self.label = 1
        self.sample = self.image + self.label

    def read_binary(self):
        """Read one batch of 100 records and print the labels and images.

        Builds a TF1-style queue pipeline over every ``.bin`` file in
        ``./cifar-10-batches-bin``, decodes each fixed-length record into a
        label and an image, and evaluates one batch in a session. Must be
        called with eager execution disabled.

        :return: None
        """
        # 1. Build the filename queue from the .bin files only.
        filename_list = os.listdir("./cifar-10-batches-bin")
        file_list = [
            os.path.join("./cifar-10-batches-bin/", i)
            for i in filename_list
            if i.endswith(".bin")
        ]
        file_queue = tf.compat.v1.train.string_input_producer(file_list)

        # 2. Read and decode.
        # Every read returns exactly one record of self.sample bytes.
        reader = tf.compat.v1.FixedLengthRecordReader(self.sample)
        # key is the record identifier, value the raw record bytes.
        key, value = reader.read(file_queue)

        # Reinterpret the raw bytes as a flat uint8 vector.
        image_decoded = tf.compat.v1.decode_raw(value, tf.uint8)
        print("image_decoded: ", image_decoded)

        # Split the vector: leading label byte(s), then the pixel bytes.
        label = tf.slice(image_decoded, [0], [self.label])
        image = tf.slice(image_decoded, [self.label], [self.image])
        print("label: ", label)
        print("image: ", image)

        # The pixel bytes are interpreted as channel-major
        # (channel, height, width) ...
        image_reshaped = tf.reshape(image, [self.channel, self.height, self.width])
        print("image_reshaped: ", image_reshaped)

        # ... and transposed to (height, width, channel).
        image_transposed = tf.transpose(image_reshaped, [1, 2, 0])
        print("image_transposed: ", image_transposed)

        # 3. Batching queue.
        image_batch, label_batch = tf.compat.v1.train.batch(
            [image_transposed, label], batch_size=100, num_threads=2, capacity=100
        )

        with tf.compat.v1.Session() as sess:
            # The coordinator lets us stop and join the queue-runner threads.
            coord = tf.train.Coordinator()
            threads = tf.compat.v1.train.start_queue_runners(sess=sess, coord=coord)
            try:
                label_value, image_value = sess.run([label_batch, image_batch])
                print("label_value: ", label_value)
                print("image: ", image_value)
            finally:
                # Always shut the queue threads down, even if sess.run failed.
                coord.request_stop()
                coord.join(threads)

        return None


if __name__ == "__main__":
    # Queue runners and sessions require graph mode.
    tf.compat.v1.disable_eager_execution()
    cifar = Cifar()
    cifar.read_binary()