# KMeans的数据压缩 (data compression with KMeans)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.utils import shuffle
import mahotas as mh

# Compress an image by quantizing its colors with KMeans: fit a small palette
# of cluster centers on a random sample of pixels, then repaint every pixel
# with the center of the cluster it is assigned to.
IMAGE_PATH = 'Penguins.jpg'
N_SAMPLE_PIXELS = 1000  # number of randomly drawn pixels used to fit the palette
N_COLORS = 64           # number of clusters, i.e. colors in the compressed palette

# Load the image as float64 and divide by 255 so that 8-bit channel values
# land in [0, 1], the range plt.imshow expects for float RGB data.
original_img = np.array(mh.imread(IMAGE_PATH), dtype=np.float64) / 255

original_dimensions = tuple(original_img.shape)
# For the sample image used by the original author:
#   original_dimensions == (434, 1024, 3)

# The array is laid out as (rows, columns, channels), i.e. (height, width, 3);
# see http://mahotas.readthedocs.org/en/latest/api.html ("Must be of shape
# (h,w,3)"). Axis 0 is therefore the height, not the width.
height, width, depth = original_dimensions

# One row per pixel, one column per color channel.
# For the sample image: image_flattened.shape == (444416, 3)
image_flattened = np.reshape(original_img, (height * width, depth))

# Randomly pick N_SAMPLE_PIXELS color points; fitting on a sample is much
# cheaper than fitting on every pixel. random_state=0 keeps the run repeatable.
# For the sample image: image_array_sample.shape == (1000, 3)
image_array_sample = shuffle(image_flattened, random_state=0)[:N_SAMPLE_PIXELS]

# Fit N_COLORS clusters on the sampled pixels only.
estimator = KMeans(n_clusters=N_COLORS, random_state=0)
estimator.fit(image_array_sample)

# Assign every pixel of the full image the label of its nearest cluster
# center (N_COLORS distinct labels in total).
# For the sample image: cluster_assignments.shape == (444416,)
cluster_assignments = estimator.predict(image_flattened)

# Look up the palette color for each label in a single vectorized indexing
# step, then restore the (height, width, channels) layout. The labels are in
# row-major pixel order, so a plain reshape puts every color back in place.
compressed_palette = estimator.cluster_centers_
compressed_img = compressed_palette[cluster_assignments].reshape(
    height, width, compressed_palette.shape[1]
)

# Show the original and the compressed image side by side.
plt.subplot(121)  # 1 row, 2 columns, first position
plt.title('Original Image')
plt.imshow(original_img)
plt.axis('off')

plt.subplot(122)  # 1 row, 2 columns, second position
plt.title('Compressed Image')
plt.imshow(compressed_img)
plt.axis('off')

plt.show()
# 原文地址 (original article): https://www.cnblogs.com/qqhfeng/p/5277572.html