Lua中将未分类数据分为训练集和测试集

require 'torch'
require 'image'
-- Script configuration. parent_root is the directory whose entries are
-- listed and treated as one class each by the split stage.
local setting = {
        parent_root = '/home/pxu/image',
}

--- List the entries of a directory, hidden entries included.
-- Runs `ls -a` on `path` and collects every output line except the `.` and
-- `..` pseudo-entries.
-- @param path directory to list (string)
-- @return array-like table of entry names, in the order `ls` printed them
function list_children_root(path)
        -- Single-quote the path so spaces and shell metacharacters reach `ls`
        -- literally; embedded single quotes are escaped as '\''.
        local quoted = "'" .. (path:gsub("'", "'\\''")) .. "'"
        local names = {}
        local pipe = assert(io.popen('ls -a -- ' .. quoted))
        for file_name in pipe:lines() do
                -- Filter the pseudo-entries by name rather than by position:
                -- depending on locale and file names they are not guaranteed
                -- to be the first two lines of the listing.
                if file_name ~= '.' and file_name ~= '..' then
                        names[#names + 1] = file_name
                end
        end
        pipe:close()
        return names
end

--- List the JPEG files of a directory.
-- Runs `ls -a` on `path` and keeps the entries whose name ends in `.jpg`
-- (case-sensitive).
-- @param path directory to scan (string)
-- @return array-like table of matching file names, in the order `ls` printed
--         them
function list_img(path)
        -- Single-quote the path so spaces and shell metacharacters reach `ls`
        -- literally; embedded single quotes are escaped as '\''.
        local quoted = "'" .. (path:gsub("'", "'\\''")) .. "'"
        local images = {}
        local pipe = assert(io.popen('ls -a -- ' .. quoted))
        for file_name in pipe:lines() do
                -- Match the extension only; a plain substring test would also
                -- accept names such as "jpg_notes.txt".
                if file_name:match('%.jpg$') then
                        images[#images + 1] = file_name
                end
        end
        pipe:close()
        return images
end
-- Split stage: for every entry under setting.parent_root, list its images,
-- shuffle them with a random permutation, and send the first 60% to the
-- training set and the remainder to the test set. An image's label is the
-- 1-based position of its directory in the listing.
print('obtain children root path ...')
-- Left global: the copy stage later in this script reads the path and label
-- tables.
train_paths,train_labels = {},{}
test_paths,test_labels = {}, {}
children_paths = list_children_root(setting.parent_root)
print(children_paths)
-- Next free slot in the train / test tables.
num_train,num_test =1,1
print('spit data begin')
for i = 1, #children_paths do
        local children_root = setting.parent_root .. '/' .. children_paths[i]
        print(children_root)
        local img_names = list_img(children_root)
        local n_img = #img_names
        -- torch.randperm requires n > 0, so directories without images are
        -- skipped instead of aborting the whole run.
        if n_img > 0 then
                local ranIdx = torch.randperm(n_img)
                -- Number of images of this class that go to the training set.
                local n_train = math.floor(0.6 * n_img)
                for j = 1, n_img do
                        local img_path = children_root .. '/' .. img_names[ranIdx[j]]
                        if j <= n_train then
                                train_paths[num_train] = img_path
                                train_labels[num_train] = i
                                num_train = num_train + 1
                        else
                                test_paths[num_test] = img_path
                                test_labels[num_test] = i
                                num_test = num_test + 1
                        end
                end
        end
end
-- Copy stage: copy every selected image to <dest_root><label>/<index>.jpg,
-- where <index> is the image's position in its own (train or test) list.
print('begin copy')

-- Single-quote a string for the shell; embedded single quotes become '\''.
local function shell_quote(s)
        return "'" .. (s:gsub("'", "'\\''")) .. "'"
end

-- Run a shell command and report a failure on stderr. os.execute returns 0 on
-- success in Lua 5.1/LuaJIT and true in Lua 5.2+.
local function run(cmd)
        local status = os.execute(cmd)
        if status ~= 0 and status ~= true then
                io.stderr:write('command failed: ' .. cmd .. '\n')
        end
end

-- Copy paths[i] to dest_root .. labels[i] .. '/' .. i .. '.jpg' for every i,
-- creating each label directory the first time it is needed.
local function copy_split(paths, labels, dest_root)
        local created = {}
        for i = 1, #paths do
                local label_dir = dest_root .. labels[i]
                if not created[label_dir] then
                        -- cp does not create missing target directories.
                        run('mkdir -p ' .. shell_quote(label_dir))
                        created[label_dir] = true
                end
                local aimpath = label_dir .. '/' .. i .. '.jpg'
                local todo = 'cp ' .. shell_quote(paths[i]) .. ' ' .. shell_quote(aimpath)
                print(todo)
                run(todo)
        end
end

copy_split(train_paths, train_labels, '/home/yqcui/image/train/')
-- Test images go to their own tree: both lists are numbered from 1, so writing
-- them under train/ would overwrite training images with the same label and
-- index.
copy_split(test_paths, test_labels, '/home/yqcui/image/test/')

将数据分为训练集和测试集,比例为6:4

原文地址:https://www.cnblogs.com/cyq041804/p/5737374.html