Created
October 15, 2020 18:39
-
-
Save kleysonr/4cefe8b1193d3131c7937aecc196143c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import os | |
from shutil import copyfile | |
# Fraction of each class's samples that goes to the test subset.
RATIO_TEST = 0.25
# Fraction of the remaining (non-test) samples that goes to validation.
RATIO_VAL = 0.10

# Sample base names (file name without extension) grouped by class label.
lista = {
    'frente': [],
    'recibo': [],
}
def split(dataset, ratio_test=None, ratio_val=None):
    """Partition ``dataset`` into consecutive train, validation and test slices.

    The items are not shuffled: the first part of ``dataset`` becomes the
    training set, the next part the validation set and the tail the test set.
    A summary of the resulting sizes is printed.

    Args:
        dataset: Sliceable sequence of samples.
        ratio_test: Fraction of all items reserved for testing. Defaults to
            the module-level ``RATIO_TEST``.
        ratio_val: Fraction of the non-test items reserved for validation.
            Defaults to the module-level ``RATIO_VAL``.

    Returns:
        A ``(train, test, val)`` tuple of slices of ``dataset`` -- note that
        ``test`` comes before ``val``.
    """
    if ratio_test is None:
        ratio_test = RATIO_TEST
    if ratio_val is None:
        ratio_val = RATIO_VAL

    nitens = len(dataset)
    # Index where the test portion starts; everything before it is train + val.
    test_start = round(nitens * (1 - ratio_test))
    nval = round(test_start * ratio_val)
    ntrain = test_start - nval

    train = dataset[:ntrain]
    val = dataset[ntrain:test_start]
    # Open-ended slice: every remaining item lands in the test set, so
    # rounding can never drop a sample.
    test = dataset[test_start:]

    print('----------------------------')
    print('Total size: {}'.format(nitens))
    print('Train: {}'.format(len(train)))
    print('Test: {}'.format(len(test)))
    print('Val: {}'.format(len(val)))

    return (train, test, val)
def _move_pair(name, subset_dir):
    """Move ``images/<name>.png`` and ``images/<name>.txt`` into ``subset_dir``."""
    for ext in ('png', 'txt'):
        filename = '{}.{}'.format(name, ext)
        src = os.path.join('images', filename)
        dst = os.path.join(subset_dir, filename)
        # Copy first and delete afterwards, so the source is only removed
        # once the copy has succeeded.
        copyfile(src, dst)
        os.remove(src)


def move_files(train, test, val):
    """Move each sample's image and annotation into its subset folder.

    Every name in ``train``, ``test`` and ``val`` is a base name without
    extension; ``images/<name>.png`` and ``images/<name>.txt`` are moved to
    ``images/train``, ``images/test`` and ``images/val`` respectively. The
    destination folders are created when they do not exist yet.

    Args:
        train: Base names of the training samples.
        test: Base names of the test samples.
        val: Base names of the validation samples.

    Raises:
        FileNotFoundError: If a sample's ``.png`` or ``.txt`` file is missing
            (raised by ``copyfile``).
    """
    subsets = (
        ('images/train', train),
        ('images/test', test),
        ('images/val', val),
    )
    for subset_dir, names in subsets:
        os.makedirs(subset_dir, exist_ok=True)
        for name in names:
            _move_pair(name, subset_dir)
def main():
    """Group the samples in ``images/`` by class and split each class.

    Every ``images/*.txt`` file except ``classes.txt`` is inspected: the first
    character of its first line selects the class (``'0'`` -> ``'frente'``,
    ``'1'`` -> ``'recibo'``). Files whose first line is empty or starts with
    anything else are left where they are. Each class is then split
    independently and its files are moved into the subset folders.
    """
    # Build the list of files for each class.
    for f in glob.glob('images/*.txt', recursive=False):
        filename = os.path.splitext(os.path.basename(f))[0]

        # 'classes.txt' is never treated as a sample
        # (presumably it holds the label names -- confirm).
        if filename == 'classes':
            continue

        # Read the first line of the file; the context manager closes the
        # handle right away instead of leaking it.
        with open(f) as annotation:
            fline = annotation.readline().strip()

        # Slicing (rather than indexing) tolerates an empty first line.
        first = fline[:1]
        if first == '0':
            lista['frente'].append(filename)
        elif first == '1':
            lista['recibo'].append(filename)

    # Split each class on its own so both keep the same train/test/val ratios.
    for label in ('frente', 'recibo'):
        train, test, val = split(lista[label])
        move_files(train, test, val)
if __name__ == "__main__":
    # Create the destination folders up front; existing ones are left alone.
    for subset_dir in ('images/train', 'images/test', 'images/val'):
        os.makedirs(subset_dir, exist_ok=True)

    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment