Skip to content

Instantly share code, notes, and snippets.

@kleysonr
Created October 15, 2020 18:39
Show Gist options
  • Save kleysonr/4cefe8b1193d3131c7937aecc196143c to your computer and use it in GitHub Desktop.
Save kleysonr/4cefe8b1193d3131c7937aecc196143c to your computer and use it in GitHub Desktop.
import glob
import os
from shutil import copyfile
# Fraction of each class's samples that split() holds out as the test set.
RATIO_TEST = 0.25
# Fraction of the remaining (non-test) samples that split() holds out for
# validation.
RATIO_VAL = 0.10

# Sample base names (file name without extension) grouped by class label.
# main() appends to these lists; each label gets its own independent list.
lista = {label: [] for label in ('frente', 'recibo')}
def split(dataset, ratio_test=None, ratio_val=None):
    """Partition *dataset* into consecutive train, validation and test slices.

    The sequence is cut in order (no shuffling): the first items go to
    train, the next ones to validation, and everything left over goes to
    test, so no item is ever dropped because of rounding.

    Args:
        dataset: Sliceable sequence of samples (e.g. a list of file names).
        ratio_test: Fraction of the items reserved for test. Defaults to the
            module-level ``RATIO_TEST`` when omitted.
        ratio_val: Fraction of the non-test items reserved for validation.
            Defaults to the module-level ``RATIO_VAL`` when omitted.

    Returns:
        A ``(train, test, val)`` tuple of slices of *dataset*. Note the
        order: test comes before val.
    """
    # Resolve defaults at call time so existing one-argument callers keep
    # using the module constants. ``is None`` keeps an explicit 0 valid.
    if ratio_test is None:
        ratio_test = RATIO_TEST
    if ratio_val is None:
        ratio_val = RATIO_VAL

    total = len(dataset)
    # Items that are not test; validation is carved out of this portion.
    n_non_test = round(total * (1 - ratio_test))
    n_val = round(n_non_test * ratio_val)
    n_train = n_non_test - n_val

    train = dataset[:n_train]
    val = dataset[n_train:n_non_test]
    # Open-ended slice: test absorbs whatever remains, which is what the
    # previous "+1" upper bound was compensating for.
    test = dataset[n_non_test:]

    print('----------------------------')
    print('Total size: {}'.format(total))
    print('Train: {}'.format(len(train)))
    print('Test: {}'.format(len(test)))
    print('Val: {}'.format(len(val)))

    return (train, test, val)
def move_files(train, test, val):
    """Move each sample's image and annotation into its split directory.

    For every base name in *train*, *test* and *val* (processed in that
    order), ``images/<name>.png`` and then ``images/<name>.txt`` are moved
    into ``images/train``, ``images/test`` and ``images/val`` respectively.
    Destination directories are created if they do not exist yet.

    Args:
        train: Base file names (no extension) to move into ``images/train``.
        test: Base file names (no extension) to move into ``images/test``.
        val: Base file names (no extension) to move into ``images/val``.

    Raises:
        OSError: If a source file is missing or cannot be moved, e.g.
            ``FileNotFoundError`` when a sample has no ``.png`` file.
    """
    # Function-scope import: the module itself only imports ``copyfile``.
    import shutil

    destinations = (
        ('images/train', train),
        ('images/test', test),
        ('images/val', val),
    )
    for dst_dir, names in destinations:
        # Create the target here so the function does not depend on the
        # caller having prepared the directory tree beforehand.
        os.makedirs(dst_dir, exist_ok=True)
        for name in names:
            # Image first, then its annotation file.
            for ext in ('.png', '.txt'):
                filename = '{}{}'.format(name, ext)
                # A single move replaces the previous copy-then-delete pair.
                shutil.move(os.path.join('images', filename),
                            os.path.join(dst_dir, filename))
def main():
    """Group annotated samples by class, then split and move each class.

    Scans ``images/*.txt``, classifies every sample by the first character
    of the first line of its annotation file ('0' -> 'frente',
    '1' -> 'recibo'), appends the base name to the module-level ``lista``,
    and finally runs ``split`` + ``move_files`` once per class.

    Annotation files whose first line is empty or starts with any other
    character are not assigned to a class and are left where they are.
    """
    label_files = glob.glob('images/*.txt', recursive=False)

    # Build the list of sample names for each class.
    for label_path in label_files:
        stem, _ext = os.path.splitext(os.path.basename(label_path))

        # classes.txt is not a per-sample annotation (presumably the class
        # name list of the labelling tool -- confirm), so it is skipped.
        if stem == 'classes':
            continue

        # Read only the first line of the file; the context manager closes
        # the handle instead of leaving it to the garbage collector.
        with open(label_path) as handle:
            first_line = handle.readline().strip()

        # startswith() is simply False on an empty line, so an empty
        # annotation file is skipped rather than raising IndexError.
        # NOTE(review): assumes the line begins with the class id -- confirm.
        if first_line.startswith('0'):
            lista['frente'].append(stem)
        elif first_line.startswith('1'):
            lista['recibo'].append(stem)

    # Split dataset: one class at a time, 'frente' first, then 'recibo'.
    for label in ('frente', 'recibo'):
        train, test, val = split(lista[label])
        move_files(train, test, val)
if __name__ == "__main__":
    # Make sure every split directory exists before main() moves files
    # into it; exist_ok makes re-running the script harmless.
    for split_dir in ('images/train', 'images/test', 'images/val'):
        os.makedirs(split_dir, exist_ok=True)
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment