Skip to content

Instantly share code, notes, and snippets.

@pkerpedjiev
Last active January 26, 2020 06:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pkerpedjiev/f8ef3a96d70e1cd125a6a4697bde8949 to your computer and use it in GitHub Desktop.
Save pkerpedjiev/f8ef3a96d70e1cd125a6a4697bde8949 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import logging
import os
import os.path as op
import resgen as rg
import sys
def main():
    """Sync one or more local directories with a resgen project.

    The project name and the directories to sync are read from the command
    line; the resgen credentials are read from the ``RG_USER`` and
    ``RG_PASSWORD`` environment variables.

    Every entry of each target directory is handed to
    ``project.sync_dataset``. Afterwards, every dataset in the project whose
    datafile basename was not seen in any target directory is deleted.

    Raises:
        KeyError: if ``RG_USER`` or ``RG_PASSWORD`` is not set.
        SystemExit: raised by argparse on invalid command-line arguments.
    """
    parser = argparse.ArgumentParser(
        description="""
    python sync_folder.py project-name folder [folder2] ...
    Sync a directory with a resgen project. Tries to automatically
    infer filetypes and datatypes from filenames. Resgen username
    and password should be in the RG_USER and RG_PASSWORD environment
    variables. Requires `resgen-python>=0.2.3` and `higlass-python>=0.4.0`.
    """
    )
    parser.add_argument("project_name")
    # nargs="+" makes argparse reject an empty list, so target_dirs always
    # holds at least one directory and needs no fallback.
    parser.add_argument("target_dirs", nargs="+")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    username = os.environ["RG_USER"]
    password = os.environ["RG_PASSWORD"]
    project_name = args.project_name

    # Only the username is logged; the password must never reach the logs.
    logger.debug(f"connecting as {username}")
    rgc = rg.connect(username, password)

    logger.debug(f"creating project {project_name}")
    project = rgc.find_or_create_project(project_name)

    added = 0
    # Basenames seen locally. Datasets are matched by basename only, so the
    # same name occurring in two target directories is ambiguous.
    filenames = set()

    # add files
    for target_dir in args.target_dirs:
        for entry in os.listdir(target_dir):
            if entry in filenames:
                logger.error(
                    f"Filename {entry} appears twice in the directory tree"
                )
            filenames.add(entry)

            logger.info(f"Syncing {entry}")
            filepath = op.join(target_dir, entry)
            logger.debug(f"Syncing: {filepath}")
            project.sync_dataset(filepath)
            added += 1

    # Remove datasets whose file no longer exists in any target directory.
    deleted = 0
    for dset in project.list_datasets():
        filename = op.split(dset.datafile)[1]
        if filename not in filenames:
            logger.info(f"Deleting dataset with filename {filename}")
            project.delete_dataset(dset.uuid)
            # Count the deletion so the summary below is accurate.
            deleted += 1

    logger.info(f"Finished, synced {added} files, deleted {deleted} files")
# Script entry point: run the sync only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment