Skip to content

Instantly share code, notes, and snippets.

@cpdean
Last active January 2, 2018 16:34
Show Gist options
  • Save cpdean/9936bc34ebe3359e19f1d599ad68e73c to your computer and use it in GitHub Desktop.
crawl a directory of nodejs projects
#!/bin/bash
# Refresh every nodejs project checked out directly under the current
# directory: pull the latest commits, wipe node_modules, reinstall with yarn.
#
# Fixes vs. original:
#  - `find -depth 1` is BSD-only; `-mindepth 1 -maxdepth 1` is portable
#    (works on both GNU and BSD/macOS find) and selects the same dirs.
#  - paths are quoted and iterated with `read -r`, so names containing
#    whitespace no longer word-split.
#  - a failed `cd` now skips the repo instead of running git/yarn in the
#    wrong directory; the subshell makes `cd -` unnecessary.
find . -mindepth 1 -maxdepth 1 -type d | while read -r repo; do
    echo "building $repo"
    (
        cd "$repo" || exit
        git pull
        rm -rf node_modules
        yarn install
    )
done
import json
import shlex
import subprocess
import urllib.parse
def run_process(s):
    """Launch the command string *s* as a subprocess.

    The string is tokenized shell-style with shlex; the returned Popen
    object has its stdout attached to a pipe for the caller to consume.
    """
    argv = shlex.split(s)
    return subprocess.Popen(argv, stdout=subprocess.PIPE)
def read_results(process_ran):
    """Drain and return a subprocess's stdout as bytes.

    Fix: also wait for the process to exit after reading, so finished
    children are reaped instead of lingering as zombies (the original
    only read the pipe and never called wait()). The exit status is
    still ignored, matching the original contract.
    """
    out = process_ran.stdout.read()
    process_ran.wait()  # reap the child; safe after stdout hits EOF
    return out
def gather_deps(package_to_search=None):
    """Collect dependency names from nearby package.json files.

    Uses the external `fd` tool to locate package.json files up to two
    directories deep, optionally narrowed by *package_to_search*, then
    returns the "dependencies" keys of every match as a flat list.
    """
    if package_to_search is None:
        # the empty string will cause `fd` to return all package.jsons
        package_to_search = ''
    search_for_package_json = 'fd package.json -d 2 {}'.format(
        package_to_search
    )
    found = read_results(run_process(search_for_package_json)).split()
    deps = []
    for path in found:
        with open(path.strip()) as fh:
            manifest = json.load(fh)
        deps.extend(manifest.get('dependencies', []))
    return deps
def current_repos():
    """Return the names of directories directly under the current dir.

    Shells out to `find` and strips the leading './' from each entry,
    decoding the result as UTF-8.

    Fix: the original command used BSD-only `find . -type d -depth 1`,
    which GNU find rejects ("paths must precede expression"); the
    portable `-mindepth 1 -maxdepth 1` form selects exactly the same
    directories on both GNU and BSD/macOS find.
    """
    find_dirs = 'find . -mindepth 1 -maxdepth 1 -type d'
    return [i.split(b'./')[-1].decode('utf')
            for i in read_results(run_process(find_dirs)).split()]
def homepage(package_name):
    """Ask npm for the registered homepage URL of *package_name*.

    Returns the trimmed stdout of `npm info` as bytes.
    """
    proc = run_process(f'npm info {package_name} homepage')
    return read_results(proc).strip()
def get_repo_url(package_name):
    """Ask npm for the repository.url metadata of *package_name*.

    Returns the trimmed stdout of `npm info` as bytes; used as a
    fallback when the homepage is not a parseable github URL.
    """
    proc = run_process(f'npm info {package_name} repository.url')
    return read_results(proc).strip()
class NotGithub(Exception):
    """Raised when a package's homepage does not point at github.com."""
def repo_remote(homepage_url):
    """Convert a typical npm package homepage to a git clone URL.

    so:
    https://github.com/Happy0/ssb-chess =>
    git@github.com:Happy0/ssb-chess.git

    Args:
        homepage_url: bytes URL, as returned by `npm info ... homepage`.

    Raises:
        NotGithub: when the URL is not hosted on github.com.
        ValueError: when the URL path is not exactly /user/repo
            (re-raised after logging, so the caller can fall back).

    Fix: a homepage that already ends in '.git' previously produced a
    doubled 'repo.git.git' suffix; the extension is now stripped first.
    """
    if b'github.com' not in homepage_url:
        raise NotGithub(homepage_url)
    path = urllib.parse.urlparse(homepage_url).path.strip(b'/')
    try:
        user, repo = [part.decode('utf') for part in path.split(b'/')]
    except ValueError:
        print(f"problem with: {homepage_url}")
        raise
    if repo.endswith('.git'):
        # avoid emitting 'repo.git.git' when the homepage is already
        # a clone URL
        repo = repo[:-len('.git')]
    return f'git@github.com:{user}/{repo}.git'
def clone(repo_url):
    """Shell out to `git clone` for *repo_url*; return its stdout bytes."""
    proc = run_process(f'git clone {repo_url}')
    return read_results(proc)
def download_new_repos(package_to_search=None):
    """Clone the github repos of dependencies not yet checked out.

    Gathers dependency names from nearby package.json files, subtracts
    the directories already present, resolves each remaining package to
    a git URL via its npm homepage, and clones it.

    Fix: `except NotGithub: pass` previously fell through to
    clone(repo_url) with repo_url unbound (NameError on the first miss)
    or stale (re-cloning the previous package's URL); non-github
    packages are now skipped with `continue`.
    """
    packages_to_download = list(
        set(gather_deps(package_to_search)) - set(current_repos())
    )
    for p in packages_to_download:
        try:
            repo_url = repo_remote(homepage(p))
        except NotGithub:
            # not hosted on github: nothing we know how to clone
            continue
        except ValueError:
            # if they have a weird github, maybe they have filled out the
            # repository metadata
            repo_url = get_repo_url(p)
        clone(repo_url)
if __name__ == '__main__':
    import sys
    # an optional trailing CLI argument narrows the package.json search
    cli_args = sys.argv[1:]
    if cli_args:
        download_new_repos(cli_args[-1])
    else:
        download_new_repos()
import json
import sys

if __name__ == '__main__':
    # Read package.json paths from stdin (one per line) and print the
    # name of every entry in their "dependencies" sections.
    for line in sys.stdin.readlines():
        with open(line.strip()) as f:
            package = json.load(f)
        for dependency in package.get('dependencies', []):
            # fixed: `print dependency` is Python-2 syntax and a
            # SyntaxError under the Python 3 the companion script needs
            print(dependency)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment