Skip to content

Instantly share code, notes, and snippets.

@jonathanbp
Last active February 9, 2024 09:55
Show Gist options
  • Save jonathanbp/e95549b64de3b55b8f1358cbe9e23d0a to your computer and use it in GitHub Desktop.
Save jonathanbp/e95549b64de3b55b8f1358cbe9e23d0a to your computer and use it in GitHub Desktop.
Bash script to try and figure out relations between collections in a mongodb dump.
#!/usr/bin/env bash
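# This script tries to detect relations between the collections of a MongoDB
# dump and renders them as a Graphviz graph.
# Usage: pass the dump directory (the one containing the .bson files) as the
# first argument. relations.dot and relations.png are written to the parent
# of that directory.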
# Shows a text spinner followed by a message for as long as the most recently
# started background job is alive.
# Arguments:
#   $1 - message printed after the spinner glyph
#   $2 - optional delay between frames in seconds (default: 0.75)
# Notes:
#   The watched PID is taken from $!, so call this immediately after starting
#   the background job. The function only polls the job; it does not wait for
#   it or report its exit status.
spinner() {
  local info="$1"
  local delay="${2:-0.75}"
  local pid=$!
  local spinstr='|/-\'
  local temp reset i

  # A frame is "[c] " (4 characters) plus the message; build the matching run
  # of backspaces once instead of recomputing it for every frame.
  reset=""
  for ((i = 0; i < ${#info} + 4; i++)); do
    reset+='\b'
  done

  while kill -0 "$pid" 2>/dev/null; do
    # Rotate the glyph string by one character: the head moves to the tail.
    temp=${spinstr#?}
    # The message is passed as an argument, never as part of the format, so a
    # literal '%' in it is printed verbatim.
    printf '[%c] %s' "$spinstr" "$info"
    spinstr=$temp${spinstr%"$temp"}
    sleep "$delay"
    # Move the cursor back to the start of the frame.
    printf '%b' "$reset"
  done
  printf ' \b\b\b\b'
}
# Check that every external tool this script invokes is installed.
# All missing tools are reported before aborting, so the user can install
# them in one go.
commands=("bsondump" "dot" "fastgron" "rg" "fd" "cut" "sed" "sort" "uniq" "basename")
missing=0
for cmd in "${commands[@]}"; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo >&2 "I require $cmd but it's not installed. Aborting."
    missing=1
  fi
done
if [ "$missing" -ne 0 ]; then
  exit 1
fi
# The dump directory is the script's first argument. Refuse to continue
# without it: every later step uses paths relative to that directory.
if [ -z "$1" ]; then
  echo >&2 "Usage: $0 <mongodb-dump-directory>"
  exit 1
fi
cd "$1" || {
  echo >&2 "Cannot enter directory $1. Aborting."
  exit 1
}
# If relations directory does not exist create it. An existing directory is
# taken to mean the conversion already happened, so this whole step is skipped.
if [ ! -d relations ]; then
  # Create a directory to store the data
  mkdir -p relations || {
    echo >&2 "Cannot create the relations directory. Aborting."
    exit 1
  }
  # Convert all bson files to json
  fd -e bson -x bsondump --quiet {} --outFile relations/{.}.json &
  spinner "Converting bson to json ..."
  # Convert all json files to gron; the intermediate json file is removed
  # once its gron counterpart has been written.
  fd -e json "" "relations" | while IFS= read -r jsonFile; do
    fastgron --stream "$jsonFile" >"$jsonFile.gron"
    rm "$jsonFile"
  done &
  spinner "Converting json to gron ..."
fi
# From now on we will work in the relations directory
cd relations || {
  echo >&2 "Cannot enter the relations directory. Aborting."
  exit 1
}
# For every gron file, extract the candidate ids into "<file>.ids".
fd -e gron | while IFS= read -r gronFile; do
  # Skip if the ids file is already present
  if [ -f "$gronFile.ids" ]; then
    continue
  fi
  # Keep assignments whose path contains "_id" and whose right-hand side is at
  # least 20 characters long, then store the de-duplicated right-hand sides.
  # "-f 2-" keeps everything after the first "=", so values that themselves
  # contain "=" are not cut short.
  rg "_id.*\s=\s.{20,10000}" "$gronFile" | cut -f 2- -d "=" | sort -u >"$gronFile.ids"
done &
spinner "Extracting object ids ..."
# Start the Graphviz description. It lives two levels above the relations
# directory, i.e. next to the dump directory.
echo "digraph {" >../../relations.dot
echo " edge [fontname=\"Iosevka Fixed\"]" >>../../relations.dot
echo " node [fontname=\"Iosevka Fixed\" shape=rect]" >>../../relations.dot
# For each id file, run through all ids and look for references in other files
fd -e ids | while IFS= read -r idFile; do
  # Collection name exactly as it appears on disk (the .json.gron.ids suffix
  # removed). It is needed verbatim to exclude the collection's own gron file.
  collection=$(basename "$idFile" .json.gron.ids)
  # Node name used in the graph: the collection name with . replaced by _
  target=${collection//./_}
  # Read the ids straight from the file so an empty ids file yields no
  # iterations at all.
  while read -r id; do
    # An empty pattern would match every file and fabricate edges.
    if [ -z "$id" ]; then
      continue
    fi
    # Look for the id as a literal string (-F), ignoring all ids files and
    # the collection's own gron file.
    results=$(rg -l -F -e "$id" -g '!*.ids' -g "!$collection.json.gron" .)
    # if results is empty, continue
    if [ -z "$results" ]; then
      continue
    fi
    # Every file that mentions the id becomes a source pointing at this
    # collection. Node names are quoted so that names which are not plain
    # DOT identifiers (for example ones containing '-') remain valid.
    while IFS= read -r sourceFile; do
      source_node=$(basename "$sourceFile" .json.gron | sed 's/\./_/g')
      echo " \"$source_node\" -> \"$target\"" >>../../relations.dot
    done <<<"$results"
    # The first id that is referenced elsewhere settles this collection;
    # the remaining ids are not examined.
    break
  done <"$idFile" &
  spinner "Detecting relations to $target ..."
done
echo "}" >>../../relations.dot
# Go back to the directory that holds relations.dot and render it to a PNG.
cd ../.. || {
  echo >&2 "Cannot return to the output directory. Aborting."
  exit 1
}
dot -Tpng -Goverlap=prism -Gsplines=true relations.dot -o relations.png &
dot_pid=$!
spinner "Creating graph ..."
# The spinner only polls the job; collect dot's exit status here so a failed
# render does not go unnoticed.
if ! wait "$dot_pid"; then
  echo >&2 "dot failed to render relations.png."
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment