Last active
February 9, 2024 09:55
-
-
Save jonathanbp/e95549b64de3b55b8f1358cbe9e23d0a to your computer and use it in GitHub Desktop.
Bash script that tries to figure out relations between collections in a MongoDB dump.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# This script tries to detect relations between collections in a MongoDB database.
# Show a rotating spinner followed by a message while a background job runs.
#
# Must be called right after starting a job with `&`: the job to watch is
# taken from $! (the most recently started background process).
#
# Arguments:
#   $1 - message displayed next to the spinner
#   $2 - optional number of seconds between frames (default: 0.75)
# Outputs:
#   Writes spinner frames and backspace characters to stdout.
spinner() {
  local info="$1"
  local delay="${2:-0.75}"
  local pid=$!
  local spinstr='|/-\'
  local rest erase

  # One frame is "[c] " plus the message. Build the matching run of
  # backspaces once, instead of recounting the message on every tick.
  printf -v erase '%*s' "$((${#info} + 4))" ''
  erase=${erase// /$'\b'}

  # kill -0 only probes whether the process still exists.
  while kill -0 "$pid" 2>/dev/null; do
    rest=${spinstr#?}
    # The message is passed as an argument, never as part of the format,
    # so '%' or '\' inside it are printed literally.
    printf '[%c] %s' "$spinstr" "$info"
    # Rotate the spinner characters by one position.
    spinstr=$rest${spinstr%"$rest"}
    sleep "$delay"
    # Move the cursor back to the start of the frame.
    printf '%s' "$erase"
  done
  printf ' \b\b\b\b'
}
# Make sure every external tool in the list below can be found before doing
# any work; stop at the first one that is missing.
# NOTE(review): jq and awk are checked here but do not appear to be invoked
# anywhere in the visible part of this script - confirm whether they are needed.
commands=(
  "bsondump"
  "dot"
  "fastgron"
  "rg"
  "jq"
  "fd"
  "awk"
  "sort"
  "uniq"
)
for cmd in "${commands[@]}"; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    printf '%s\n' "I require $cmd but it's not installed. Aborting." >&2
    exit 1
  fi
done
# Enter the dump directory given as the first argument. With no argument the
# current directory is used, which is what `cd ""` effectively did before.
# Abort if the directory cannot be entered, so that nothing is created or
# deleted in an unintended location.
cd "${1:-.}" || {
  echo >&2 "Cannot enter dump directory '${1:-.}'. Aborting."
  exit 1
}
# The conversion only runs when no relations directory exists yet; an existing
# directory is treated as the result of a previous run and reused as-is.
if [ ! -d relations ]; then
  # Create a directory to store the data
  mkdir -p relations || exit 1
  # Convert all bson files to json. fd substitutes {} with the path of each
  # match and {.} with the same path minus its extension.
  fd -e bson -x bsondump --quiet {} --outFile relations/{.}.json &
  spinner "Converting bson to json ..."
  # Convert every json file to gron format and drop the json afterwards.
  # IFS= and -r keep whitespace and backslashes in file names intact.
  fd -e json "" "relations" | while IFS= read -r jsonFile; do
    fastgron --stream "$jsonFile" >"$jsonFile.gron"
    rm "$jsonFile"
  done &
  spinner "Converting json to gron ..."
fi
# From now on we will work in the relations directory
cd relations || {
  echo >&2 "Cannot enter the relations directory. Aborting."
  exit 1
}
# For every gron file, collect the distinct id values into "<file>.ids".
fd -e gron | while IFS= read -r gronFile; do
  # Skip if the ids file is already present
  if [ -f "$gronFile.ids" ]; then
    continue
  fi
  # Keep assignments whose path contains "_id" and whose right-hand side is
  # at least 20 characters long (presumably to select ObjectId-like values -
  # TODO confirm). "-f 2-" keeps everything after the first "=", so a value
  # that itself contains "=" is no longer cut short.
  rg "_id.*\s=\s.{20,10000}" "$gronFile" | cut -f 2- -d "=" | sort -u >"$gronFile.ids"
done &
spinner "Extracting object ids ..."
# Write the graph description two levels up, i.e. next to the dump directory
# (the working directory at this point is <dump>/relations).
dotFile=../../relations.dot
{
  echo "digraph {"
  echo " edge [fontname=\"Iosevka Fixed\"]"
  echo " node [fontname=\"Iosevka Fixed\" shape=rect]"
} >"$dotFile"
# For each id file, run through its ids and look for references in other files
fd -e ids | while IFS= read -r idFile; do
  # The gron file these ids were extracted from; it must not count as a
  # reference to itself. The real file name is used for the exclusion because
  # the node name below has its dots rewritten.
  ownGron=$(basename "$idFile" .ids)
  # Node name: file name without the .json.gron.ids suffix, with . replaced by _
  target=$(basename "$idFile" .json.gron.ids | sed 's/\./_/g')
  # Reading the file directly (instead of a here-string) means an empty ids
  # file yields no iterations at all.
  while read -r id; do
    # An empty pattern would match every line of every file.
    if [ -z "$id" ]; then
      continue
    fi
    # Look for the id as a literal string (-F) in all gron files except our own.
    results=$(rg -l -F -g "!*.ids" -g "!$ownGron" -- "$id" .)
    # if results is empty, try the next id
    if [ -z "$results" ]; then
      continue
    fi
    # Emit one edge per referencing file. This runs synchronously so that all
    # edges are written before the closing brace is appended below.
    while IFS= read -r sourceFile; do
      sourceNode=$(basename "$sourceFile" .json.gron | sed 's/\./_/g')
      # Node ids are quoted so names with characters such as "-" stay valid DOT.
      echo " \"$sourceNode\" -> \"$target\"" >>"$dotFile"
    done <<<"$results"
    # Only the first id that has references is used for this collection.
    break
  done <"$idFile" &
  spinner "Detecting relations to $target ..."
done
echo "}" >>"$dotFile"
# Go to the directory that holds relations.dot and render it as a PNG.
cd ../.. || {
  echo >&2 "Cannot enter the output directory. Aborting."
  exit 1
}
dot -Tpng -Goverlap=prism -Gsplines=true relations.dot -o relations.png &
# Remember the job so its exit status can be collected after the spinner.
dotPid=$!
spinner "Creating graph ..."
# Make the script's exit status reflect whether rendering succeeded.
wait "$dotPid"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment