Skip to content

Instantly share code, notes, and snippets.

@Koushikphy
Created November 8, 2023 09:34
Show Gist options
  • Save Koushikphy/970d60d46ca051f1c297ef37209000bd to your computer and use it in GitHub Desktop.
Save Koushikphy/970d60d46ca051f1c297ef37209000bd to your computer and use it in GitHub Desktop.
List all acronyms and/or keywords in your documents to avoid duplicate acronym definitions
import re,sys,subprocess
from colorama import init as colorama_init, Fore, Style
# List every acronym (and every user-supplied keyword) found in a LaTeX file,
# together with the numbered lines it appears on, so that duplicate acronym
# definitions are easy to spot.
#
# Usage: python <this script> FILE.tex

keywords = [
    # provide keywords
]

# Two or more capital letters with an optional plural "s": "DFT", "PESs".
ACRONYM_RE = re.compile(r'\b[A-Z](?:[A-Z])+s?\b')


def find_acronyms(text):
    """Return the unique acronyms in *text*, in order of first appearance."""
    return list(dict.fromkeys(ACRONYM_RE.findall(text)))


def whole_word(word, flags=0):
    """Compile a pattern matching *word* literally as a whole word.

    The word is escaped, so keywords containing regex metacharacters
    (e.g. "C++") are searched for verbatim.  Lookarounds are used instead of
    ``\\b`` so that words starting or ending with a non-word character still
    match.
    """
    return re.compile(rf'(?<!\w){re.escape(word)}(?!\w)', flags)


def is_comment(line):
    """Return True if *line* is a LaTeX comment line (first non-blank is %)."""
    return line.lstrip().startswith('%')


def outside_braces(line, pos):
    """Return True if index *pos* of *line* is not enclosed in ``{...}``.

    Braces are balanced per line only; escaped braces (``\\{``) and groups
    spanning several lines are not treated specially.
    """
    depth = 0
    for ch in line[:pos]:
        if ch == '{':
            depth += 1
        elif ch == '}' and depth:
            depth -= 1
    return depth == 0


def matching_lines(lines, pattern, skip_braced=False):
    """Return ``(line_number, line)`` pairs for lines that match *pattern*.

    Line numbers are 1-based.  Comment lines are ignored.  With
    *skip_braced* set, an occurrence inside ``{...}`` does not count, because
    those are usually command arguments such as ``\\label{...}`` or
    ``\\cite{...}``; a line is kept as soon as one occurrence is outside
    braces.
    """
    hits = []
    for number, line in enumerate(lines, start=1):
        if is_comment(line):
            continue
        if any(not skip_braced or outside_braces(line, m.start())
               for m in pattern.finditer(line)):
            hits.append((number, line))
    return hits


def highlight(line, pattern, start, end):
    """Wrap every match of *pattern* in *line* between *start* and *end*.

    The matched text itself is kept, so a case-insensitive match retains the
    spelling used in the document.
    """
    return pattern.sub(lambda m: f'{start}{m.group(0)}{end}', line)


def report(word, lines, pattern, skip_braced=False):
    """Print a header and the highlighted matching lines; nothing if no hit."""
    hits = matching_lines(lines, pattern, skip_braced)
    if not hits:
        return
    print(f"{word} {'='*50}")
    for number, line in hits:
        # Highlight the line before prefixing the number, so the prefix is
        # never recoloured.
        print(f"{number}: {highlight(line, pattern, Fore.RED, Style.RESET_ALL)}")
    print('\n\n')


def main(argv=None):
    """Scan the file named by ``argv[1]`` for acronyms and keywords.

    *argv* defaults to ``sys.argv``.  Exits with a usage message when no file
    name is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: python <this script> FILE.tex")

    colorama_init()

    # Undecodable bytes are replaced rather than aborting the whole scan.
    with open(argv[1], encoding='utf-8', errors='replace') as f:
        text = f.read()
    lines = text.splitlines()

    # specifically made for latex files
    for word in find_acronyms(text):
        report(word, lines, whole_word(word), skip_braced=True)

    # search the provided keywords (case-insensitively)
    for word in keywords:
        report(word, lines, whole_word(word, re.IGNORECASE))


if __name__ == '__main__':
    main()
# A rough shell equivalent (lists the unique acronyms only, without line context):
# grep -woE "[A-Z]{2,}s?" file | sort -u
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment