Skip to content

Instantly share code, notes, and snippets.

View koaning's full-sized avatar

vincent d warmerdam koaning

View GitHub Profile
@koaning
koaning / recipe.py
Created October 5, 2022 14:18
Custom audio recipe for Prodigy
import prodigy
from typing import List, Optional, Union, Iterable
from prodigy.components.loaders import get_stream
from prodigy.components.preprocess import fetch_media as fetch_media_preprocessor
from prodigy.util import log, msg, get_labels, split_string
from prodigy.types import TaskType, RecipeSettingsType
def remove_base64(examples: List[TaskType]) -> List[TaskType]:
@koaning
koaning / benchmark.py
Last active October 1, 2022 15:08
HTML parsing benchmark
import timeit
import requests
import html_text
import justext
from selectolax.parser import HTMLParser
# Two inputs of very different size: a full page downloaded from the web and a one-line HTML snippet.
html_long = requests.get("http://planet.python.org/").content.decode("utf-8")
html_short = "<p><b>This</b> is just a small example.</p>"
@koaning
koaning / demo.py
Created July 14, 2022 11:45
Demonstration of Operators and Quantifiers from spaCy.
import spacy
from spacy import displacy
def show_results(text, patterns):
nlp = spacy.blank("en")
ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns(patterns)
doc = nlp(text)
@koaning
koaning / make_patterns.py
Created July 8, 2022 10:02
Scripts that were used in the "Video Games with Sense2Vec" tutorial found here: https://youtu.be/chLZ6g4t3VA.
"""
This script combines two datasets to generate a file with all found patterns.
"""
import srsly
from prodigy.components.db import connect
import spacy
nlp = spacy.blank("en")
@koaning
koaning / recipe.py
Created June 22, 2022 08:35
This is the recipe that belongs to the Prodigy tutorial found on YouTube. https://youtu.be/dXVRonRdg7g
import time
from typing import List
from rich import box
from rich.table import Table
from rich.console import Console
import prodigy
from prodigy.components.loaders import CSV
@koaning
koaning / build_instructions.py
Created June 13, 2022 16:57
Prodigy Instructions
import base64
import pathlib
from jinja2 import Environment, FileSystemLoader, select_autoescape
# Jinja2 environment whose templates are looked up in the local "images" directory.
# NOTE(review): select_autoescape() is called with its defaults, which (per the
# Jinja2 docs) enable escaping based on the template name's extension; a name
# ending in ".template" is presumably not autoescaped — confirm this is intended.
env = Environment(
loader=FileSystemLoader("images"),
autoescape=select_autoescape()
)
# Fetch the "instructions.template" template through that environment.
template = env.get_template("instructions.template")
@koaning
koaning / bionic.py
Last active August 11, 2022 14:05
A custom recipe for Prodigy that mimics Bionic Reading.
import pyphen
import prodigy
from prodigy.components.loaders import JSONL
from prodigy.components.db import connect
hyphenator = pyphen.Pyphen(lang="en_US")
def construct_html(text):
hyphend = hyphenator.inserted(text)
@koaning
koaning / polars.ipynb
Created November 1, 2021 20:54
A benchmark with Polars.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@koaning
koaning / before.ipynb
Created October 30, 2021 13:26
Before nbqa
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@koaning
koaning / resume.json
Last active July 28, 2021 18:58
resume
{
"$schema": "https://raw.githubusercontent.com/jsonresume/resume-schema/v1.0.0/schema.json",
"basics": {
"name": "Vincent D. Warmerdam",
"label": "Senior Data Professional & Research Advocate",
"image": "",
"email": "vincentwarmerdam@gmail.com",
"url": "https://koaning.io",
"summary": "",
"location": {