asifr’s gists

asifr / obsidian-web-clipper.js

Last active February 6, 2022 14:07

	javascript: Promise.all([import('https://unpkg.com/turndown@6.0.0?module'), import('https://unpkg.com/@tehshrike/readability@0.2.0'),]).then(async ([{
	default: Turndown
	}, {
	default: Readability
	}]) => {

	/* Optional vault name */
	const vault = "";

	/* Optional folder name such as "Clippings/" */

asifr / consecutive_group_ids.py

Created December 6, 2020 17:00

Create unique IDs from a run of 0s and 1s

	def consecutive_group_ids(df, id_col: str, time_col: str, value_col: str, event_id_col: str="EventID"):
	w1 = Window.partitionBy(id_col).orderBy(time_col)
	wcumsum = (
	Window
	.partitionBy(id_col)
	.orderBy(time_col)
	.rangeBetween(Window.unboundedPreceding, 0)
	)

	res = (

asifr / generate_ids.py

Created December 6, 2020 06:18

Group consecutive values and return unique ids

	import numpy as np

	def generate_ids(x):
	    """Label each run of consecutive equal values in ``x`` with its own id.

	    The first run gets id 0 and the id increases by one every time an element
	    differs from the element right before it, e.g. ``[0, 0, 1, 1, 0]`` becomes
	    ``[0., 0., 1., 1., 2.]``.

	    Args:
	        x: One-dimensional sequence or array of values. Plain Python
	            sequences are accepted; they are converted with ``np.asarray``
	            so the neighbour comparison is element-wise.

	    Returns:
	        A float ``np.ndarray`` with one id per element of ``x`` (empty when
	        ``x`` is empty).
	    """
	    # Without this conversion a list input compares as a whole
	    # (``x[1:] != x[:-1]`` is a single bool) and the ids come out wrong.
	    values = np.asarray(x)
	    ids = np.zeros(len(values))
	    # A new run starts wherever an element differs from its predecessor, so
	    # the running count of those change points is exactly the run id.
	    ids[1:] = np.cumsum(values[1:] != values[:-1])
	    return ids

asifr / resample.py

Last active December 6, 2020 06:19

Resample a numpy array

	import numpy as np

	def resample(x, t, start, end, step):
	    """Scatter irregularly timed samples onto a regular grid of bin edges.

	    The grid is ``np.arange(start, end + step, step)``. Each sample
	    ``x[i, j]`` is written to row ``i`` of the output at the column returned
	    by ``np.digitize(t[i, j], bins)``:

	    * column ``k >= 1`` receives samples with ``bins[k-1] <= t < bins[k]``;
	    * column ``0`` receives samples with ``t < bins[0]``;
	    * samples with ``t >= bins[-1]`` have no column and are skipped
	      (they previously raised ``IndexError``).

	    When several samples of a row map to the same column, a later one
	    overwrites an earlier one. Columns that receive nothing stay NaN.

	    Args:
	        x: 2-D array of sample values, indexed ``[row, sample]``.
	        t: 2-D array of sample times with the same layout as ``x``.
	        start: First bin edge.
	        end: Upper limit of the grid; ``end + step`` is passed to
	            ``np.arange`` as the exclusive stop.
	        step: Spacing between consecutive bin edges.

	    Returns:
	        Tuple ``(y, bins)`` where ``y`` is a float array of shape
	        ``(x.shape[0], len(bins))`` and ``bins`` is the array of bin edges.
	    """
	    bins = np.arange(start, end + step, step)
	    n_bins = len(bins)
	    cols = np.digitize(t, bins)
	    n_rows = x.shape[0]
	    y = np.full((n_rows, n_bins), np.nan)
	    for i in range(n_rows):
	        # np.digitize returns len(bins) for times at or past the last edge,
	        # which is not a valid column of ``y``; leave those samples out.
	        in_grid = cols[i, :] < n_bins
	        y[i, cols[i, in_grid]] = x[i, in_grid]
	    return y, bins

asifr / pad.py

Created December 3, 2020 16:49

	import numpy as np

	def pad_sequences(
	sequences, maxlen=None, dtype="int32", padding="pre", truncating="pre", value=0.0
	):
	if not hasattr(sequences, "__len__"):
	raise ValueError("`sequences` must be iterable.")
	lengths = []
	for x in sequences:
	if not hasattr(x, "__len__"):

asifr / outlier_detect.py

Created December 3, 2020 16:46

	import numpy as np

	def outlier_detect(data, threshold=1, method="IQR"):
	assert method in ["IQR", "STD", "MAD"], "Method must be one of IQR\|STD\|MAD"

	if method == "IQR":
	IQR = np.quantile(data, 0.75) - np.quantile(data, 0.25)
	lower = np.quantile(data, 0.25) - (IQR * threshold)
	upper = np.quantile(data, 0.75) + (IQR * threshold)
	if method == "STD":

asifr / dataframe2arrays.py

Last active January 4, 2021 03:47

	from typing import Dict, List, Tuple, Optional
	import numpy as np
	import pandas as pd


	def ffill(arr: np.ndarray):
	arr = arr.T
	mask = np.isnan(arr)
	idx = np.where(~mask, np.arange(mask.shape[1]), 0)
	np.maximum.accumulate(idx, axis=1, out=idx)

asifr / ffill.py

Created December 3, 2020 16:45

	import numpy as np

	def ffill(arr: np.ndarray):
	    """Forward-fill NaN entries along the first axis.

	    Every NaN is replaced by the most recent non-NaN value above it in the
	    same column. NaNs that come before the first valid value of a column are
	    left as NaN. The input array is not modified.

	    Args:
	        arr: 2-D array, filled down each column. A 1-D array is also
	            accepted and is filled along its only axis.

	    Returns:
	        A new array with the same shape as ``arr``.
	    """
	    data = np.asarray(arr)
	    is_vector = data.ndim == 1
	    if is_vector:
	        # Treat a vector as a single column so the 2-D logic below applies.
	        data = data[:, None]
	    rows = data.T
	    missing = np.isnan(rows)
	    # For each position keep its own index when valid, else 0; the running
	    # maximum then yields the index of the last valid entry seen so far.
	    last_valid = np.where(~missing, np.arange(missing.shape[1]), 0)
	    np.maximum.accumulate(last_valid, axis=1, out=last_valid)
	    filled = rows[np.arange(last_valid.shape[0])[:, None], last_valid].T
	    return filled[:, 0] if is_vector else filled

asifr / myspark.py

Created December 2, 2020 18:43

	"""
	Creates a new connection to spark and makes available:
	`spark`, `sq` (`SQLContext`), `F`, and `Window` in the global namespace.
	"""
	from textwrap import dedent
	import findspark
	import os

	# Private helper that accepts an optional ``submit_args`` value.
	# As shown in this listing the body is a bare ``pass``, so the argument is
	# ignored and the call returns None.
	# NOTE(review): this listing appears to be a shortened preview of the file;
	# confirm against the full source whether the real body continues.
	def _formulate_pyspark_submit_args(submit_args=None):
	pass

asifr / event_label.py

Last active December 3, 2020 16:48

Label each row of a dataframe with EventXHoursFromNow or EventWithinXHours

	import pyspark.sql.functions as F
	from pyspark.sql import Window
	from pyspark.sql.column import Column


	def overlaps(start_first, end_first, start_second, end_second):
	    """Test whether two closed intervals share at least one point.

	    The intervals are ``[start_first, end_first]`` and
	    ``[start_second, end_second]``; touching endpoints count as overlap.
	    The two comparisons are combined with ``&``, so the result is whatever
	    ``>=`` and ``&`` produce for the operand types passed in.

	    Returns:
	        The combined result of ``end_first >= start_second`` and
	        ``end_second >= start_first``.
	    """
	    first_reaches_second = end_first >= start_second
	    second_reaches_first = end_second >= start_first
	    return first_reaches_second & second_reaches_first


	def eventXHrFromNow(hours: int, time_col: str, start_col: str, end_col: str) -> Column:

Asif Rahman asifr