Julien Simon (juliensimon)

LANGUAGE PRETRAINING
# run_clm.py requires an --output_dir; the gist is truncated after --do_train,
# so the last two flags below are reconstructed and the path is a placeholder
python run_clm.py \
    --model_name_or_path gpt2 \
    --dataset_name wikitext \
    --dataset_config_name wikitext-103-raw-v1 \
    --num_train_epochs 10 \
    --per_device_train_batch_size 8 \
    --per_device_eval_batch_size 8 \
    --do_train \
    --do_eval \
    --output_dir ./gpt2-wikitext-103
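Once training completes, the checkpoint saved in the output directory can be loaded back for inference. A minimal sketch, assuming the placeholder output path used above:

from transformers import pipeline

# Load the freshly pretrained checkpoint (the path is the placeholder from the command above)
generator = pipeline("text-generation", model="./gpt2-wikitext-103")
print(generator("The meaning of life is", max_new_tokens=20))
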
from datasets import load_dataset
from sentence_transformers.losses import CosineSimilarityLoss
from setfit import SetFitModel, SetFitTrainer

dataset = load_dataset("yelp_polarity")
print(dataset)

# Keep 8 examples per class (16 total); shuffling a balanced dataset
# gives an approximately balanced few-shot sample
train_ds = dataset["train"].shuffle(seed=42).select(range(8 * 2))
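The snippet stops before the training step. A hedged completion following the standard SetFit workflow; the base checkpoint name is an assumption, not part of the gist:

# Sketch of the remaining steps; the base model below is an assumption
model = SetFitModel.from_pretrained("sentence-transformers/paraphrase-mpnet-base-v2")
trainer = SetFitTrainer(
    model=model,
    train_dataset=train_ds,
    loss_class=CosineSimilarityLoss,
    batch_size=16,
    num_iterations=20,  # number of contrastive pairs generated per example
)
trainer.train()
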
@juliensimon
juliensimon / benchmark.py
Last active May 5, 2024 23:02
Benchmark script
import time
import numpy as np
import torch
from transformers import pipeline

def benchmark(pipeline, data, iterations=1000):
    # Warmup
    for i in range(100):
        pipeline(data)
    # Timed loop (reconstructed: the gist is truncated after the warmup)
    times = []
    for i in range(iterations):
        start = time.time()
        pipeline(data)
        times.append(time.time() - start)
    return np.mean(times), np.percentile(times, 95)
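A usage sketch for the function above; the task, model, and input text are assumptions, not part of the gist:

# Hypothetical example: benchmark a sentiment-analysis pipeline on CPU
clf = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
mean_s, p95_s = benchmark(clf, "This benchmark script is really useful!", iterations=100)
print(f"mean: {mean_s * 1000:.1f} ms, p95: {p95_s * 1000:.1f} ms")
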
@juliensimon
juliensimon / gist:4eccabf58fa2d97a294d181a525b0127
Created November 1, 2022 01:38
Notebook instance instructions
### CREATE NOTEBOOK INSTANCE
export HOME=/home/ec2-user
# Install and enable Git LFS
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
sudo yum install git-lfs -y
git lfs install
Here are the setup instructions. Please reply in the thread if you have questions or issues. (A programmatic alternative is sketched after the list.)

1. Using your own AWS account, log in to the AWS console at https://console.aws.amazon.com/sagemaker.
2. Select the "Ireland" region in the top-right corner.
3. Go to "Notebook / Notebook instances".
4. Click on "Create notebook instance".
5. "Notebook instance name": type a name for your instance, e.g. "workshop-instance".
6. "Notebook instance type": select "ml.t2.medium"; no need for anything bigger.
7. "IAM role"
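For reference, the same instance can be created programmatically with boto3. A minimal sketch, assuming a pre-existing SageMaker execution role (the role ARN below is a placeholder):

import boto3

# Hypothetical role ARN; use the IAM role selected in the console steps above
ROLE_ARN = "arn:aws:iam::123456789012:role/workshop-sagemaker-role"

sm = boto3.client("sagemaker", region_name="eu-west-1")  # Ireland
sm.create_notebook_instance(
    NotebookInstanceName="workshop-instance",
    InstanceType="ml.t2.medium",
    RoleArn=ROLE_ARN,
)
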
@juliensimon
juliensimon / dgl7.py
Last active December 20, 2019 23:49
DGL part 7
last_epoch = all_preds[epochs - 1].detach().numpy()
predicted_class = np.argmax(last_epoch, axis=-1)
print(predicted_class)
# Output:
# [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 0 1 1 1 1 1 1 0 1 0 1 1 1]
@juliensimon
juliensimon / dgl6.py
Created December 20, 2019 23:21
DGL part 6
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
all_preds = []
epochs = 50

for epoch in range(epochs):
    preds = net(G, inputs)
    all_preds.append(preds)
    # We only compute loss for labeled nodes
    loss = F.cross_entropy(preds[labeled_nodes], labels)
    # PyTorch accumulates gradients by default, we need to zero them
    # (the three lines below are reconstructed; the gist is truncated here)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
@juliensimon
juliensimon / dgl5.py
Last active December 20, 2019 23:17
DGL part 5
# One-hot features: each node is described only by its own identity
inputs = torch.eye(node_count)
# Semi-supervised setting: only two nodes are labeled, one per class
labeled_nodes = torch.tensor([0, 33])
labels = torch.tensor([0, 1])
@juliensimon
juliensimon / dgl4.py
Last active December 20, 2019 23:04
DGL part 4
import torch
import torch.nn as nn
import torch.nn.functional as F

def gcn_message(edges):
    # In: a batch of graph edges
    # Out: a message containing the features of the source node
    return {'msg': edges.src['h']}

def gcn_reduce(nodes):
    # In: a batch of graph nodes
    # Out: the new node feature, i.e. the sum of the messages received
    # (the return line is reconstructed; the gist is truncated here)
    return {'h': torch.sum(nodes.mailbox['msg'], dim=1)}
@juliensimon
juliensimon / dgl3.py
Last active December 20, 2019 23:37
DGL part 3
class GCN(nn.Module):
    def __init__(self, in_feats, hidden_size, num_classes):
        super(GCN, self).__init__()
        self.gcn1 = GCNLayer(in_feats, hidden_size)
        self.gcn2 = GCNLayer(hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, g, inputs):
        h = self.gcn1(g, inputs)
        h = torch.relu(h)
        # (the last three lines are reconstructed; the gist is truncated here)
        h = self.gcn2(g, h)
        h = self.softmax(h)
        return h
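GCNLayer itself is not shown in these gists. A hedged sketch of what it likely looks like, following the classic DGL send/recv pattern that matches the gcn_message and gcn_reduce functions from part 4 (newer DGL versions replace this with update_all):

class GCNLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(GCNLayer, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, inputs):
        # Store node features, trigger message passing, then apply a linear layer
        g.ndata['h'] = inputs
        g.send(g.edges(), gcn_message)
        g.recv(g.nodes(), gcn_reduce)
        h = g.ndata.pop('h')
        return self.linear(h)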