A working proof of concept that can iterate over each file of each Gist for a given username.
Created
December 17, 2022 15:50
-
-
Save curran/2d8d9da4c59190ebd564da9df8f24e9e to your computer and use it in GitHub Desktop.
Gist Scraping Script Prototype
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { randomUUID } from 'node:crypto';
// https://github.com/octokit/octokit.js
import { Octokit } from 'octokit';
// GitHub users whose gists are processed. Each entry needs a `username`
// property holding the GitHub login.
// TODO load the real list, e.g.: import users from 'users-combined.csv';
const users = [{ username: 'curran' }];
// GitHub API client, created without credentials.
// To authenticate, construct it with a personal access token instead:
//   const octokit = new Octokit({ auth: `personal-access-token123` });
const octokit = new Octokit();
// Returns a random placeholder ID: 32 lowercase hexadecimal characters
// (a version 4 UUID with its dashes removed). Unlike stringifying
// Math.random(), the result has a fixed length and never contains
// exponent notation such as "e-7".
// TODO generate actual ID.
// NOTE(review): the ID format VizHub expects is not visible in this file;
// confirm it before using these IDs for real records.
const generateId = () => randomUUID().replaceAll('-', '');
// Returns a random placeholder ID for a single file.
// Currently just an alias of generateId, so file IDs and viz IDs share
// the same format.
// TODO generate actual file ID.
const generateFileId = generateId;
// Returns a placeholder VizHub user ID for a GitHub username: the username
// followed by a freshly generated ID. Because the suffix is regenerated on
// every call, two calls with the same username return different IDs.
// TODO actually get the right user.
const getVizHubUserIdFromGitHubUsername = (username) =>
  `${username}${generateId()}`;
// Converts a gist date string into a timestamp in whole seconds.
//
// Sample input: "2022-06-05T13:33:26Z"
// Sample output: 1654436006
//
// NOTE(review): assumes "VizHub timestamp format, integer of seconds" means
// seconds since the Unix epoch (1970-01-01T00:00:00Z) - TODO confirm.
//
// Throws an Error when gistDate cannot be parsed, so a malformed date is
// reported instead of being written out as NaN.
const gistDateToTimestamp = (gistDate) => {
  const milliseconds = Date.parse(gistDate);
  if (Number.isNaN(milliseconds)) {
    throw new Error(`Invalid gist date: ${gistDate}`);
  }
  return Math.floor(milliseconds / 1000);
};
// Pause applied after every raw-file download and after every gist
// (presumably to stay under GitHub's rate limits).
const REQUEST_DELAY_MS = 1000;

// Resolves after `ms` milliseconds.
const sleep = (ms) =>
  new Promise((resolve) => setTimeout(resolve, ms));

// Downloads the text of every file in `gist` and returns an object that maps
// a generated file ID to { name, text }. Files are fetched one at a time,
// pausing REQUEST_DELAY_MS after each download.
// Throws if any download answers with a non-2xx status, so an error page is
// never stored as file content.
const fetchGistFiles = async (gist) => {
  const files = {};
  for (const { filename, raw_url } of Object.values(gist.files)) {
    const response = await fetch(raw_url);
    if (!response.ok) {
      throw new Error(
        `Failed to fetch "${filename}" of gist ${gist.id}: HTTP ${response.status}`
      );
    }
    const text = await response.text();
    files[generateFileId()] = { name: filename, text };
    await sleep(REQUEST_DELAY_MS);
  }
  return files;
};

// Builds the viz record for `gist` from its already-downloaded `files`.
const buildViz = (gist, files) => {
  // TODO generate actual ID
  const id = generateId();
  const owner = getVizHubUserIdFromGitHubUsername(gist.owner.login);
  return {
    id,
    vizInfo: {
      id,
      owner,
      authors: [owner],
      // The GitHub API reports a null description for gists that have none;
      // fall back to an empty title instead of throwing on null.trim().
      title: (gist.description ?? '').trim(),
      // Backfilled separately
      forkedFrom: undefined,
      createdTimestamp: gistDateToTimestamp(gist.created_at),
      lastUpdatedTimestamp: gistDateToTimestamp(gist.updated_at),
    },
    vizContent: { id, files },
  };
};

// Walks every gist of every entry in `users`, downloads the content of each
// gist file, and logs the viz record that should be created for it.
// Nothing is persisted yet: the record is only printed to the console.
const migrate = async () => {
  for (const user of users) {
    // The endpoint is paginated, so request every page rather than only the
    // first one.
    const gists = await octokit.paginate(
      'GET /users/{username}/gists',
      { username: user.username, per_page: 100 }
    );

    for (const gist of gists) {
      // Abridged example value for gist (only the fields read here are
      // shown; the API returns more, and one "files" entry per file):
      // {
      //   "id": "a6c261aca1a12452111cb1b797c04d70",
      //   "files": {
      //     "App.js": {
      //       "filename": "App.js",
      //       "type": "application/javascript",
      //       "language": "JavaScript",
      //       "raw_url": "https://gist.githubusercontent.com/curran/a6c261aca1a12452111cb1b797c04d70/raw/60cfbd370a5c27328388658b0eb3213d29916959/App.js",
      //       "size": 268
      //     },
      //     "README.md": { ... }
      //   },
      //   "public": true,
      //   "created_at": "2022-06-05T13:33:26Z",
      //   "updated_at": "2022-06-05T13:36:47Z",
      //   "description": " React & D3 Starter",
      //   "owner": { "login": "curran", "id": 68416, ... },
      //   "truncated": false
      // }
      //console.log(JSON.stringify(gist, null, 2));

      // Fetch the content of each file individually.
      const files = await fetchGistFiles(gist);
      const viz = buildViz(gist, files);

      console.log('Create this viz:');
      console.log(JSON.stringify(viz, null, 2));

      await sleep(REQUEST_DELAY_MS);
    }
  }
};
// Script entry point. Report any failure and exit with a non-zero status
// instead of leaving the returned promise's rejection unhandled.
migrate().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment