Last active
November 27, 2022 11:07
-
-
Save jskherman/8d9c21dd078ad7b8743e76c86e42d786 to your computer and use it in GitHub Desktop.
Reads a CSV file from a GitHub repository, appends a new row to it, and commits the result back to GitHub. This may be useful for forms built with Python tools such as Streamlit.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import packages | |
import os | |
import dotenv # For .env files | |
import datetime | |
import pandas as pd | |
# import base64 | |
import requests | |
import io | |
# Do `pip install pygithub` first: https://stackoverflow.com/a/50072113 | |
from github import Github | |
from github import InputGitTreeElement | |
# Step 1: download the current CSV from GitHub, append one row, and write the
# updated file to local disk so that it can be committed afterwards.

# Uncomment to read the variables below from a local .env file.
# dotenv.load_dotenv()

# GitHub credentials come from the environment; a KeyError here means the
# corresponding variable has not been set.
user = os.environ["GITHUB_USERNAME"]
access_token = os.environ["GITHUB_TOKEN"]

# Specify repo and path to CSV file (path is relative to the repo root)
repo_name = 'your-github-repo-name'
path = 'path-to-file/relative-to/the-root-path/of-the-repo'

# Make GET request for current CSV file. The "raw" media type asks the
# contents endpoint for the file body itself instead of a JSON envelope.
r = requests.get(
    f'https://api.github.com/repos/{user}/{repo_name}/contents/{path}',
    headers={
        'accept': 'application/vnd.github.v3.raw',
        'authorization': f'token {access_token}',
    },
    timeout=30,  # do not hang forever if GitHub is unreachable
)
# Fail loudly on 401/403/404 etc.; otherwise the JSON error body would be
# parsed as if it were the CSV and then written back out.
r.raise_for_status()

# Convert CSV file to pandas DataFrame
string_io_obj = io.StringIO(r.text)
df = pd.read_csv(string_io_obj)

# Specify contents of the new row in key-value pairs
# (keys are the CSV column headers)
new_content = {
    "header1": "value1",
    "header2": "value2",
}

# Concatenate new row to df; ignore_index renumbers the rows 0..n-1
new_row = pd.DataFrame(new_content, index=[0])
df = pd.concat([df, new_row], ignore_index=True)

# Write new version of the CSV file with appended row. The local copy mirrors
# the repo-relative path, so create any missing parent directories first.
local_dir = os.path.dirname(path)
if local_dir:
    os.makedirs(local_dir, exist_ok=True)
df.to_csv(path, index=False)
# --------------------------------------------------------
# Step 2: commit the locally written file(s) to the main branch through the
# Git data API (tree -> commit -> move branch ref).

# Authenticate with GitHub
g = Github(access_token)

# Get Repository (looked up under the authenticated user's account)
repo = g.get_user().get_repo(repo_name)

# Save path to current working directory
work_dir = os.getcwd()

# Repo-relative names of the files to commit. This must include the CSV that
# was just written to `path`; a different hard-coded name would commit an
# unrelated (or missing) file instead of the updated one.
file_names = [path]

# Save full-length file paths. os.path.join picks the right separator on
# every platform, not only on "nt" and "posix".
file_list = [os.path.join(work_dir, name) for name in file_names]

# Commit Message (local time, second resolution)
commit_message = "Add new row on " + datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

# Get latest commit refs
main_ref = repo.get_git_ref('heads/main')  # on main branch
main_sha = main_ref.object.sha
base_tree = repo.get_git_tree(main_sha)

# Build one tree element per file: regular file mode (100644), blob content.
element_list = []
for name, entry in zip(file_names, file_list):
    # pandas writes UTF-8 by default, so read it back as UTF-8 rather than
    # relying on the platform's locale encoding.
    with open(entry, encoding="utf-8") as input_file:
        data = input_file.read()
    element_list.append(InputGitTreeElement(name, '100644', 'blob', data))

# Commit new files: create a tree on top of the current one, wrap it in a
# commit whose parent is the current head, then point main at that commit.
tree = repo.create_git_tree(element_list, base_tree)
parent = repo.get_git_commit(main_sha)
commit = repo.create_git_commit(commit_message, tree, [parent])
main_ref.edit(commit.sha)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment