Skip to content

Instantly share code, notes, and snippets.

@dmesquita
Created December 8, 2019 18:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dmesquita/63addc86357835b95f98e484c74328d1 to your computer and use it in GitHub Desktop.
Save dmesquita/63addc86357835b95f98e484c74328d1 to your computer and use it in GitHub Desktop.
import time

import cudf as pd_cudf
import modin.pandas as pd_modin
import pandas as pd
# Timing records, one dict per run: {"lib": <library name>, "time": <seconds>}.
results_groupby = []

### Time a groupby-size on "CO_IES" with Pandas (30 runs)
# NOTE(review): loop extent reconstructed (source indentation was lost);
# the print is assumed to run once per iteration -- confirm.
for _ in range(30):
    # Re-read on every run: `df` is rebound to the groupby result below, so the
    # frame has to be loaded again. The read is outside the timed region.
    df = pd.read_csv(
        "../inep/dados/microdados_educacao_superior_2018//microdados_ed_superior_2018/dados/DM_ALUNO.CSV",
        delimiter="|",
        encoding="latin-1",
    )
    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump if the system clock is adjusted mid-benchmark.
    start = time.perf_counter()
    df = df.groupby("CO_IES").size()
    elapsed = time.perf_counter() - start
    results_groupby.append({"lib": "Pandas", "time": elapsed})
    print("Pandas Groupby Time = {}".format(elapsed))
### Time a groupby-size on "CO_IES" with Modin (30 runs)
# NOTE(review): loop extent reconstructed (source indentation was lost);
# the print is assumed to run once per iteration -- confirm.
for _ in range(30):
    # Re-read on every run: `df` is rebound to the groupby result below, so the
    # frame has to be loaded again. The read call is outside the timed region.
    df = pd_modin.read_csv(
        "../inep/dados/microdados_educacao_superior_2018//microdados_ed_superior_2018/dados/DM_ALUNO.CSV",
        delimiter="|",
        encoding="latin-1",
    )
    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump if the system clock is adjusted mid-benchmark.
    start = time.perf_counter()
    df = df.groupby("CO_IES").size()
    elapsed = time.perf_counter() - start
    results_groupby.append({"lib": "Modin", "time": elapsed})
    print("Modin Groupby Time = {}".format(elapsed))
### Time a groupby-size on "CO_IES" with cudf (30 runs)
# NOTE(review): loop extent reconstructed (source indentation was lost);
# the print is assumed to run once per iteration -- confirm.
for _ in range(30):
    # Re-read on every run: `df` is rebound to the groupby result below, so the
    # frame has to be loaded again. The read call is outside the timed region.
    df = pd_cudf.read_csv(
        "../inep/dados/microdados_educacao_superior_2018//microdados_ed_superior_2018/dados/DM_ALUNO.CSV",
        delimiter="|",
        encoding="latin-1",
    )
    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump if the system clock is adjusted mid-benchmark.
    start = time.perf_counter()
    df = df.groupby("CO_IES").size()
    elapsed = time.perf_counter() - start
    results_groupby.append({"lib": "Cudf", "time": elapsed})
    print("Cudf Groupby Time = {}".format(elapsed))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment