Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Last active June 22, 2022 08:48
Show Gist options
  • Save psychemedia/6fd3655782daf031299cfec03043d6bc to your computer and use it in GitHub Desktop.
Save psychemedia/6fd3655782daf031299cfec03043d6bc to your computer and use it in GitHub Desktop.
Test read/write for pandas dataframes
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "python",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8"
},
"kernelspec": {
"name": "python",
"display_name": "Pyolite",
"language": "python"
}
},
"nbformat_minor": 4,
"nbformat": 4,
"cells": [
{
"cell_type": "code",
"source": "import micropip",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "import os\n\nimport numpy as np\nimport pandas as pd\n\n# Row count of the sample frame shared by every read/write cell below.\nsz = 100\n\n# Seed the global NumPy RNG so column A is identical on every run.\nnp.random.seed(42)\n\n# A: sz draws from np.random.randn; B: the constant 1 repeated sz times.\ndf = pd.DataFrame(\n    {\n        \"A\": np.random.randn(sz),\n        \"B\": [1] * sz,\n    }\n)\n\ndf",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "import os\n\n\ndef test_csv_write(df):\n    \"\"\"Write df to data/test.csv, creating the data/ directory if it is missing.\"\"\"\n    # to_csv does not create parent directories, and no other cell creates\n    # data/, so make sure it exists before writing.\n    os.makedirs(\"data\", exist_ok=True)\n    df.to_csv(\"data/test.csv\")\n\n\ndef test_csv_read():\n    \"\"\"Read data/test.csv back into a DataFrame and return it.\"\"\"\n    return pd.read_csv(\"data/test.csv\")\n\n\ntest_csv_write(df)\ntest_csv_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_json_write(df):\n    \"\"\"Serialise df to test_json.json in the working directory.\"\"\"\n    target = \"test_json.json\"\n    df.to_json(target)\n\n\ndef test_json_read():\n    \"\"\"Load test_json.json back into a DataFrame and return it.\"\"\"\n    loaded = pd.read_json(\"test_json.json\")\n    return loaded\n\n\ntest_json_write(df)\ntest_json_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "await micropip.install(\"html5lib\")",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "await micropip.install(\"lxml\")",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_html_write(df):\n    \"\"\"Render df as an HTML table in ./test_html.html.\"\"\"\n    target = \"./test_html.html\"\n    df.to_html(target)\n\n\ndef test_html_read():\n    \"\"\"Parse ./test_html.html and return the first table found.\"\"\"\n    # read_html returns a list of DataFrames, one per table in the document.\n    tables = pd.read_html(\"./test_html.html\")\n    return tables[0]\n\n\ntest_html_write(df)\ntest_html_read()",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_xml_write(df):\n    \"\"\"Serialise df to test_xml.xml in the working directory.\"\"\"\n    target = \"test_xml.xml\"\n    df.to_xml(target)\n\n\ndef test_xml_read():\n    \"\"\"Load test_xml.xml back into a DataFrame and return it.\"\"\"\n    loaded = pd.read_xml(\"test_xml.xml\")\n    return loaded\n\n\ntest_xml_write(df)\ntest_xml_read()",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "await micropip.install(\"openpyxl\")",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_xlsx_write(df):\n    \"\"\"Write df to the Excel workbook test_xlsx.xlsx.\"\"\"\n    target = \"test_xlsx.xlsx\"\n    df.to_excel(target)\n\n\ndef test_xlsx_read():\n    \"\"\"Read test_xlsx.xlsx back into a DataFrame and return it.\"\"\"\n    loaded = pd.read_excel(\"test_xlsx.xlsx\")\n    return loaded\n\n\ntest_xlsx_write(df)\ntest_xlsx_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "import os\nimport sqlite3\nfrom contextlib import closing\n\n\ndef test_sql_write(df):\n    \"\"\"Write df to table test_table in a freshly created SQLite file test.sql.\"\"\"\n    # Remove any database left by a previous run so the table is created anew.\n    if os.path.exists(\"test.sql\"):\n        os.remove(\"test.sql\")\n    # closing() releases the connection even when to_sql raises.\n    with closing(sqlite3.connect(\"test.sql\")) as sql_db:\n        df.to_sql(name=\"test_table\", con=sql_db)\n\n\ndef test_sql_read():\n    \"\"\"Return every row of test_table in test.sql as a DataFrame.\"\"\"\n    # closing() releases the connection even when the query raises.\n    with closing(sqlite3.connect(\"test.sql\")) as sql_db:\n        return pd.read_sql_query(\"select * from test_table\", sql_db)\n\n\ntest_sql_write(df)\ntest_sql_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "await micropip.install(\"tables\")",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_hdf_fixed_write(df):\n    \"\"\"Write df to test_fixed.hdf under key test, overwriting the file.\"\"\"\n    # key is passed by keyword: positional use is deprecated in to_hdf.\n    df.to_hdf(\"test_fixed.hdf\", key=\"test\", mode=\"w\")\n\n\ndef test_hdf_fixed_read():\n    \"\"\"Read the object stored under key test in test_fixed.hdf.\"\"\"\n    return pd.read_hdf(\"test_fixed.hdf\", key=\"test\")\n\n\ntest_hdf_fixed_write(df)\ntest_hdf_fixed_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_hdf_fixed_write_compress(df):\n    \"\"\"Write df to test_fixed_compress.hdf under key test with complib blosc.\"\"\"\n    # key is passed by keyword: positional use is deprecated in to_hdf.\n    df.to_hdf(\"test_fixed_compress.hdf\", key=\"test\", mode=\"w\", complib=\"blosc\")\n\n\ndef test_hdf_fixed_read_compress():\n    \"\"\"Read the object stored under key test in test_fixed_compress.hdf.\"\"\"\n    return pd.read_hdf(\"test_fixed_compress.hdf\", key=\"test\")\n\n\ntest_hdf_fixed_write_compress(df)\ntest_hdf_fixed_read_compress()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_hdf_table_write(df):\n    \"\"\"Write df to test_table.hdf under key test using the table format.\"\"\"\n    # key is passed by keyword: positional use is deprecated in to_hdf.\n    df.to_hdf(\"test_table.hdf\", key=\"test\", mode=\"w\", format=\"table\")\n\n\ndef test_hdf_table_read():\n    \"\"\"Read the object stored under key test in test_table.hdf.\"\"\"\n    return pd.read_hdf(\"test_table.hdf\", key=\"test\")\n\n\ntest_hdf_table_write(df)\ntest_hdf_table_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_hdf_table_write_compress(df):\n    \"\"\"Write df to test_table_compress.hdf under key test (table format, complib blosc).\"\"\"\n    # key is passed by keyword: positional use is deprecated in to_hdf.\n    df.to_hdf(\n        \"test_table_compress.hdf\",\n        key=\"test\",\n        mode=\"w\",\n        complib=\"blosc\",\n        format=\"table\",\n    )\n\n\ndef test_hdf_table_read_compress():\n    \"\"\"Read the object stored under key test in test_table_compress.hdf.\"\"\"\n    return pd.read_hdf(\"test_table_compress.hdf\", key=\"test\")\n\n\ntest_hdf_table_write_compress(df)\ntest_hdf_table_read_compress()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "await micropip.install(\"pyarrow\")",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_feather_write(df):\n    \"\"\"Write df to test.feather in the working directory.\"\"\"\n    target = \"test.feather\"\n    df.to_feather(target)\n\n\ndef test_feather_read():\n    \"\"\"Read test.feather back into a DataFrame and return it.\"\"\"\n    loaded = pd.read_feather(\"test.feather\")\n    return loaded\n\n\ntest_feather_write(df)\ntest_feather_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_pickle_write(df):\n    \"\"\"Pickle df to test.pkl in the working directory.\"\"\"\n    target = \"test.pkl\"\n    df.to_pickle(target)\n\n\ndef test_pickle_read():\n    \"\"\"Unpickle test.pkl and return the stored object.\"\"\"\n    loaded = pd.read_pickle(\"test.pkl\")\n    return loaded\n\n\ntest_pickle_write(df)\ntest_pickle_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_pickle_write_compress(df):\n    \"\"\"Pickle df to test.pkl.compress with xz compression.\"\"\"\n    target = \"test.pkl.compress\"\n    df.to_pickle(target, compression=\"xz\")\n\n\ndef test_pickle_read_compress():\n    \"\"\"Unpickle the xz-compressed test.pkl.compress and return the stored object.\"\"\"\n    loaded = pd.read_pickle(\"test.pkl.compress\", compression=\"xz\")\n    return loaded\n\n\ntest_pickle_write_compress(df)\ntest_pickle_read_compress()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "def test_parquet_write(df):\n    \"\"\"Write df to test.parquet in the working directory.\"\"\"\n    target = \"test.parquet\"\n    df.to_parquet(target)\n\n\ndef test_parquet_read():\n    \"\"\"Read test.parquet back into a DataFrame and return it.\"\"\"\n    loaded = pd.read_parquet(\"test.parquet\")\n    return loaded\n\n\ntest_parquet_write(df)\ntest_parquet_read()",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment