diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000000..b6e68446328 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "pwa-chrome", + "request": "launch", + "name": "Launch Chrome against localhost", + "url": "http://localhost:8080", + "webRoot": "${workspaceFolder}" + } + ] +} \ No newline at end of file diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index ab6161eba56..20f7a5120e6 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -9,6 +9,7 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", + "\n", "pd.set_option('display.max_rows', 100)\n", "pd.set_option('display.max_columns', None)" ] @@ -20,15 +21,15 @@ "metadata": {}, "outputs": [], "source": [ - "def createDataFrame(manufacturer):\n", - " def read_csv(file):\n", - " return pd.read_csv(file, index_col='VAERS_ID', encoding='latin1', low_memory=False)\n", + "def createDataFrame(baseDir, manufacturer):\n", + " def read_csv(file, usecols):\n", + " return pd.read_csv(file, index_col='VAERS_ID', encoding='latin1', low_memory=False, usecols=usecols)\n", " \n", " def createDataFrameForYear(year):\n", - " folder = \"VAERS/\" + year + \"VAERSData/\"\n", + " folder = baseDir + \"/VAERS/\" + year + \"VAERSData/\"\n", " return pd.merge(\n", - " read_csv(folder + year + \"VAERSDATA.csv\"),\n", - " read_csv(folder + year + \"VAERSVAX.csv\"),\n", + " read_csv(folder + year + \"VAERSDATA.csv\", ['VAERS_ID','DIED', 'L_THREAT', 'DISABLE']),\n", + " read_csv(folder + year + \"VAERSVAX.csv\", ['VAERS_ID','VAX_TYPE', 'VAX_MANU', 'VAX_LOT']),\n", " left_index=True,\n", " right_index=True)\n", " \n", @@ -45,16 +46,15 @@ "source": [ "def 
createPivotTable(df):\n", " def filter(df, col):\n", - " return df[df[col]=='Y'][['VAX_LOT']]\n", + " return df[df[col] == 'Y'][['VAX_LOT']]\n", "\n", - " return pd.concat(\n", - " {\n", - " 'ADRs': df[['VAX_LOT']].value_counts(),\n", - " 'DEATHS': filter(df, 'DIED').value_counts(),\n", - " 'DISABILITIES': filter(df, 'DISABLE').value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': filter(df, 'L_THREAT').value_counts()\n", - " },\n", - " axis=1)" + " pivotTableDict = {\n", + " 'ADRs': df[['VAX_LOT']].value_counts(),\n", + " 'DEATHS': filter(df, 'DIED').value_counts(),\n", + " 'DISABILITIES': filter(df, 'DISABLE').value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': filter(df, 'L_THREAT').value_counts()\n", + " }\n", + " return pd.concat(pivotTableDict, axis=1).replace(to_replace=np.nan, value=0)\n" ] }, { @@ -64,7 +64,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_moderna = createDataFrame(\"MODERNA\")" + "df_moderna = createDataFrame(\".\", \"MODERNA\")" ] }, { @@ -91,11 +91,65 @@ "cell_type": "code", "execution_count": null, "id": "bb7b2963", - "metadata": {}, + "metadata": { + "scrolled": false + }, "outputs": [], "source": [ "pivotTable" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f506ac8", + "metadata": {}, + "outputs": [], + "source": [ + "import unittest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e14465d7", + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.testing import assert_frame_equal\n", + "\n", + "\n", + "class HowBadIsMyBatchTest(unittest.TestCase):\n", + "\n", + " def test_createPivotTable(self):\n", + " # Given\n", + " pivotTable = createPivotTable(createDataFrame(\"test\", \"MODERNA\"))\n", + "\n", + " # When\n", + " pivotTableExpected = pd.DataFrame(\n", + " {\n", + " 'ADRs': [2, 1],\n", + " 'DEATHS': [0, 1],\n", + " 'DISABILITIES': [2, 0],\n", + " 'LIFE THREATENING ILLNESSES': [0.0, 0.0]\n", + " },\n", + " index=pd.MultiIndex.from_arrays([['025L20A', '037K20A']], 
names=('VAX_LOT',)))\n", + " display(\"actual:\", pivotTable)\n", + " display(\"expected:\", pivotTableExpected)\n", + "\n", + " # Then\n", + " assert_frame_equal(pivotTable, pivotTableExpected, check_dtype=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef8f99c4", + "metadata": {}, + "outputs": [], + "source": [ + "unittest.main(argv=[''], verbosity=2, exit=False)" + ] } ], "metadata": {