refactoring

This commit is contained in:
frankknoll
2022-02-28 14:07:36 +01:00
parent 9281412843
commit 16c154902f
8 changed files with 2076 additions and 2900 deletions

View File

@@ -14,33 +14,6 @@
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eee150b8",
"metadata": {},
"outputs": [],
"source": [
"bundeslandById = {\n",
" '01': 'Schleswig-Holstein',\n",
" '02': 'Freie Hansestadt Hamburg',\n",
" '03': 'Niedersachsen',\n",
" '04': 'Freie Hansestadt Bremen',\n",
" '05': 'Nordrhein-Westfalen',\n",
" '06': 'Hessen',\n",
" '07': 'Rheinland-Pfalz',\n",
" '08': 'Baden-Württemberg',\n",
" '09': 'Freistaat Bayern',\n",
" '10': 'Saarland',\n",
" '11': 'Berlin',\n",
" '12': 'Brandenburg',\n",
" '13': 'Mecklenburg-Vorpommern',\n",
" '14': 'Freistaat Sachsen',\n",
" '15': 'Sachsen-Anhalt',\n",
" '16': 'Freistaat Thüringen'}\n",
"bundeslandById"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -79,7 +52,7 @@
"class TimeseriesReader:\n",
" \n",
" def readTimeseries(self):\n",
" return pd.read_csv(\n",
" timeseries = pd.read_csv(\n",
" 'zeitreihe-tagesdaten.csv',\n",
" low_memory = False,\n",
" usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],\n",
@@ -88,7 +61,8 @@
" dtype = {\n",
" 'gemeindeschluessel': 'string',\n",
" 'bundesland': 'string'\n",
" })\n"
" })\n",
" return timeseries.sort_values(by = 'date', ascending = True)\n"
]
},
{
@@ -110,15 +84,31 @@
"outputs": [],
"source": [
"class KreisAndBundeslandColumnAdder:\n",
" \n",
" def __init__(self, kreisByKreisschluessel, bundeslandById):\n",
"\n",
" def __init__(self, kreisByKreisschluessel):\n",
" self.kreisByKreisschluessel = kreisByKreisschluessel\n",
" self.bundeslandById = bundeslandById\n",
"\n",
" def addKreisAndBundeslandColumn(self, dataFrame):\n",
" dataFrame['Kreis'] = dataFrame['gemeindeschluessel'].map(lambda gemeindeschluessel: self.kreisByKreisschluessel.loc[gemeindeschluessel, 'Kreis'])\n",
" dataFrame['Bundesland'] = dataFrame['bundesland'].map(lambda bundesland: self.bundeslandById[bundesland])\n",
" return dataFrame"
" dataFrame['Kreis'] = dataFrame['gemeindeschluessel'].map(\n",
" lambda gemeindeschluessel: self.kreisByKreisschluessel.loc[gemeindeschluessel, 'Kreis'])\n",
" dataFrame['Bundesland'] = dataFrame['bundesland'].map({\n",
" '01': 'Schleswig-Holstein',\n",
" '02': 'Freie Hansestadt Hamburg',\n",
" '03': 'Niedersachsen',\n",
" '04': 'Freie Hansestadt Bremen',\n",
" '05': 'Nordrhein-Westfalen',\n",
" '06': 'Hessen',\n",
" '07': 'Rheinland-Pfalz',\n",
" '08': 'Baden-Württemberg',\n",
" '09': 'Freistaat Bayern',\n",
" '10': 'Saarland',\n",
" '11': 'Berlin',\n",
" '12': 'Brandenburg',\n",
" '13': 'Mecklenburg-Vorpommern',\n",
" '14': 'Freistaat Sachsen',\n",
" '15': 'Sachsen-Anhalt',\n",
" '16': 'Freistaat Thüringen'})\n",
" return dataFrame\n"
]
},
{
@@ -128,7 +118,7 @@
"metadata": {},
"outputs": [],
"source": [
"timeSeries = KreisAndBundeslandColumnAdder(kreisByKreisschluessel, bundeslandById).addKreisAndBundeslandColumn(timeSeries)\n",
"timeSeries = KreisAndBundeslandColumnAdder(kreisByKreisschluessel).addKreisAndBundeslandColumn(timeSeries)\n",
"timeSeries"
]
},
@@ -139,14 +129,11 @@
"metadata": {},
"outputs": [],
"source": [
"def readTimeseries(bundesland = None):\n",
" dataFrame = TimeseriesReader().readTimeseries()\n",
" display(dataFrame)\n",
" display(dataFrame.info())\n",
" if bundesland is not None:\n",
" return dataFrame[dataFrame['Bundesland'] == bundesland][['date', 'betten_belegt', 'betten_frei']]\n",
"def aggregateData(timeSeries, kreis = None):\n",
" if kreis is not None:\n",
" return timeSeries[timeSeries['Kreis'] == kreis][['date', 'betten_belegt', 'betten_frei']]\n",
" else:\n",
" return dataFrame.groupby('date').agg(**{\n",
" return timeSeries.groupby('date').agg(**{\n",
" 'betten_belegt': pd.NamedAgg(column = 'betten_belegt', aggfunc = 'sum'),\n",
" 'betten_frei': pd.NamedAgg(column = 'betten_frei', aggfunc = 'sum') \n",
" }).reset_index()"
@@ -159,23 +146,13 @@
"metadata": {},
"outputs": [],
"source": [
"def readAndPersistTimeseries(bundesland = None):\n",
" dataFrame = readTimeseries(bundesland)\n",
" dataFrame.to_csv(_getFilename(bundesland), index = False)\n",
"def aggregateAndPersistData(timeSeries, kreis = None):\n",
" dataFrame = aggregateData(timeSeries, kreis)\n",
" dataFrame.to_csv(_getFilename(kreis), index = False)\n",
" return dataFrame\n",
"\n",
"def _getFilename(bundesland):\n",
" return '../../docs/data/intensivstationen/intensivstationen{suffix}.csv'.format(suffix = '-' + bundesland if bundesland is not None else '-de')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63ca93a4",
"metadata": {},
"outputs": [],
"source": [
"# readAndPersistTimeseries(bundesland = 'BADEN_WUERTTEMBERG')"
"def _getFilename(kreis):\n",
" return '../../docs/data/intensivstationen/intensivstationen{suffix}.csv'.format(suffix = '-' + kreis if kreis is not None else '-de')\n"
]
},
{
@@ -185,27 +162,28 @@
"metadata": {},
"outputs": [],
"source": [
"dataFrame = readAndPersistTimeseries()\n",
"dataFrame"
"aggregateAndPersistData(timeSeries)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21776a31",
"id": "1b97137f",
"metadata": {},
"outputs": [],
"source": [
"dataFrame.info()"
"aggregateAndPersistData(timeSeries, 'Tübingen, Landkreis')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc5974cd",
"id": "e183fad4",
"metadata": {},
"outputs": [],
"source": []
"source": [
"aggregateAndPersistData(timeSeries, 'Reutlingen, Landkreis')"
]
}
],
"metadata": {