refactoring
This commit is contained in:
@@ -14,33 +14,6 @@
|
||||
"pd.set_option('display.max_columns', None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eee150b8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bundeslandById = {\n",
|
||||
" '01': 'Schleswig-Holstein',\n",
|
||||
" '02': 'Freie Hansestadt Hamburg',\n",
|
||||
" '03': 'Niedersachsen',\n",
|
||||
" '04': 'Freie Hansestadt Bremen',\n",
|
||||
" '05': 'Nordrhein-Westfalen',\n",
|
||||
" '06': 'Hessen',\n",
|
||||
" '07': 'Rheinland-Pfalz',\n",
|
||||
" '08': 'Baden-Württemberg',\n",
|
||||
" '09': 'Freistaat Bayern',\n",
|
||||
" '10': 'Saarland',\n",
|
||||
" '11': 'Berlin',\n",
|
||||
" '12': 'Brandenburg',\n",
|
||||
" '13': 'Mecklenburg-Vorpommern',\n",
|
||||
" '14': 'Freistaat Sachsen',\n",
|
||||
" '15': 'Sachsen-Anhalt',\n",
|
||||
" '16': 'Freistaat Thüringen'}\n",
|
||||
"bundeslandById"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -79,7 +52,7 @@
|
||||
"class TimeseriesReader:\n",
|
||||
" \n",
|
||||
" def readTimeseries(self):\n",
|
||||
" return pd.read_csv(\n",
|
||||
" timeseries = pd.read_csv(\n",
|
||||
" 'zeitreihe-tagesdaten.csv',\n",
|
||||
" low_memory = False,\n",
|
||||
" usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],\n",
|
||||
@@ -88,7 +61,8 @@
|
||||
" dtype = {\n",
|
||||
" 'gemeindeschluessel': 'string',\n",
|
||||
" 'bundesland': 'string'\n",
|
||||
" })\n"
|
||||
" })\n",
|
||||
" return timeseries.sort_values(by = 'date', ascending = True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -110,15 +84,31 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class KreisAndBundeslandColumnAdder:\n",
|
||||
" \n",
|
||||
" def __init__(self, kreisByKreisschluessel, bundeslandById):\n",
|
||||
"\n",
|
||||
" def __init__(self, kreisByKreisschluessel):\n",
|
||||
" self.kreisByKreisschluessel = kreisByKreisschluessel\n",
|
||||
" self.bundeslandById = bundeslandById\n",
|
||||
"\n",
|
||||
" def addKreisAndBundeslandColumn(self, dataFrame):\n",
|
||||
" dataFrame['Kreis'] = dataFrame['gemeindeschluessel'].map(lambda gemeindeschluessel: self.kreisByKreisschluessel.loc[gemeindeschluessel, 'Kreis'])\n",
|
||||
" dataFrame['Bundesland'] = dataFrame['bundesland'].map(lambda bundesland: self.bundeslandById[bundesland])\n",
|
||||
" return dataFrame"
|
||||
" dataFrame['Kreis'] = dataFrame['gemeindeschluessel'].map(\n",
|
||||
" lambda gemeindeschluessel: self.kreisByKreisschluessel.loc[gemeindeschluessel, 'Kreis'])\n",
|
||||
" dataFrame['Bundesland'] = dataFrame['bundesland'].map({\n",
|
||||
" '01': 'Schleswig-Holstein',\n",
|
||||
" '02': 'Freie Hansestadt Hamburg',\n",
|
||||
" '03': 'Niedersachsen',\n",
|
||||
" '04': 'Freie Hansestadt Bremen',\n",
|
||||
" '05': 'Nordrhein-Westfalen',\n",
|
||||
" '06': 'Hessen',\n",
|
||||
" '07': 'Rheinland-Pfalz',\n",
|
||||
" '08': 'Baden-Württemberg',\n",
|
||||
" '09': 'Freistaat Bayern',\n",
|
||||
" '10': 'Saarland',\n",
|
||||
" '11': 'Berlin',\n",
|
||||
" '12': 'Brandenburg',\n",
|
||||
" '13': 'Mecklenburg-Vorpommern',\n",
|
||||
" '14': 'Freistaat Sachsen',\n",
|
||||
" '15': 'Sachsen-Anhalt',\n",
|
||||
" '16': 'Freistaat Thüringen'})\n",
|
||||
" return dataFrame\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -128,7 +118,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"timeSeries = KreisAndBundeslandColumnAdder(kreisByKreisschluessel, bundeslandById).addKreisAndBundeslandColumn(timeSeries)\n",
|
||||
"timeSeries = KreisAndBundeslandColumnAdder(kreisByKreisschluessel).addKreisAndBundeslandColumn(timeSeries)\n",
|
||||
"timeSeries"
|
||||
]
|
||||
},
|
||||
@@ -139,14 +129,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def readTimeseries(bundesland = None):\n",
|
||||
" dataFrame = TimeseriesReader().readTimeseries()\n",
|
||||
" display(dataFrame)\n",
|
||||
" display(dataFrame.info())\n",
|
||||
" if bundesland is not None:\n",
|
||||
" return dataFrame[dataFrame['Bundesland'] == bundesland][['date', 'betten_belegt', 'betten_frei']]\n",
|
||||
"def aggregateData(timeSeries, kreis = None):\n",
|
||||
" if kreis is not None:\n",
|
||||
" return timeSeries[timeSeries['Kreis'] == kreis][['date', 'betten_belegt', 'betten_frei']]\n",
|
||||
" else:\n",
|
||||
" return dataFrame.groupby('date').agg(**{\n",
|
||||
" return timeSeries.groupby('date').agg(**{\n",
|
||||
" 'betten_belegt': pd.NamedAgg(column = 'betten_belegt', aggfunc = 'sum'),\n",
|
||||
" 'betten_frei': pd.NamedAgg(column = 'betten_frei', aggfunc = 'sum') \n",
|
||||
" }).reset_index()"
|
||||
@@ -159,23 +146,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def readAndPersistTimeseries(bundesland = None):\n",
|
||||
" dataFrame = readTimeseries(bundesland)\n",
|
||||
" dataFrame.to_csv(_getFilename(bundesland), index = False)\n",
|
||||
"def aggregateAndPersistData(timeSeries, kreis = None):\n",
|
||||
" dataFrame = aggregateData(timeSeries, kreis)\n",
|
||||
" dataFrame.to_csv(_getFilename(kreis), index = False)\n",
|
||||
" return dataFrame\n",
|
||||
"\n",
|
||||
"def _getFilename(bundesland):\n",
|
||||
" return '../../docs/data/intensivstationen/intensivstationen{suffix}.csv'.format(suffix = '-' + bundesland if bundesland is not None else '-de')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "63ca93a4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# readAndPersistTimeseries(bundesland = 'BADEN_WUERTTEMBERG')"
|
||||
"def _getFilename(kreis):\n",
|
||||
" return '../../docs/data/intensivstationen/intensivstationen{suffix}.csv'.format(suffix = '-' + kreis if kreis is not None else '-de')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -185,27 +162,28 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataFrame = readAndPersistTimeseries()\n",
|
||||
"dataFrame"
|
||||
"aggregateAndPersistData(timeSeries)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "21776a31",
|
||||
"id": "1b97137f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataFrame.info()"
|
||||
"aggregateAndPersistData(timeSeries, 'Tübingen, Landkreis')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cc5974cd",
|
||||
"id": "e183fad4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"aggregateAndPersistData(timeSeries, 'Reutlingen, Landkreis')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
Reference in New Issue
Block a user