refactoring

This commit is contained in:
frankknoll
2022-03-02 11:34:58 +01:00
parent 29a59d512b
commit 932f35c8a1

View File

@@ -14,6 +14,27 @@
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f179762b",
"metadata": {},
"outputs": [],
"source": [
"def readTimeseries():\n",
" timeseries = pd.read_csv(\n",
" 'zeitreihe-tagesdaten.csv',\n",
" low_memory = False,\n",
" usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],\n",
" parse_dates = ['date'],\n",
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%Y-%m-%d\"),\n",
" dtype = {\n",
" 'gemeindeschluessel': 'string',\n",
" 'bundesland': 'string'\n",
" })\n",
" return timeseries.sort_values(by = 'date', ascending = True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -21,48 +42,15 @@
"metadata": {},
"outputs": [],
"source": [
"kreise = pd.read_excel(\n",
" '04-kreise.xlsx',\n",
" sheet_name = 'Kreisfreie Städte u. Landkreise',\n",
" header = 5,\n",
" index_col = 0)\n",
"kreise = kreise.rename(columns = {'2': 'Bundesland', 3: 'Kreis', 6: 'Einwohnerzahl'})[['Bundesland', 'Kreis', 'Einwohnerzahl']]\n",
"kreise.index.set_names(\"Key\", inplace = True)\n",
"kreise"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1dea262",
"metadata": {},
"outputs": [],
"source": [
"class TimeseriesReader:\n",
" \n",
" def readTimeseries(self):\n",
" timeseries = pd.read_csv(\n",
" 'zeitreihe-tagesdaten.csv',\n",
" low_memory = False,\n",
" usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],\n",
" parse_dates = ['date'],\n",
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%Y-%m-%d\"),\n",
" dtype = {\n",
" 'gemeindeschluessel': 'string',\n",
" 'bundesland': 'string'\n",
" })\n",
" return timeseries.sort_values(by = 'date', ascending = True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d978b069",
"metadata": {},
"outputs": [],
"source": [
"timeSeries = TimeseriesReader().readTimeseries()\n",
"timeSeries"
"def readKreise():\n",
" kreise = pd.read_excel(\n",
" '04-kreise.xlsx',\n",
" sheet_name = 'Kreisfreie Städte u. Landkreise',\n",
" header = 5,\n",
" index_col = 0)\n",
" kreise = kreise.rename(columns = {'2': 'Bundesland', 3: 'Kreis', 6: 'Einwohnerzahl'})[['Bundesland', 'Kreis', 'Einwohnerzahl']]\n",
" kreise.index.set_names(\"Key\", inplace = True)\n",
" return kreise"
]
},
{
@@ -102,7 +90,7 @@
"metadata": {},
"outputs": [],
"source": [
"timeSeries = ColumnsAdder(kreise).addKreisAndBundeslandAndEinwohnerzahlColumns(timeSeries)\n",
"timeSeries = ColumnsAdder(readKreise()).addKreisAndBundeslandAndEinwohnerzahlColumns(readTimeseries())\n",
"timeSeries"
]
},