generating histogram files

This commit is contained in:
frankknoll
2023-01-28 10:15:59 +01:00
parent e208727ff7
commit 9be119eee3
3 changed files with 18 additions and 270 deletions

View File

@@ -1,6 +1,3 @@
import json
class DictByBatchcodeTable2DictConverter: class DictByBatchcodeTable2DictConverter:
@staticmethod @staticmethod

View File

@@ -87,6 +87,16 @@
"international_VAERSVAX_Covid19" "international_VAERSVAX_Covid19"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "3e0908fe",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSSYMPTOMS"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -98,16 +108,6 @@
"batchcodes" "batchcodes"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "4119f1a3",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSSYMPTOMS"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -116,20 +116,12 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n", "from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"\n",
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)\n", "symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)\n",
"symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')\n",
"symptomByBatchcodeTable" "symptomByBatchcodeTable"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "817525c1",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -175,6 +167,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from HistogramTable2DictTableConverter import HistogramTable2DictTableConverter\n", "from HistogramTable2DictTableConverter import HistogramTable2DictTableConverter\n",
"\n",
"dictByBatchcodeTable = HistogramTable2DictTableConverter.convertHistogramTable2DictTable(symptomHistogramByBatchcodeTable)\n", "dictByBatchcodeTable = HistogramTable2DictTableConverter.convertHistogramTable2DictTable(symptomHistogramByBatchcodeTable)\n",
"dictByBatchcodeTable" "dictByBatchcodeTable"
] ]
@@ -190,96 +183,11 @@
"\n", "\n",
"histogramDescriptionPersister = HistogramDescriptionPersister('../docs/data/histograms')\n", "histogramDescriptionPersister = HistogramDescriptionPersister('../docs/data/histograms')\n",
"histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(\n", "histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(\n",
" batchcodes[:100],\n", " batchcodes,\n",
" dictByBatchcodeTable,\n", " dictByBatchcodeTable,\n",
" progress = lambda count, size, batchcode: print(f'{count}/{size}: {batchcode}'))" " progress = lambda count, size, batchcode: print(f'{count}/{size}: {batchcode}'))"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "97e2a4b6",
"metadata": {},
"outputs": [],
"source": [
"print(json.dumps(json.loads(jsonActual), indent=2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25a0b41d",
"metadata": {},
"outputs": [],
"source": [
"jsonTable.to_excel('tmp/jsonTable.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d5a7864",
"metadata": {},
"outputs": [],
"source": [
"filteredTable.to_excel('tmp/filteredTable.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0daf7fdc",
"metadata": {},
"outputs": [],
"source": [
"from TestHelper import TestHelper\n",
"batchcode = '1808982'\n",
"symptomHistogramByBatchcodeTable = TestHelper.createDataFrame(\n",
" columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],\n",
" data = [ ['{\"Blood pressure orthostatic abnormal\":5,\"Chest discomfort\":1}'],\n",
" ['{\"Chest discomfort\":2}']],\n",
" index = pd.MultiIndex.from_tuples(\n",
" names = ['VAX_LOT1', 'VAX_LOT2'],\n",
" tuples = [[batchcode, 'EW0175'],\n",
" ['015M20A', batchcode]]))\n",
"symptomHistogramByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e63a507",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"result= symptomHistogramByBatchcodeTable.to_json()\n",
"parsed = json.loads(result)\n",
"parsed\n",
"#print(json.dumps(parsed, indent=4))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e55301c8",
"metadata": {},
"outputs": [],
"source": [
"res = jsonTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')]['SYMPTOM_COUNT_BY_VAX_LOT']\n",
"res"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5710177",
"metadata": {},
"outputs": [],
"source": [
"res.to_json()"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -287,166 +195,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')" "# symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')\n",
] "# symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')"
},
{
"cell_type": "code",
"execution_count": null,
"id": "07b1b418",
"metadata": {},
"outputs": [],
"source": [
"symptomHistogramByBatchcodeTable.sort_values(by='SYMPTOM_COUNT_BY_VAX_LOT', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9acd49bc",
"metadata": {},
"outputs": [],
"source": [
"table.loc[('!D0181', 'nan')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9a7eb51",
"metadata": {},
"outputs": [],
"source": [
"table.loc[('!D0181', 'nan')][:30].plot(kind='bar')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e3f86a0",
"metadata": {},
"outputs": [],
"source": [
"df.groupby('VAX_LOT1')['SYMPTOMS']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ab1c3f7",
"metadata": {},
"outputs": [],
"source": [
"df[(df['VAX_LOT1'] != 'nan') & (df['VAX_LOT2'] != 'nan') & (df['VAX_LOT3'] != 'nan') & (df['VAX_LOT4'] != 'nan')& (df['VAX_LOT5'] != 'nan') & (df['VAX_LOT6'] != 'nan') & (df['VAX_LOT7'] != 'nan') & (df['VAX_LOT8'] != 'nan')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4753dd56",
"metadata": {},
"outputs": [],
"source": [
"df[df['VAX_LOT3'] == 'EN6201']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce3d6dfd",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"# EN6201, FE6208\n",
"df[df['VAX_LOT2'] == 'EN6201'].hist(by=['VAX_LOT2'], column='SYMPTOMS', figsize=(200, 60))\n",
"# plt.savefig('EN6201.png')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc962341",
"metadata": {},
"outputs": [],
"source": [
"df['SYMPTOMS'].hist(by=df['VAX_LOT1'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63fa4111",
"metadata": {},
"outputs": [],
"source": [
"df.hist(by=['VAX_LOT1'], column='SYMPTOMS')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e97b5e3",
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"grouped = df.groupby(['VAX_LOT1'])\n",
"\n",
"for index, group in grouped:\n",
" display(index, group)\n",
" #plt.figure(figsize=(20, 10), edgecolor='green')\n",
" #plt.title(index)\n",
" #plt.hist(group['SYMPTOMS'], align='left')\n",
" #plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6429c3d9",
"metadata": {},
"outputs": [],
"source": [
"type(pd.NA)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ebf8ada",
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"x = ['A']*300 + ['B']*400 + ['C']*300\n",
"y = np.random.randn(1000)\n",
"df = DataFrame({'Letter':x, 'N':y})\n",
"grouped = df.groupby('Letter')\n",
"\n",
"for index, group in grouped:\n",
" display(group)\n",
" plt.figure()\n",
" plt.title(index)\n",
" plt.hist(group.N)\n",
"\n",
"# plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b4df095",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')"
] ]
}, },
{ {
@@ -497,7 +247,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.8" "version": "3.10.8 (main, Nov 24 2022, 14:13:03) [GCC 11.2.0]"
}, },
"vscode": { "vscode": {
"interpreter": { "interpreter": {

View File

@@ -16,6 +16,7 @@ def getInternationalVaersCovid19(years):
def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years): def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years):
international_VAERSVAX, international_VAERSSYMPTOMS = _get_international_VAERSVAX_VAERSSYMPTOMS(years) international_VAERSVAX, international_VAERSSYMPTOMS = _get_international_VAERSVAX_VAERSSYMPTOMS(years)
international_VAERSVAX.dropna(subset = ['VAX_LOT'], inplace = True)
international_VAERSVAX_Covid19 = DataFrameFilter().filterByCovid19(international_VAERSVAX) international_VAERSVAX_Covid19 = DataFrameFilter().filterByCovid19(international_VAERSVAX)
return international_VAERSVAX_Covid19, international_VAERSSYMPTOMS return international_VAERSVAX_Covid19, international_VAERSSYMPTOMS