refining LinesFactoryTest

This commit is contained in:
frankknoll
2023-11-19 16:39:29 +01:00
parent 7081d9014b
commit 95da66087d
5 changed files with 103 additions and 26 deletions

View File

@@ -666,9 +666,7 @@
"source": [
"from SymptomsCausedByVaccines.MultiLineFitting.MultiLineFitter import MultiLineFitter\n",
"from SymptomsCausedByVaccines.MultiLineFitting.SymptomCombinationsProvider import SymptomCombinationsProvider\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"from skspatial.objects import Line\n"
"from matplotlib import pyplot as plt\n"
]
},
{
@@ -677,8 +675,8 @@
"metadata": {},
"outputs": [],
"source": [
"symptomX = 'Abdominal abscess' # HIV test' # 'Immunosuppression'\n",
"symptomY = 'Abdominal discomfort' # 'Infection' # 'Immunoglobulin therapy'"
"# symptomX = 'Abdominal discomfort' # HIV test' # 'Immunosuppression'\n",
"# symptomY = 'Abdominal distension' # 'Infection' # 'Immunoglobulin therapy'"
]
},
{
@@ -687,9 +685,20 @@
"metadata": {},
"outputs": [],
"source": [
"df = prrByLotAndSymptom[[symptomX, symptomY]]\n",
"df = df[(df[symptomX] != 0) & (df[symptomY] != 0)]\n",
"df"
"# df = prrByLotAndSymptom[[symptomX, symptomY]]\n",
"# df = df[(df[symptomX] != 0) & (df[symptomY] != 0)]\n",
"# df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# retain only those columns of prrByLotAndSymptom that have more than 400 PRRs != 0\n",
"# prrByLotAndSymptom2 = prrByLotAndSymptom.loc[:, (prrByLotAndSymptom != 0).sum() >= 400]\n",
"# prrByLotAndSymptom2"
]
},
{
@@ -699,8 +708,8 @@
"outputs": [],
"source": [
"symptomCombinations = SymptomCombinationsProvider.generateSymptomCombinations(\n",
" prrByLotAndSymptom[prrByLotAndSymptom.columns[:500]],\n",
" dataFramePredicate = lambda df: 30 <= len(df) <= 35)"
" prrByLotAndSymptom,\n",
" dataFramePredicate = lambda df: 40 <= len(df) <= 50)"
]
},
{
@@ -709,8 +718,19 @@
"metadata": {},
"outputs": [],
"source": [
"for symptomCombination in symptomCombinations:\n",
" print(list(symptomCombination.columns))"
"from SymptomsCausedByVaccines.MultiLineFitting.Utils import take\n",
"\n",
"df = take(symptomCombinations, 1)[0]\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"symptomX, symptomY = df.columns"
]
},
{
@@ -758,7 +778,10 @@
"metadata": {},
"outputs": [],
"source": [
"clustersAscending, linesAscending = MultiLineFitter.fitPointsByAscendingLines(points, consensusThreshold = 0.001)"
"clustersAscending, linesAscending = MultiLineFitter.fitPointsByAscendingLines(\n",
" points,\n",
" consensusThreshold = 0.01,\n",
" maxNumLines = None)"
]
},
{
@@ -767,7 +790,7 @@
"metadata": {},
"outputs": [],
"source": [
"draw(points, clustersAscending, linesAscending, symptomX, symptomY, minClusterSize = 2)"
"draw(points, clustersAscending, linesAscending, symptomX, symptomY, minClusterSize = 5)"
]
},
{
@@ -776,7 +799,10 @@
"metadata": {},
"outputs": [],
"source": [
"clusters, lines = MultiLineFitter.fitPointsByLines(points, consensusThreshold = 0.1)"
"clusters, lines = MultiLineFitter.fitPointsByLines(\n",
" points,\n",
" consensusThreshold = 0.01,\n",
" maxNumLines = None)"
]
},
{