Compare commits
25 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
95da66087d | ||
|
|
7081d9014b | ||
|
|
b773e34f3d | ||
|
|
d40116ba6f | ||
|
|
36492ae88b | ||
|
|
ee524ef036 | ||
|
|
2d43c31f95 | ||
|
|
c8849301fe | ||
|
|
a5dc008310 | ||
|
|
a14c1cd217 | ||
|
|
1941337066 | ||
|
|
2b81552d7a | ||
|
|
62ce64308c | ||
|
|
fcd8614420 | ||
|
|
fe7c2b1c88 | ||
|
|
8231453ae2 | ||
|
|
c2f900504a | ||
|
|
2caae0e198 | ||
|
|
ba628774f5 | ||
|
|
44e383734c | ||
|
|
457b2c6dd7 | ||
|
|
487ff3eff0 | ||
|
|
4c7da48bf9 | ||
|
|
da6e356e11 | ||
|
|
0d19a31574 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -17,3 +17,4 @@ google-chrome-stable_current_amd64*
|
|||||||
src/captcha/__pycache__
|
src/captcha/__pycache__
|
||||||
src/GoogleAnalytics/__pycache__
|
src/GoogleAnalytics/__pycache__
|
||||||
src/SymptomsCausedByVaccines/__pycache__
|
src/SymptomsCausedByVaccines/__pycache__
|
||||||
|
src/SymptomsCausedByVaccines/MultiLineFitting/__pycache__
|
||||||
|
|||||||
@@ -1,12 +1,14 @@
|
|||||||
name: howbadismybatch-venv
|
name: howbadismybatch-venv
|
||||||
channels:
|
channels:
|
||||||
- defaults
|
- defaults
|
||||||
# - conda-forge
|
- conda-forge
|
||||||
dependencies:
|
dependencies:
|
||||||
- python=3.9
|
- python=3.9
|
||||||
- ipykernel
|
- ipykernel
|
||||||
- numpy
|
- numpy
|
||||||
- pandas
|
- pandas
|
||||||
|
- scikit-learn
|
||||||
|
- scikit-spatial
|
||||||
- urllib3
|
- urllib3
|
||||||
- requests
|
- requests
|
||||||
- bs4
|
- bs4
|
||||||
|
|||||||
@@ -654,14 +654,179 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Multi Line Fitting"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from SymptomsCausedByVaccines.MultiLineFitting.MultiLineFitter import MultiLineFitter\n",
|
||||||
|
"from SymptomsCausedByVaccines.MultiLineFitting.SymptomCombinationsProvider import SymptomCombinationsProvider\n",
|
||||||
|
"from matplotlib import pyplot as plt\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# symptomX = 'Abdominal discomfort' # HIV test' # 'Immunosuppression'\n",
|
||||||
|
"# symptomY = 'Abdominal distension' # 'Infection' # 'Immunoglobulin therapy'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# df = prrByLotAndSymptom[[symptomX, symptomY]]\n",
|
||||||
|
"# df = df[(df[symptomX] != 0) & (df[symptomY] != 0)]\n",
|
||||||
|
"# df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# retain only those columns of prrByLotAndSymptom that have more than 400 PRRs != 0\n",
|
||||||
|
"# prrByLotAndSymptom2 = prrByLotAndSymptom.loc[:, (prrByLotAndSymptom != 0).sum() >= 400]\n",
|
||||||
|
"# prrByLotAndSymptom2"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"symptomCombinations = SymptomCombinationsProvider.generateSymptomCombinations(\n",
|
||||||
|
" prrByLotAndSymptom,\n",
|
||||||
|
" dataFramePredicate = lambda df: 40 <= len(df) <= 50)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from SymptomsCausedByVaccines.MultiLineFitting.Utils import take\n",
|
||||||
|
"\n",
|
||||||
|
"df = take(symptomCombinations, 1)[0]\n",
|
||||||
|
"df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"symptomX, symptomY = df.columns"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"points = [(x, y) for [x, y] in df.values]\n",
|
||||||
|
"points"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def draw(points, clusters, lines, symptomX, symptomY, minClusterSize):\n",
|
||||||
|
" _, ax = plt.subplots()\n",
|
||||||
|
" plt.scatter(_getXs(points), _getYs(points), color = \"blue\", marker = \".\", s = 100)\n",
|
||||||
|
" for cluster, line in zip(clusters, lines):\n",
|
||||||
|
" if len(cluster) >= minClusterSize:\n",
|
||||||
|
" _drawLine(line, cluster, ax)\n",
|
||||||
|
" plt.scatter(_getXs(cluster), _getYs(cluster), marker = \".\", s = 100)\n",
|
||||||
|
" plt.xlabel(symptomX)\n",
|
||||||
|
" plt.ylabel(symptomY)\n",
|
||||||
|
" plt.show()\n",
|
||||||
|
"\n",
|
||||||
|
"def _drawLine(line, cluster, ax):\n",
|
||||||
|
" coords = line.transform_points(cluster)\n",
|
||||||
|
" magnitude = line.direction.norm()\n",
|
||||||
|
" line.plot_2d(ax, t_1 = min(coords) / magnitude, t_2 = max(coords) / magnitude)\n",
|
||||||
|
"\n",
|
||||||
|
"def _getXs(xys):\n",
|
||||||
|
" return [x for (x, _) in xys]\n",
|
||||||
|
"\n",
|
||||||
|
"def _getYs(xys):\n",
|
||||||
|
" return [y for (_, y) in xys]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"clustersAscending, linesAscending = MultiLineFitter.fitPointsByAscendingLines(\n",
|
||||||
|
" points,\n",
|
||||||
|
" consensusThreshold = 0.01,\n",
|
||||||
|
" maxNumLines = None)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"draw(points, clustersAscending, linesAscending, symptomX, symptomY, minClusterSize = 5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"clusters, lines = MultiLineFitter.fitPointsByLines(\n",
|
||||||
|
" points,\n",
|
||||||
|
" consensusThreshold = 0.01,\n",
|
||||||
|
" maxNumLines = None)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"draw(points, clusters, lines, symptomX, symptomY, minClusterSize = 5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "howbadismybatch-venv",
|
"display_name": "howbadismybatch-venv-kernel",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "howbadismybatch-venv-kernel"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class CharacteristicFunctions:
    """Helpers for 0/1 characteristic functions, i.e. membership masks over a sequence."""

    @staticmethod
    def apply(characteristicFunction, elements):
        """Select the entries of ``elements`` that the characteristic function marks with 1.

        ``elements`` is converted to a NumPy array before the selection, so the
        result is a NumPy array, too.
        """
        elementsAsArray = np.array(elements)
        selectedPositions = CharacteristicFunctions._getIndexes(characteristicFunction)
        return elementsAsArray[selectedPositions]

    @staticmethod
    def _getIndexes(characteristicFunction):
        """Return the list of positions at which ``characteristicFunction`` equals 1."""
        return [
            position
            for position, flag in enumerate(characteristicFunction)
            if flag == 1
        ]
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
from skspatial.objects import Line
|
||||||
|
from SymptomsCausedByVaccines.MultiLineFitting.Utils import generatePairs, take
|
||||||
|
|
||||||
|
|
||||||
|
class LinesFactory:
    """Creates candidate lines running through pairs of the given points."""

    @staticmethod
    def createLines(points, maxNumLines = None):
        """Return the unique lines through pairs of ``points``.

        At most the first ``maxNumLines`` generated lines are considered (all of
        them when ``maxNumLines`` is None); near-duplicates are removed
        afterwards, so fewer than ``maxNumLines`` lines may be returned.
        """
        candidateLines = take(LinesFactory._generateAllLines(points), maxNumLines)
        return LinesFactory._getUniqueLines(candidateLines)

    @staticmethod
    def createAscendingLines(points, maxNumLines = None):
        """Like ``createLines`` but only lines accepted by ``_isAscending`` are considered."""
        candidateLines = take(LinesFactory._generateAllAscendingLines(points), maxNumLines)
        return LinesFactory._getUniqueLines(candidateLines)

    @staticmethod
    def _generateAllAscendingLines(points):
        """Lazily yield the lines through pairs of ``points`` whose direction is ascending."""
        return filter(
            lambda line: LinesFactory._isAscending(line.direction),
            LinesFactory._generateAllLines(points))

    @staticmethod
    def _generateAllLines(points):
        """Lazily yield one line per pair of ``points``."""
        return (Line.from_points(*pointPair) for pointPair in LinesFactory._generatePairs(points))

    @staticmethod
    def _isAscending(direction):
        """Tell whether both components of ``direction`` are >= 0 or both are <= 0.

        Directions with a zero component (horizontal or vertical) are accepted.
        """
        deltaX, deltaY = direction[0], direction[1]
        bothNonNegative = deltaX >= 0 and deltaY >= 0
        bothNonPositive = deltaX <= 0 and deltaY <= 0
        return bothNonNegative or bothNonPositive

    @staticmethod
    def _generatePairs(points):
        """Lazily yield the point pairs addressed by the index pairs of ``generatePairs``."""
        indexPairs = generatePairs(len(points))
        return ((points[first], points[second]) for (first, second) in indexPairs)

    @staticmethod
    def _getUniqueLines(lines):
        """Drop every line that is close to a line appearing later in ``lines``.

        Of each group of mutually close lines only the last one is kept; the
        relative order of the kept lines is preserved.
        """
        return [
            line
            for position, line in enumerate(lines)
            if not LinesFactory._isLineCloseToAnyOtherLine(line, lines[position + 1:])
        ]

    @staticmethod
    def _isLineCloseToAnyOtherLine(line, otherLines):
        """Return True if ``line.is_close`` holds for at least one of ``otherLines``."""
        return any(line.is_close(otherLine) for otherLine in otherLines)
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
import unittest
|
||||||
|
from skspatial.objects import Line
|
||||||
|
from SymptomsCausedByVaccines.MultiLineFitting.LinesFactory import LinesFactory
|
||||||
|
|
||||||
|
|
||||||
|
class LinesFactoryTest(unittest.TestCase):
    """Tests for LinesFactory.createLines and LinesFactory.createAscendingLines."""

    def test_createLines(self):
        # Given
        collinearPoints = [(1, 0), (2, 0), (3, 0)]

        # When
        actualLines = LinesFactory.createLines(collinearPoints)

        # Then
        self._assertLinesAreClose(
            actualLines,
            [
                Line(point = [0, 0], direction = [1, 0])
            ])

    def test_createLines2(self):
        # Given
        trianglePoints = [(0, 0), (1, 0), (0, 1)]

        # When
        actualLines = LinesFactory.createLines(trianglePoints)

        # Then
        self._assertLinesAreClose(
            actualLines,
            [
                Line(point = [0, 0], direction = [1, 0]),
                Line(point = [0, 0], direction = [0, 1]),
                Line(point = [0, 1], direction = [1, -1])
            ])

    def test_createLines_maxNumLines(self):
        # Given
        trianglePoints = [(0, 0), (1, 0), (0, 1)]

        # When
        actualLines = LinesFactory.createLines(trianglePoints, maxNumLines = 2)

        # Then
        self._assertLinesAreClose(
            actualLines,
            [
                Line(point = [0, 0], direction = [1, 0]),
                Line(point = [0, 0], direction = [0, 1])
            ])

    def test_createAscendingLines(self):
        # Given
        trianglePoints = [(0, 0), (1, 0), (0, 1)]

        # When
        actualLines = LinesFactory.createAscendingLines(trianglePoints)

        # Then
        self._assertLinesAreClose(
            actualLines,
            [
                Line(point = [0, 0], direction = [1, 0]),
                Line(point = [0, 0], direction = [0, 1])
            ])

    def test_createAscendingLines_maxNumLines(self):
        # Given
        trianglePoints = [(0, 0), (1, 0), (0, 1)]

        # When
        actualLines = LinesFactory.createAscendingLines(trianglePoints, maxNumLines = 1)

        # Then
        self._assertLinesAreClose(
            actualLines,
            [
                Line(point = [0, 0], direction = [1, 0])
            ])

    def _assertLinesAreClose(self, actualLines, expectedLines):
        # same number of lines and every actual line is close to the expected line at its position
        self.assertEqual(len(actualLines), len(expectedLines))
        for actualLine, expectedLine in zip(actualLines, expectedLines):
            self.assertTrue(actualLine.is_close(expectedLine))
|
||||||
102
src/SymptomsCausedByVaccines/MultiLineFitting/MultiLineFitter.py
Normal file
102
src/SymptomsCausedByVaccines/MultiLineFitting/MultiLineFitter.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
import numpy as np
|
||||||
|
from SymptomsCausedByVaccines.MultiLineFitting.LinesFactory import LinesFactory
|
||||||
|
from SymptomsCausedByVaccines.MultiLineFitting.Utils import generatePairs
|
||||||
|
from SymptomsCausedByVaccines.MultiLineFitting.CharacteristicFunctions import CharacteristicFunctions
|
||||||
|
|
||||||
|
# implementation of "Robust Multiple Structures Estimation with J-linkage" adapted from https://github.com/fkluger/vp-linkage
|
||||||
|
class MultiLineFitter:
    """Fits several lines to a set of points by agglomerative clustering of the points.

    Every point "prefers" the candidate lines that pass within a consensus
    threshold of it. Points (later: clusters of points) with the most similar
    preference sets are merged until no two clusters share a preferred line.
    """

    @staticmethod
    def fitPointsByLines(points, consensusThreshold, maxNumLines = None):
        """Fit ``points`` using the candidate lines created by ``LinesFactory.createLines``.

        Returns the same ``(clusters, fittedLines)`` pair as ``fitLines``.
        """
        return MultiLineFitter.fitLines(
            points,
            LinesFactory.createLines(points, maxNumLines),
            consensusThreshold)

    @staticmethod
    def fitPointsByAscendingLines(points, consensusThreshold, maxNumLines = None):
        """Fit ``points`` using the candidate lines created by ``LinesFactory.createAscendingLines``.

        Returns the same ``(clusters, fittedLines)`` pair as ``fitLines``.
        """
        return MultiLineFitter.fitLines(
            points,
            LinesFactory.createAscendingLines(points, maxNumLines),
            consensusThreshold)

    @staticmethod
    def fitLines(points, lines, consensusThreshold):
        """Select the lines among ``lines`` that explain clusters of ``points``.

        points: sequence of points.
        lines: candidate lines offering ``distance_point(point)``.
        consensusThreshold: maximum distance at which a point counts as lying on a line.

        Returns a tuple ``(clusters, fittedLines)`` where ``clusters[i]`` holds
        the points lying within the threshold of ``fittedLines[i]``.
        """
        preferenceMatrix = MultiLineFitter._createPreferenceMatrix(points, lines, consensusThreshold)
        _, preferenceMatrix4Clusters = MultiLineFitter._createClusters(preferenceMatrix)
        fittedLines = MultiLineFitter._getLines(lines, preferenceMatrix4Clusters)
        return (
            MultiLineFitter._getFittedPointsList(points, fittedLines, consensusThreshold),
            fittedLines)

    @staticmethod
    def _getFittedPointsList(points, lines, consensusThreshold):
        """Return, for every line, the points lying within ``consensusThreshold`` of it."""
        return MultiLineFitter._getPointsList(
            points,
            MultiLineFitter._createPreferenceMatrix(points, lines, consensusThreshold))

    @staticmethod
    def _getPointsList(points, preferenceMatrix):
        """Return, for every column (line) of ``preferenceMatrix``, the points flagged with 1."""
        # a column of the preference matrix is the characteristic function of a line's consensus set
        characteristicFunctionsOfConsensusSets = np.transpose(preferenceMatrix)
        return [
            CharacteristicFunctions.apply(characteristicFunctionOfConsensusSet, points)
            for characteristicFunctionOfConsensusSet in characteristicFunctionsOfConsensusSets
        ]

    @staticmethod
    def _createPreferenceMatrix(points, lines, consensusThreshold):
        """Build the points x lines 0/1 matrix of "point lies within the threshold of line"."""
        preferenceMatrix = np.zeros([len(points), len(lines)], dtype = int)
        for pointIndex, point in enumerate(points):
            for lineIndex, line in enumerate(lines):
                if line.distance_point(point) <= consensusThreshold:
                    preferenceMatrix[pointIndex, lineIndex] = 1
        return preferenceMatrix

    @staticmethod
    def _createClusters(preferenceMatrix):
        """Repeatedly merge the two clusters having the most similar preference sets.

        Starts with one cluster per row of ``preferenceMatrix`` and stops as soon
        as no two clusters have a common preferred line. The given matrix is not
        modified.

        Returns a tuple ``(clusters, preferenceMatrix4Clusters)``: the row
        indexes grouped per cluster and the matrix holding one preference set
        (row) per cluster.
        """
        # work on a private copy so that the caller's matrix is never changed
        preferenceMatrix = np.array(preferenceMatrix)
        clusters = [[rowIndex] for rowIndex in range(preferenceMatrix.shape[0])]
        while True:
            bestClusterIndexCombination = MultiLineFitter._findMostSimilarClusters(preferenceMatrix)
            if bestClusterIndexCombination is None:
                return clusters, preferenceMatrix

            (clusterIndexA, clusterIndexB) = bestClusterIndexCombination
            clusters[clusterIndexA] += clusters[clusterIndexB]
            clusters.pop(clusterIndexB)
            preferenceMatrix = MultiLineFitter._mergePreferenceSets(
                preferenceMatrix,
                clusterIndexA,
                clusterIndexB)

    @staticmethod
    def _findMostSimilarClusters(preferenceMatrix):
        """Return the first pair of row indexes with maximal similarity > 0, or None if there is none."""
        maxSimilarity = 0
        bestClusterIndexCombination = None
        for (clusterIndexA, clusterIndexB) in generatePairs(preferenceMatrix.shape[0]):
            similarity = MultiLineFitter._intersectionOverUnion(
                preferenceMatrix[clusterIndexA],
                preferenceMatrix[clusterIndexB])
            # strictly greater: on ties the pair found first wins
            if similarity > maxSimilarity:
                maxSimilarity = similarity
                bestClusterIndexCombination = (clusterIndexA, clusterIndexB)
        return bestClusterIndexCombination

    @staticmethod
    def _mergePreferenceSets(preferenceMatrix, keepIndex, dropIndex):
        """Return a new matrix in which row ``keepIndex`` is the intersection of both rows and row ``dropIndex`` is removed.

        ``preferenceMatrix`` itself is left untouched.
        """
        mergedPreferenceSet = np.logical_and(preferenceMatrix[keepIndex], preferenceMatrix[dropIndex])
        # np.delete returns a copy, so writing into the result cannot affect the input
        mergedMatrix = np.delete(preferenceMatrix, dropIndex, axis = 0)
        # removing a row in front of the kept row shifts the kept row up by one
        shiftedKeepIndex = keepIndex - 1 if dropIndex < keepIndex else keepIndex
        mergedMatrix[shiftedKeepIndex] = mergedPreferenceSet
        return mergedMatrix

    @staticmethod
    def _intersectionOverUnion(setA, setB):
        """Return |A ∩ B| / |A ∪ B| for two 0/1 vectors, or 0 if they do not intersect."""
        intersection = np.count_nonzero(np.logical_and(setA, setB))
        union = np.count_nonzero(np.logical_or(setA, setB))
        # an empty intersection also covers the empty union, so no division by zero can occur
        return intersection / union if intersection > 0 else 0

    @staticmethod
    def _getLines(lines, preferenceMatrix):
        """Return the lines addressed by the first preferred line index of every row."""
        return np.array(lines)[MultiLineFitter._getLineIndexes(preferenceMatrix)]

    @staticmethod
    def _getLineIndexes(preferenceMatrix):
        """Return, for every row containing a 1, the index of its first 1."""
        lineIndexes = (MultiLineFitter._index(preferenceSet, 1) for preferenceSet in preferenceMatrix)
        return [lineIndex for lineIndex in lineIndexes if lineIndex is not None]

    @staticmethod
    def _index(xs, x):
        """Return the index of the first occurrence of ``x`` in ``xs``, or None if it is absent."""
        try:
            return list(xs).index(x)
        except ValueError:
            return None

    @staticmethod
    def _getClusterPoints(points, clusters):
        """Return, for every cluster of point indexes, the corresponding points in ascending index order."""
        sortedClusters = [sorted(cluster) for cluster in clusters]
        return [list(np.array(points)[cluster]) for cluster in sortedClusters]
|
||||||
@@ -0,0 +1,152 @@
|
|||||||
|
import unittest
|
||||||
|
import numpy as np
|
||||||
|
from skspatial.objects import Line
|
||||||
|
from SymptomsCausedByVaccines.MultiLineFitting.MultiLineFitter import MultiLineFitter
|
||||||
|
|
||||||
|
|
||||||
|
class MultiLineFitterTest(unittest.TestCase):
    """Tests for MultiLineFitter."""

    def test_createPreferenceMatrix(self):
        # Given
        points = [(1, 3), (10, 20)]
        lines = [Line.from_points([0, 0], [100, 0])]
        consensusThreshold = 4.0

        # When
        preferenceMatrix = MultiLineFitter._createPreferenceMatrix(points, lines, consensusThreshold)

        # Then
        np.testing.assert_array_equal(
            preferenceMatrix,
            np.array(
                [
                    [1],
                    [0]
                ]))

    def test_createPreferenceMatrix2(self):
        # Given
        points = [(1, 0), (2, 0), (3, 0), (1, 1), (2, 2), (3, 3)]
        lines = [Line.from_points([0, 0], [1, 0]), Line.from_points([0, 0], [1, 1])]
        consensusThreshold = 0.001

        # When
        preferenceMatrix = MultiLineFitter._createPreferenceMatrix(points, lines, consensusThreshold)

        # Then
        np.testing.assert_array_equal(
            preferenceMatrix,
            np.array(
                [
                    [1, 0],
                    [1, 0],
                    [1, 0],
                    [0, 1],
                    [0, 1],
                    [0, 1]
                ]))

    def test_createClusters(self):
        # Given
        preferenceMatrix = np.array(
            [
                [1],
                [1]
            ])

        # When
        clusters, _ = MultiLineFitter._createClusters(preferenceMatrix)

        # Then
        # clusters are plain lists of row indexes, hence they are compared as lists
        self.assertEqual(
            clusters,
            [
                [1, 0]
            ])

    def test_createClusters2(self):
        # Given
        preferenceMatrix = np.array(
            [
                [1, 1],
                [1, 0],
                [1, 0],
                [0, 1],
                [0, 1]
            ])

        # When
        clusters, preferenceMatrix4Clusters = MultiLineFitter._createClusters(preferenceMatrix)

        # Then
        # the clusters differ in size; a ragged nested list cannot be turned into
        # a NumPy array on NumPy >= 1.24 (ValueError), so compare plain lists
        self.assertEqual(
            clusters,
            [
                [2, 1, 0],
                [4, 3]
            ])
        np.testing.assert_array_equal(
            preferenceMatrix4Clusters,
            np.array(
                [
                    [1, 0],
                    [0, 1]
                ]))

    def test_getLineIndexes(self):
        # Given
        preferenceMatrix = np.array(
            [
                [0, 0, 1],
                [0, 1, 1]
            ])

        # When
        lineIndexes = MultiLineFitter._getLineIndexes(preferenceMatrix)

        # Then
        np.testing.assert_array_equal(lineIndexes, [2, 1])

    def test_fitLines(self):
        # Given
        points = [(0, 0), (1, 0), (2, 0), (1, 1), (2, 2)]
        line1 = Line.from_points([0, 0], [1, 0])
        line2 = Line.from_points([0, 0], [1, 1])
        line3 = Line.from_points([-10, 0], [-10, 1])

        # When
        clusters, fittedLines = MultiLineFitter.fitLines(points, lines = [line1, line2, line3], consensusThreshold = 0.001)

        # Then
        np.testing.assert_array_equal(
            fittedLines,
            [
                line1,
                line2
            ])
        np.testing.assert_array_equal(
            clusters,
            [
                [(0, 0), (1, 0), (2, 0)],
                [(0, 0), (1, 1), (2, 2)]
            ])

    def test_fitPointsByLines(self):
        # Given
        points = [(0, 0), (1, 0), (2, 0), (1, 1), (2, 2)]

        # When
        clusters, lines = MultiLineFitter.fitPointsByLines(points, consensusThreshold = 0.001)

        # Then
        self.assertEqual(len(lines), 2)
        self.assertTrue(lines[0].is_close(Line.from_points([0, 0], [1, 0])))
        self.assertTrue(lines[1].is_close(Line.from_points([0, 0], [1, 1])))
        np.testing.assert_array_equal(
            clusters,
            [
                [(0, 0), (1, 0), (2, 0)],
                [(0, 0), (1, 1), (2, 2)]
            ])
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
from SymptomsCausedByVaccines.MultiLineFitting.Utils import generatePairs
|
||||||
|
|
||||||
|
class SymptomCombinationsProvider:
    """Provides two-column data frames, one per pair of symptom columns."""

    @staticmethod
    def generateSymptomCombinations(prrByLotAndSymptom, dataFramePredicate):
        """Lazily yield the two-symptom data frames accepted by ``dataFramePredicate``.

        For every pair of columns of ``prrByLotAndSymptom`` a data frame with
        just these two columns is built, restricted to the rows in which both
        values are != 0. Within a frame the column that comes first in
        ``prrByLotAndSymptom`` is placed first.
        """
        symptomPairs = SymptomCombinationsProvider._generatePairs(prrByLotAndSymptom.columns)

        def createSymptomCombination(symptomPair):
            # the pairs are delivered as (later column, earlier column)
            symptomY, symptomX = symptomPair
            return SymptomCombinationsProvider._generateSymptomCombination(prrByLotAndSymptom, symptomX, symptomY)

        return SymptomCombinationsProvider._filter(
            map(createSymptomCombination, symptomPairs),
            dataFramePredicate)

    @staticmethod
    def _generatePairs(symptoms):
        """Lazily yield the symptom pairs addressed by the index pairs of ``generatePairs``."""
        indexPairs = generatePairs(len(symptoms))
        return ((symptoms[first], symptoms[second]) for (first, second) in indexPairs)

    @staticmethod
    def _generateSymptomCombination(prrByLotAndSymptom, symptomX, symptomY):
        """Return columns ``symptomX`` and ``symptomY`` restricted to rows where both are != 0."""
        bothSymptoms = prrByLotAndSymptom[[symptomX, symptomY]]
        isSymptomXNonZero = bothSymptoms[symptomX] != 0
        isSymptomYNonZero = bothSymptoms[symptomY] != 0
        return bothSymptoms[isSymptomXNonZero & isSymptomYNonZero]

    @staticmethod
    def _filter(dataFrames, dataFramePredicate):
        """Lazily yield the data frames for which ``dataFramePredicate`` is truthy."""
        return filter(dataFramePredicate, dataFrames)
|
||||||
@@ -0,0 +1,113 @@
|
|||||||
|
import unittest
|
||||||
|
from pandas.testing import assert_frame_equal
|
||||||
|
from TestHelper import TestHelper
|
||||||
|
import pandas as pd
|
||||||
|
from SymptomsCausedByVaccines.MultiLineFitting.SymptomCombinationsProvider import SymptomCombinationsProvider
|
||||||
|
|
||||||
|
class SymptomCombinationsProviderTest(unittest.TestCase):
    """Tests for SymptomCombinationsProvider.generateSymptomCombinations."""

    def test_generateSymptomCombinations(self):
        # Given
        prrByLotAndSymptom = self._createPrrByLotAndSymptom(
            columns = ['SymptomA', 'SymptomB', 'SymptomC', 'SymptomD'],
            data = [  [0.6,        1.5,        1.2,        0.0]],
            lots = ['LOT-1'])

        # When
        symptomCombinations = list(
            SymptomCombinationsProvider.generateSymptomCombinations(
                prrByLotAndSymptom,
                dataFramePredicate = lambda df: len(df) >= 1))

        # Then
        self.assertEqual(len(symptomCombinations), 3)
        assert_frame_equal(
            symptomCombinations[0],
            self._createPrrByLotAndSymptom(
                columns = ['SymptomA', 'SymptomB'],
                data = [  [0.6,        1.5]],
                lots = ['LOT-1']))
        assert_frame_equal(
            symptomCombinations[1],
            self._createPrrByLotAndSymptom(
                columns = ['SymptomA', 'SymptomC'],
                data = [  [0.6,        1.2]],
                lots = ['LOT-1']))
        assert_frame_equal(
            symptomCombinations[2],
            self._createPrrByLotAndSymptom(
                columns = ['SymptomB', 'SymptomC'],
                data = [  [1.5,        1.2]],
                lots = ['LOT-1']))

    def test_generateSymptomCombinations_minSizeOfDataFrame_2(self):
        # Given
        prrByLotAndSymptom = self._createPrrByLotAndSymptom(
            columns = ['SymptomA', 'SymptomB'],
            data = [  [0.6,        1.5],
                      [1.6,        2.5]],
            lots = ['LOT-1', 'LOT-2'])

        # When
        symptomCombinations = list(
            SymptomCombinationsProvider.generateSymptomCombinations(
                prrByLotAndSymptom,
                dataFramePredicate = lambda df: len(df) >= 2))

        # Then
        self.assertEqual(len(symptomCombinations), 1)
        assert_frame_equal(
            symptomCombinations[0],
            self._createPrrByLotAndSymptom(
                columns = ['SymptomA', 'SymptomB'],
                data = [  [0.6,        1.5],
                          [1.6,        2.5]],
                lots = ['LOT-1', 'LOT-2']))

    def test_generateSymptomCombinations_minSizeOfDataFrame_3(self):
        # Given
        prrByLotAndSymptom = self._createPrrByLotAndSymptom(
            columns = ['SymptomA', 'SymptomB'],
            data = [  [0.6,        1.5],
                      [1.6,        2.5]],
            lots = ['LOT-1', 'LOT-2'])

        # When
        symptomCombinations = list(
            SymptomCombinationsProvider.generateSymptomCombinations(
                prrByLotAndSymptom,
                dataFramePredicate = lambda df: len(df) >= 3))

        # Then
        self.assertEqual(len(symptomCombinations), 0)

    @staticmethod
    def _createPrrByLotAndSymptom(columns, data, lots):
        # data frame of values per symptom column, indexed by the given lots under the index name 'VAX_LOT'
        return TestHelper.createDataFrame(
            columns = columns,
            data = data,
            index = pd.Index(
                name = 'VAX_LOT',
                data = lots))
|
||||||
9
src/SymptomsCausedByVaccines/MultiLineFitting/Utils.py
Normal file
9
src/SymptomsCausedByVaccines/MultiLineFitting/Utils.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
import itertools
|
||||||
|
|
||||||
|
def generatePairs(n):
    """Lazily yield every index pair (i, j) with 0 <= j < i < n.

    The pairs are ordered by ascending i and, for equal i, by ascending j.
    """
    # i = 0 has no smaller partner, hence the outer loop may start at 1
    for larger in range(1, n):
        yield from ((larger, smaller) for smaller in range(larger))
|
||||||
|
|
||||||
|
def take(iterable, numElements):
    """Return the first ``numElements`` items of ``iterable`` as a list.

    All items are returned when ``numElements`` is None.
    """
    if numElements is None:
        return list(iterable)
    return list(itertools.islice(iterable, numElements))
|
||||||
4615
src/data/GoogleAnalytics/CountryByBatchcode 20231110-20231114.csv
Normal file
4615
src/data/GoogleAnalytics/CountryByBatchcode 20231110-20231114.csv
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user