Compare commits
25 Commits
Drugs-for-
...
RANSAC
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
95da66087d | ||
|
|
7081d9014b | ||
|
|
b773e34f3d | ||
|
|
d40116ba6f | ||
|
|
36492ae88b | ||
|
|
ee524ef036 | ||
|
|
2d43c31f95 | ||
|
|
c8849301fe | ||
|
|
a5dc008310 | ||
|
|
a14c1cd217 | ||
|
|
1941337066 | ||
|
|
2b81552d7a | ||
|
|
62ce64308c | ||
|
|
fcd8614420 | ||
|
|
fe7c2b1c88 | ||
|
|
8231453ae2 | ||
|
|
c2f900504a | ||
|
|
2caae0e198 | ||
|
|
ba628774f5 | ||
|
|
44e383734c | ||
|
|
457b2c6dd7 | ||
|
|
487ff3eff0 | ||
|
|
4c7da48bf9 | ||
|
|
da6e356e11 | ||
|
|
0d19a31574 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -17,3 +17,4 @@ google-chrome-stable_current_amd64*
|
||||
src/captcha/__pycache__
|
||||
src/GoogleAnalytics/__pycache__
|
||||
src/SymptomsCausedByVaccines/__pycache__
|
||||
src/SymptomsCausedByVaccines/MultiLineFitting/__pycache__
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
name: howbadismybatch-venv
|
||||
channels:
|
||||
- defaults
|
||||
# - conda-forge
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.9
|
||||
- ipykernel
|
||||
- numpy
|
||||
- pandas
|
||||
- scikit-learn
|
||||
- scikit-spatial
|
||||
- urllib3
|
||||
- requests
|
||||
- bs4
|
||||
|
||||
@@ -654,14 +654,179 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Multi Line Fitting"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from SymptomsCausedByVaccines.MultiLineFitting.MultiLineFitter import MultiLineFitter\n",
|
||||
"from SymptomsCausedByVaccines.MultiLineFitting.SymptomCombinationsProvider import SymptomCombinationsProvider\n",
|
||||
"from matplotlib import pyplot as plt\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# symptomX = 'Abdominal discomfort' # HIV test' # 'Immunosuppression'\n",
|
||||
"# symptomY = 'Abdominal distension' # 'Infection' # 'Immunoglobulin therapy'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# df = prrByLotAndSymptom[[symptomX, symptomY]]\n",
|
||||
"# df = df[(df[symptomX] != 0) & (df[symptomY] != 0)]\n",
|
||||
"# df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# retain only those columns of prrByLotAndSymptom that have more than 400 PRRs != 0\n",
|
||||
"# prrByLotAndSymptom2 = prrByLotAndSymptom.loc[:, (prrByLotAndSymptom != 0).sum() >= 400]\n",
|
||||
"# prrByLotAndSymptom2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"symptomCombinations = SymptomCombinationsProvider.generateSymptomCombinations(\n",
|
||||
" prrByLotAndSymptom,\n",
|
||||
" dataFramePredicate = lambda df: 40 <= len(df) <= 50)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from SymptomsCausedByVaccines.MultiLineFitting.Utils import take\n",
|
||||
"\n",
|
||||
"df = take(symptomCombinations, 1)[0]\n",
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"symptomX, symptomY = df.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"points = [(x, y) for [x, y] in df.values]\n",
|
||||
"points"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def draw(points, clusters, lines, symptomX, symptomY, minClusterSize):\n",
|
||||
" _, ax = plt.subplots()\n",
|
||||
" plt.scatter(_getXs(points), _getYs(points), color = \"blue\", marker = \".\", s = 100)\n",
|
||||
" for cluster, line in zip(clusters, lines):\n",
|
||||
" if len(cluster) >= minClusterSize:\n",
|
||||
" _drawLine(line, cluster, ax)\n",
|
||||
" plt.scatter(_getXs(cluster), _getYs(cluster), marker = \".\", s = 100)\n",
|
||||
" plt.xlabel(symptomX)\n",
|
||||
" plt.ylabel(symptomY)\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"def _drawLine(line, cluster, ax):\n",
|
||||
" coords = line.transform_points(cluster)\n",
|
||||
" magnitude = line.direction.norm()\n",
|
||||
" line.plot_2d(ax, t_1 = min(coords) / magnitude, t_2 = max(coords) / magnitude)\n",
|
||||
"\n",
|
||||
"def _getXs(xys):\n",
|
||||
" return [x for (x, _) in xys]\n",
|
||||
"\n",
|
||||
"def _getYs(xys):\n",
|
||||
" return [y for (_, y) in xys]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clustersAscending, linesAscending = MultiLineFitter.fitPointsByAscendingLines(\n",
|
||||
" points,\n",
|
||||
" consensusThreshold = 0.01,\n",
|
||||
" maxNumLines = None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"draw(points, clustersAscending, linesAscending, symptomX, symptomY, minClusterSize = 5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clusters, lines = MultiLineFitter.fitPointsByLines(\n",
|
||||
" points,\n",
|
||||
" consensusThreshold = 0.01,\n",
|
||||
" maxNumLines = None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"draw(points, clusters, lines, symptomX, symptomY, minClusterSize = 5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "howbadismybatch-venv",
|
||||
"display_name": "howbadismybatch-venv-kernel",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "howbadismybatch-venv-kernel"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
import numpy as np
|
||||
|
||||
class CharacteristicFunctions:
    """Selects elements of a sequence by means of a 0/1 characteristic function."""

    @staticmethod
    def apply(characteristicFunction, elements):
        """Return the elements whose characteristic function entry equals 1.

        `elements` is converted to a numpy array and indexed with the positions
        at which `characteristicFunction` holds the value 1, so the result is a
        numpy array as well.
        """
        selectedIndexes = CharacteristicFunctions._getIndexes(characteristicFunction)
        return np.array(elements)[selectedIndexes]

    @staticmethod
    def _getIndexes(characteristicFunction):
        """Return the positions (in order) at which the characteristic function equals 1."""
        selectedIndexes = []
        for position, flag in enumerate(characteristicFunction):
            if flag == 1:
                selectedIndexes.append(position)
        return selectedIndexes
|
||||
@@ -0,0 +1,51 @@
|
||||
from skspatial.objects import Line
|
||||
from SymptomsCausedByVaccines.MultiLineFitting.Utils import generatePairs, take
|
||||
|
||||
|
||||
class LinesFactory:
    """Creates the candidate lines through all pairs of a given list of points."""

    @staticmethod
    def createLines(points, maxNumLines = None):
        """Return the unique lines through pairs of `points`.

        At most `maxNumLines` candidate lines are generated (all of them if
        `maxNumLines` is None); lines that are close to a later candidate are
        dropped afterwards, so fewer than `maxNumLines` lines may be returned.
        """
        return LinesFactory._getUniqueLines(
            take(
                LinesFactory._generateAllLines(points),
                maxNumLines))

    @staticmethod
    def createAscendingLines(points, maxNumLines = None):
        """Like `createLines`, but only lines with a non-descending direction are considered."""
        return LinesFactory._getUniqueLines(
            take(
                LinesFactory._generateAllAscendingLines(points),
                maxNumLines))

    @staticmethod
    def _generateAllAscendingLines(points):
        """Lazily yield the lines through point pairs whose direction is ascending."""
        return (line for line in LinesFactory._generateAllLines(points) if LinesFactory._isAscending(line.direction))

    @staticmethod
    def _generateAllLines(points):
        """Lazily yield one line per pair of distinct points.

        Pairs of identical points are skipped: they do not define a line and
        Line.from_points would raise a ValueError for their zero direction vector.
        """
        return (
            Line.from_points(pointA, pointB)
            for (pointA, pointB) in LinesFactory._generatePairs(points)
            if not LinesFactory._areSamePoint(pointA, pointB))

    @staticmethod
    def _areSamePoint(pointA, pointB):
        """Return True if both points have exactly the same coordinates."""
        return tuple(pointA) == tuple(pointB)

    @staticmethod
    def _isAscending(direction):
        """Return True if both direction components have the same sign (zero counts as either sign)."""
        return (direction[0] >= 0 and direction[1] >= 0) or (direction[0] <= 0 and direction[1] <= 0)

    @staticmethod
    def _generatePairs(points):
        """Lazily yield all pairs of points, in the index order produced by generatePairs."""
        return ((points[i], points[j]) for (i, j) in generatePairs(len(points)))

    @staticmethod
    def _getUniqueLines(lines):
        """Return `lines` without duplicates, preserving order.

        A line is dropped if it is close to any line that follows it, i.e. of
        several close lines only the last one is kept.
        """
        return [
            line
            for (index, line) in enumerate(lines)
            if not LinesFactory._isLineCloseToAnyOtherLine(line, lines[index + 1:])]

    @staticmethod
    def _isLineCloseToAnyOtherLine(line, otherLines):
        """Return True if `line.is_close` holds for at least one of `otherLines`."""
        return any(line.is_close(otherLine) for otherLine in otherLines)
|
||||
@@ -0,0 +1,68 @@
|
||||
import unittest
|
||||
from skspatial.objects import Line
|
||||
from SymptomsCausedByVaccines.MultiLineFitting.LinesFactory import LinesFactory
|
||||
|
||||
|
||||
class LinesFactoryTest(unittest.TestCase):
    """Unit tests for LinesFactory.createLines and LinesFactory.createAscendingLines."""

    def test_createLines(self):
        # Given: three collinear points on the x-axis
        collinearPoints = [(1, 0), (2, 0), (3, 0)]

        # When
        createdLines = LinesFactory.createLines(collinearPoints)

        # Then: all pairs describe the same line
        self._assertLinesAreClose(
            createdLines,
            [Line(point = [0, 0], direction = [1, 0])])

    def test_createLines2(self):
        # Given: the corners of a right triangle
        trianglePoints = [(0, 0), (1, 0), (0, 1)]

        # When
        createdLines = LinesFactory.createLines(trianglePoints)

        # Then
        self._assertLinesAreClose(
            createdLines,
            [
                Line(point = [0, 0], direction = [1, 0]),
                Line(point = [0, 0], direction = [0, 1]),
                Line(point = [0, 1], direction = [1, -1])
            ])

    def test_createLines_maxNumLines(self):
        # Given
        trianglePoints = [(0, 0), (1, 0), (0, 1)]

        # When
        createdLines = LinesFactory.createLines(trianglePoints, maxNumLines = 2)

        # Then
        self._assertLinesAreClose(
            createdLines,
            [
                Line(point = [0, 0], direction = [1, 0]),
                Line(point = [0, 0], direction = [0, 1])
            ])

    def test_createAscendingLines(self):
        # Given
        trianglePoints = [(0, 0), (1, 0), (0, 1)]

        # When
        createdLines = LinesFactory.createAscendingLines(trianglePoints)

        # Then: the descending hypotenuse is not part of the result
        self._assertLinesAreClose(
            createdLines,
            [
                Line(point = [0, 0], direction = [1, 0]),
                Line(point = [0, 0], direction = [0, 1])
            ])

    def test_createAscendingLines_maxNumLines(self):
        # Given
        trianglePoints = [(0, 0), (1, 0), (0, 1)]

        # When
        createdLines = LinesFactory.createAscendingLines(trianglePoints, maxNumLines = 1)

        # Then
        self._assertLinesAreClose(
            createdLines,
            [Line(point = [0, 0], direction = [1, 0])])

    def _assertLinesAreClose(self, actualLines, expectedLines):
        # same number of lines, and each actual line is close to the expected line at the same position
        self.assertEqual(len(actualLines), len(expectedLines))
        for actualLine, expectedLine in zip(actualLines, expectedLines):
            self.assertTrue(actualLine.is_close(expectedLine))
|
||||
102
src/SymptomsCausedByVaccines/MultiLineFitting/MultiLineFitter.py
Normal file
102
src/SymptomsCausedByVaccines/MultiLineFitting/MultiLineFitter.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import numpy as np
|
||||
from SymptomsCausedByVaccines.MultiLineFitting.LinesFactory import LinesFactory
|
||||
from SymptomsCausedByVaccines.MultiLineFitting.Utils import generatePairs
|
||||
from SymptomsCausedByVaccines.MultiLineFitting.CharacteristicFunctions import CharacteristicFunctions
|
||||
|
||||
# implementation of "Robust Multiple Structures Estimation with J-linkage" adapted from https://github.com/fkluger/vp-linkage
|
||||
class MultiLineFitter:
    """Fits multiple lines to a set of points by J-linkage style clustering.

    Every point gets a "preference set" (the candidate lines it lies close to).
    Points/clusters are merged greedily by the similarity of their preference
    sets, and one line is picked for every resulting cluster.
    """

    @staticmethod
    def fitPointsByLines(points, consensusThreshold, maxNumLines = None):
        """Fit lines to `points`, using the lines through pairs of points as candidates.

        Returns the same (pointsPerLine, fittedLines) tuple as `fitLines`.
        """
        return MultiLineFitter.fitLines(
            points,
            LinesFactory.createLines(points, maxNumLines),
            consensusThreshold)

    @staticmethod
    def fitPointsByAscendingLines(points, consensusThreshold, maxNumLines = None):
        """Like `fitPointsByLines`, but only ascending candidate lines are considered."""
        return MultiLineFitter.fitLines(
            points,
            LinesFactory.createAscendingLines(points, maxNumLines),
            consensusThreshold)

    @staticmethod
    def fitLines(points, lines, consensusThreshold):
        """Select those of the candidate `lines` that fit clusters of `points`.

        A point supports a line if its distance to the line is at most
        `consensusThreshold`.

        Returns a tuple (pointsPerLine, fittedLines): `fittedLines` are the
        selected lines and `pointsPerLine[i]` are the points supporting
        `fittedLines[i]`.
        """
        preferenceMatrix = MultiLineFitter._createPreferenceMatrix(points, lines, consensusThreshold)
        _, preferenceMatrix4Clusters = MultiLineFitter._createClusters(preferenceMatrix)
        fittedLines = MultiLineFitter._getLines(lines, preferenceMatrix4Clusters)
        return (
            MultiLineFitter._getFittedPointsList(points, fittedLines, consensusThreshold),
            fittedLines)

    @staticmethod
    def _getFittedPointsList(points, lines, consensusThreshold):
        """Return, for every line, the points lying within `consensusThreshold` of it."""
        return MultiLineFitter._getPointsList(
            points,
            MultiLineFitter._createPreferenceMatrix(points, lines, consensusThreshold))

    @staticmethod
    def _getPointsList(points, preferenceMatrix):
        """Return, for every column (line) of the preference matrix, the points flagged with 1."""
        # transposing turns the per-point preference sets into per-line consensus sets
        characteristicFunctionsOfConsensusSets = np.transpose(preferenceMatrix)
        return [
            CharacteristicFunctions.apply(characteristicFunctionOfConsensusSet, points)
            for characteristicFunctionOfConsensusSet in characteristicFunctionsOfConsensusSets]

    @staticmethod
    def _createPreferenceMatrix(points, lines, consensusThreshold):
        """Return an int matrix of shape (len(points), len(lines)).

        Entry [p, l] is 1 if point p lies within `consensusThreshold` of line l, else 0.
        """
        preferenceMatrix = np.zeros([len(points), len(lines)], dtype = int)
        for pointIndex, point in enumerate(points):
            for lineIndex, line in enumerate(lines):
                preferenceMatrix[pointIndex, lineIndex] = 1 if line.distance_point(point) <= consensusThreshold else 0
        return preferenceMatrix

    @staticmethod
    def _createClusters(preferenceMatrix):
        """Greedily merge the rows (clusters) with the most similar preference sets.

        Starts with one cluster per row and repeatedly merges the pair of
        clusters with the highest intersection-over-union of their preference
        sets until no two clusters share a line. The preference set of a merged
        cluster is the intersection of the preference sets of its parts.

        The given `preferenceMatrix` is left untouched.

        Returns a tuple (clusters, preferenceMatrix4Clusters): `clusters` is a
        list of lists of row indexes and row i of `preferenceMatrix4Clusters`
        is the preference set of `clusters[i]`.
        """
        # work on a copy: rows are overwritten below and the caller's matrix must not change
        clusterPreferences = np.array(preferenceMatrix)
        clusters = [[i] for i in range(clusterPreferences.shape[0])]
        while True:
            bestClusterIndexCombination = MultiLineFitter._findMostSimilarClusters(clusterPreferences)
            if bestClusterIndexCombination is None:
                return clusters, clusterPreferences
            (clusterIndexA, clusterIndexB) = bestClusterIndexCombination
            # merge cluster B into cluster A, then drop cluster B
            clusters[clusterIndexA] += clusters[clusterIndexB]
            clusters.pop(clusterIndexB)
            clusterPreferences[clusterIndexA] = np.logical_and(
                clusterPreferences[clusterIndexA],
                clusterPreferences[clusterIndexB])
            clusterPreferences = np.delete(clusterPreferences, clusterIndexB, axis = 0)

    @staticmethod
    def _findMostSimilarClusters(preferenceMatrix):
        """Return the index pair of the two rows with the highest similarity > 0.

        Of several pairs with the same similarity the first one generated by
        generatePairs wins. Returns None if no two rows have anything in common.
        """
        maxSimilarity = 0
        bestClusterIndexCombination = None
        for (clusterIndexA, clusterIndexB) in generatePairs(preferenceMatrix.shape[0]):
            similarity = MultiLineFitter._intersectionOverUnion(
                preferenceMatrix[clusterIndexA],
                preferenceMatrix[clusterIndexB])
            if similarity > maxSimilarity:
                maxSimilarity = similarity
                bestClusterIndexCombination = (clusterIndexA, clusterIndexB)
        return bestClusterIndexCombination

    @staticmethod
    def _intersectionOverUnion(setA, setB):
        """Return |A and B| / |A or B| for two 0/1 vectors, or 0 if they do not intersect."""
        intersection = np.count_nonzero(np.logical_and(setA, setB))
        union = np.count_nonzero(np.logical_or(setA, setB))
        # the guard also avoids a division by zero for two empty sets
        return 1. * intersection / union if intersection > 0.0 else 0

    @staticmethod
    def _getLines(lines, preferenceMatrix):
        """Return one line per row of the preference matrix that prefers at least one line."""
        return np.array(lines)[MultiLineFitter._getLineIndexes(preferenceMatrix)]

    @staticmethod
    def _getLineIndexes(preferenceMatrix):
        """Return, for every row containing a 1, the index of its first 1; other rows are skipped."""
        lineIndexes = (MultiLineFitter._index(lines, 1) for lines in preferenceMatrix)
        return [lineIndex for lineIndex in lineIndexes if lineIndex is not None]

    @staticmethod
    def _index(xs, x):
        """Return the index of the first occurrence of `x` in `xs`, or None if there is none."""
        try:
            return list(xs).index(x)
        except ValueError:
            return None

    @staticmethod
    def _getClusterPoints(points, clusters):
        """Return, for every cluster of point indexes, the corresponding points in ascending index order."""
        sortedClusters = [sorted(cluster) for cluster in clusters]
        return [list(np.array(points)[cluster]) for cluster in sortedClusters]
|
||||
@@ -0,0 +1,152 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from skspatial.objects import Line
|
||||
from SymptomsCausedByVaccines.MultiLineFitting.MultiLineFitter import MultiLineFitter
|
||||
|
||||
|
||||
class MultiLineFitterTest(unittest.TestCase):
    """Unit tests for MultiLineFitter."""

    def test_createPreferenceMatrix(self):
        # Given
        points = [(1, 3), (10, 20)]
        lines = [Line.from_points([0, 0], [100, 0])]
        consensusThreshold = 4.0

        # When
        preferenceMatrix = MultiLineFitter._createPreferenceMatrix(points, lines, consensusThreshold)

        # Then
        np.testing.assert_array_equal(
            preferenceMatrix,
            np.array(
                [
                    [1],
                    [0]
                ]))

    def test_createPreferenceMatrix2(self):
        # Given
        points = [(1, 0), (2, 0), (3, 0), (1, 1), (2, 2), (3, 3)]
        lines = [Line.from_points([0, 0], [1, 0]), Line.from_points([0, 0], [1, 1])]
        consensusThreshold = 0.001

        # When
        preferenceMatrix = MultiLineFitter._createPreferenceMatrix(points, lines, consensusThreshold)

        # Then
        np.testing.assert_array_equal(
            preferenceMatrix,
            np.array(
                [
                    [1, 0],
                    [1, 0],
                    [1, 0],
                    [0, 1],
                    [0, 1],
                    [0, 1]
                ]))

    def test_createClusters(self):
        # Given
        preferenceMatrix = np.array(
            [
                [1],
                [1]
            ])

        # When
        clusters, _ = MultiLineFitter._createClusters(preferenceMatrix)

        # Then
        self.assertEqual(clusters, [[1, 0]])

    def test_createClusters2(self):
        # Given
        preferenceMatrix = np.array(
            [
                [1, 1],
                [1, 0],
                [1, 0],
                [0, 1],
                [0, 1]
            ])

        # When
        clusters, preferenceMatrix4Clusters = MultiLineFitter._createClusters(preferenceMatrix)

        # Then
        # the clusters differ in size, so they are compared as plain lists:
        # building a ragged numpy array raises a ValueError in NumPy >= 1.24
        self.assertEqual(
            clusters,
            [
                [2, 1, 0],
                [4, 3]
            ])
        np.testing.assert_array_equal(
            preferenceMatrix4Clusters,
            np.array(
                [
                    [1, 0],
                    [0, 1]
                ]))

    def test_getLineIndexes(self):
        # Given
        preferenceMatrix = np.array(
            [
                [0, 0, 1],
                [0, 1, 1]
            ])

        # When
        lineIndexes = MultiLineFitter._getLineIndexes(preferenceMatrix)

        # Then
        np.testing.assert_array_equal(lineIndexes, [2, 1])

    def test_fitLines(self):
        # Given
        points = [(0, 0), (1, 0), (2, 0), (1, 1), (2, 2)]
        line1 = Line.from_points([0, 0], [1, 0])
        line2 = Line.from_points([0, 0], [1, 1])
        line3 = Line.from_points([-10, 0], [-10, 1])

        # When
        clusters, fittedLines = MultiLineFitter.fitLines(points, lines = [line1, line2, line3], consensusThreshold = 0.001)

        # Then: line3 is supported by no point and must not be selected
        np.testing.assert_array_equal(
            fittedLines,
            [
                line1,
                line2
            ])
        np.testing.assert_array_equal(
            clusters,
            [
                [(0, 0), (1, 0), (2, 0)],
                [(0, 0), (1, 1), (2, 2)]
            ])

    def test_fitPointsByLines(self):
        # Given
        points = [(0, 0), (1, 0), (2, 0), (1, 1), (2, 2)]

        # When
        clusters, lines = MultiLineFitter.fitPointsByLines(points, consensusThreshold = 0.001)

        # Then
        self.assertEqual(len(lines), 2)
        self.assertTrue(lines[0].is_close(Line.from_points([0, 0], [1, 0])))
        self.assertTrue(lines[1].is_close(Line.from_points([0, 0], [1, 1])))
        np.testing.assert_array_equal(
            clusters,
            [
                [(0, 0), (1, 0), (2, 0)],
                [(0, 0), (1, 1), (2, 2)]
            ])
|
||||
@@ -0,0 +1,22 @@
|
||||
from SymptomsCausedByVaccines.MultiLineFitting.Utils import generatePairs
|
||||
|
||||
class SymptomCombinationsProvider:
    """Lazily provides two-column data frames for pairs of symptom columns."""

    @staticmethod
    def generateSymptomCombinations(prrByLotAndSymptom, dataFramePredicate):
        """Lazily yield the two-symptom data frames accepted by `dataFramePredicate`.

        For every pair of columns of `prrByLotAndSymptom` a data frame holding
        just these two columns is built, restricted to the rows in which both
        values are non-zero.
        """
        symptomCombinations = (
            SymptomCombinationsProvider._generateSymptomCombination(prrByLotAndSymptom, symptomX, symptomY)
            # each generated pair is unpacked as (symptomY, symptomX)
            for (symptomY, symptomX) in SymptomCombinationsProvider._generatePairs(prrByLotAndSymptom.columns))
        return SymptomCombinationsProvider._filter(symptomCombinations, dataFramePredicate)

    @staticmethod
    def _generatePairs(symptoms):
        """Lazily yield all pairs of symptoms, in the index order produced by generatePairs."""
        return (
            (symptoms[first], symptoms[second])
            for (first, second) in generatePairs(len(symptoms)))

    @staticmethod
    def _generateSymptomCombination(prrByLotAndSymptom, symptomX, symptomY):
        """Return the columns symptomX and symptomY, keeping only rows where both are non-zero."""
        bothSymptoms = prrByLotAndSymptom[[symptomX, symptomY]]
        isXNonZero = bothSymptoms[symptomX] != 0
        isYNonZero = bothSymptoms[symptomY] != 0
        return bothSymptoms[isXNonZero & isYNonZero]

    @staticmethod
    def _filter(dataFrames, dataFramePredicate):
        """Lazily yield the data frames for which `dataFramePredicate` returns a truthy value."""
        return filter(dataFramePredicate, dataFrames)
|
||||
@@ -0,0 +1,113 @@
|
||||
import unittest
|
||||
from pandas.testing import assert_frame_equal
|
||||
from TestHelper import TestHelper
|
||||
import pandas as pd
|
||||
from SymptomsCausedByVaccines.MultiLineFitting.SymptomCombinationsProvider import SymptomCombinationsProvider
|
||||
|
||||
class SymptomCombinationsProviderTest(unittest.TestCase):
    """Unit tests for SymptomCombinationsProvider.generateSymptomCombinations."""

    def test_generateSymptomCombinations(self):
        # Given: SymptomD is 0 for the only lot and hence must not show up in any combination
        prrByLotAndSymptom = self._createPrrByLot(
            columns = ['SymptomA', 'SymptomB', 'SymptomC', 'SymptomD'],
            data = [ [0.6, 1.5, 1.2, 0.0]],
            lots = ['LOT-1'])

        # When
        symptomCombinations = list(
            SymptomCombinationsProvider.generateSymptomCombinations(
                prrByLotAndSymptom,
                dataFramePredicate = lambda df: len(df) >= 1))

        # Then
        self.assertEqual(len(symptomCombinations), 3)
        assert_frame_equal(
            symptomCombinations[0],
            self._createPrrByLot(
                columns = ['SymptomA', 'SymptomB'],
                data = [ [0.6, 1.5]],
                lots = ['LOT-1']))
        assert_frame_equal(
            symptomCombinations[1],
            self._createPrrByLot(
                columns = ['SymptomA', 'SymptomC'],
                data = [ [0.6, 1.2]],
                lots = ['LOT-1']))
        assert_frame_equal(
            symptomCombinations[2],
            self._createPrrByLot(
                columns = ['SymptomB', 'SymptomC'],
                data = [ [1.5, 1.2]],
                lots = ['LOT-1']))

    def test_generateSymptomCombinations_minSizeOfDataFrame_2(self):
        # Given
        prrByLotAndSymptom = self._createPrrByLot(
            columns = ['SymptomA', 'SymptomB'],
            data = [ [0.6, 1.5],
                     [1.6, 2.5]],
            lots = ['LOT-1', 'LOT-2'])

        # When
        symptomCombinations = list(
            SymptomCombinationsProvider.generateSymptomCombinations(
                prrByLotAndSymptom,
                dataFramePredicate = lambda df: len(df) >= 2))

        # Then
        self.assertEqual(len(symptomCombinations), 1)
        assert_frame_equal(
            symptomCombinations[0],
            self._createPrrByLot(
                columns = ['SymptomA', 'SymptomB'],
                data = [ [0.6, 1.5],
                         [1.6, 2.5]],
                lots = ['LOT-1', 'LOT-2']))

    def test_generateSymptomCombinations_minSizeOfDataFrame_3(self):
        # Given
        prrByLotAndSymptom = self._createPrrByLot(
            columns = ['SymptomA', 'SymptomB'],
            data = [ [0.6, 1.5],
                     [1.6, 2.5]],
            lots = ['LOT-1', 'LOT-2'])

        # When
        symptomCombinations = list(
            SymptomCombinationsProvider.generateSymptomCombinations(
                prrByLotAndSymptom,
                dataFramePredicate = lambda df: len(df) >= 3))

        # Then: the only combination has just 2 rows and is rejected by the predicate
        self.assertEqual(len(symptomCombinations), 0)

    @staticmethod
    def _createPrrByLot(columns, data, lots):
        # builds a data frame indexed by the given lots, with the index named 'VAX_LOT'
        return TestHelper.createDataFrame(
            columns = columns,
            data = data,
            index = pd.Index(
                name = 'VAX_LOT',
                data = lots))
|
||||
9
src/SymptomsCausedByVaccines/MultiLineFitting/Utils.py
Normal file
9
src/SymptomsCausedByVaccines/MultiLineFitting/Utils.py
Normal file
@@ -0,0 +1,9 @@
|
||||
import itertools
|
||||
|
||||
def generatePairs(n):
    """Lazily yield every index pair (i, j) with 0 <= j < i < n, ordered by i and then by j."""
    yield from ((upper, lower) for upper in range(n) for lower in range(upper))
|
||||
|
||||
def take(iterable, numElements):
    """Return the first `numElements` items of `iterable` as a list.

    If `numElements` is None, all items are returned.
    """
    if numElements is None:
        return list(iterable)
    firstElements = itertools.islice(iterable, numElements)
    return list(firstElements)
|
||||
4615
src/data/GoogleAnalytics/CountryByBatchcode 20231110-20231114.csv
Normal file
4615
src/data/GoogleAnalytics/CountryByBatchcode 20231110-20231114.csv
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user