Count lines with more the X dots on it

Question

I have Python a Algorithm class with a calculate method that takes the list of tuples where every tuple have 2 integers (coordinates of the dots) and goal argument and returns integer - a number of lines that have more than int(goal) - 1 dots on it.

For example, if we have four dots on one line (like . . . ., with coordinates [(0, 0), (1, 0), (2, 0), (3,0)]) and goal is 3, then the algorithm must return 1, instead of 4.

I have a Python-Tkinter UI that has a field with dots. A user can choose some of them, and program must show points for currently chosen dots after every new click (so speed matters).

""" This module contains algorithms. Dot, Vector and Chain classes represents objects, while Algorithm class provides just an interface for use. See `Dot.build_vectors` and `Dot.check_connection` for understanding. """ from settings import GOAL from .logger import calc_lg # ================= # helpful-functions # ================= def greatest_common_divisor(a, b): """ Euclid's algorithm """ while b: a, b = b, a % b return a def _check_type(obj, *expected) -> object: if type(obj) not in expected: raise RuntimeError('Expected type: "{}", got: "{}".'.format(type(obj), expected)) return obj # ======= # Classes # ======= class Dot: """ Represents dots on the xOy (e.g. checkers on the field). """ dots = [] """ Field for storing all instantiated `Dot` objects. """ build_all_lines = False def __init__(self, x: int, y: int): self.x = x self.y = y self.vectors = {} self.chains = [] # to not use custom `register` method as for `Chain` and `Vector` self.dots.append(self) def __repr__(self): return '<Dot x:{} y:{}>'.format(self.x, self.y) def build_vectors(self): """ Builds vectors to all other dots (that are in `dots` field). """ for dot in self.dots: if dot != self: # if it is another dot vector = Vector(self, dot).register() # add vector to storage self.check_connection(vector, dot) def check_connection(self, vector, dot): """ Checks if `self` have connection to this `dot` by this `vector`. If have, than creates new Chain, else store it in `vectors` dictionary. **Note**: this method is the most important """ _check_type(vector, Vector) _check_type(dot, Dot) if vector in self.vectors: # case when we have `dot` for this `vector` # create `Chain` object with 3 dots inside: # 1. `self`, 2. dot associated by `vector` 3. `dot` Chain(vector, self).add(self.vectors[vector]).add(dot).register() else: # add `dot` to the dictionary with `vector` as key, to # have access to this later self.vectors[vector] = dot if self.build_all_lines: # ! (4-question point) ! Chain(vector, self).add(dot).register() class Vector: """ Represents connections between Dots (e.g. lines between checkers). """ vectors = [] """ Field for storing all vectors together. """ def __init__(self, dot1: Dot, dot2: Dot): _check_type(dot1, Dot) _check_type(dot2, Dot) self.x, self.y = self.calculate_vector(dot1, dot2) def __repr__(self): return '<Vector x:{} y:{}>'.format(self.x, self.y) def __hash__(self): return id(self).__hash__() def __eq__(self, other): """ Compares vectors by their coordinates. Returns `True` if they ar collinear, else - `False` """ return (self.x == other.x and self.y == other.y) or \ (self.x == -other.x and self.y == -other.y) @staticmethod def calculate_vector(dot1: Dot, dot2: Dot) -> tuple: """ Returns pair of integers: x, y - coordinates of the vector between given dots. """ _check_type(dot1, Dot) _check_type(dot2, Dot) x = dot1.x - dot2.x y = dot1.y - dot2.y gcd = greatest_common_divisor(max(x, y), min(x, y)) return int(x/gcd), int(y/gcd) def register(self): """ Checks if it is equal vector in `vectors` field. If so - deletes `self`, else - appends `self` to `vectors`""" for item in self.vectors: if self == item: del self # very dangerous place !!! return item else: self.vectors.append(self) return self class Chain: """ Represents groups of Dots connected with the same Vectors (e.g. checkers on one line). """ chains = [] """ Field for storing all Chain objects. """ def __init__(self, vector: Vector, dot1: Dot): _check_type(vector, Vector) _check_type(dot1, Dot) self.vector = vector self.dots = [dot1] self.len = 1 def __repr__(self): return '<Chain [Vector: {}, len: {}, first Dot: {}]>'.format(self.vector, self.len, self.dots[0]) def add(self, dot: Dot): """ If `dot` isn't in `dots` attribute (list) - append it and increment `len` attribute. """ if dot not in self.dots: self.dots.append(dot) self.len += 1 return self def _check_in(self, other): """ Checks if it is even one dot in both Chain objects. If so - return `True`, else - `False`. """ _check_type(other, Chain) for dot in self.dots: if dot in other.dots: return True else: return False def _merge_in(self, other): """ Appends all dots that aren't in `other` to `other`. """ _check_type(other, Chain) for dot in self.dots: if dot not in other.dots: other.add(dot) def register(self): """ Check if it is equivalent chain in `chains` field. Do comparing by calling `_check_in` method and comparing vectors, and if they are equal - call `merge_in` and deletes `self`, and returns `other`, else - return `self`. """ for item in self.chains: if self.vector == item.vector and self._check_in(item): self._merge_in(item) del self return item else: self.chains.append(self) return self class Algorithm: def __init__(self): self.Chain = Chain self.Dot = Dot self.Vector = Vector def init_dots(self, array: list): """ Initiate dots from coordinates in `array`.""" for x, y in array: self.Dot(x, y) def init_vectors(self): """ Initiate Vectors by building connections from every dot. """ for dot in self.Dot.dots: dot.build_vectors() def clear(self): """ Clear all initiated objects. """ # this definition must remove all references to # created objects, so garbage collector must # delete them for ever self.Dot.dots = [] self.Vector.vectors = [] self.Chain.chains = [] def calculate(self, array: list or tuple, goal: int =GOAL, build_all_lines: bool =False, auto_correct: bool =True) -> int: """ Calculates how main points you have. Initiates all objects, counts points, logs result.""" points = '' # checking input data _check_type(goal, int) _check_type(build_all_lines, bool) _check_type(auto_correct, bool) _check_type(array, list, tuple) if goal == 2 and not build_all_lines: if auto_correct: calc_lg.debug('Cannot calculate when "build_all_lines" is "False".') build_all_lines = True calc_lg.debug('Auto-correction performed: set "build_all_lines" to "True".') else: msg = 'Cannot calculate when "build_all_lines" is "False" and "goal" is "2".' calc_lg.error(msg) raise RuntimeError(msg) elif goal < 2: # we cannot build Chain with less then 2 dots inside raise RuntimeError('Cannot calculate when "goal < 2".') elif build_all_lines: if auto_correct: calc_lg.debug('Overhead: "build_all_lines" will increase calculation time, setting it to "False".') build_all_lines = False calc_lg.debug('Auto-correction performed: set "build_all_lines" to "False".') else: calc_lg.debug('Overhead: "build_all_lines" will increase calculation time') else: calc_lg.debug('Starting calculation.') # actually calculation try: self.Dot.build_all_lines = build_all_lines self.init_dots(array) self.init_vectors() points = self._get_points(goal) except Exception as e: calc_lg.exception(e, exc_info=True) points = 'ERROR' raise e else: calc_lg.info('Points - {} ; {}'.format(points, str(array))) finally: self.clear() return points def _get_points(self, goal: int) -> int: """ Counts how many chains have more chan `goal` dots inside. """ points = 0 for chain in self.Chain.chains: if chain.len >= goal: points += 1 return points algo = Algorithm()

See more:

Version for question
Tests for this version (I have built the project with idea to use it without downloadable Python packages, so this module is weird)

Questions:

Is code formatting and documentation good, readable and easy to understand?
Does this class implementation need improvement to meet some unknown for me common practices?
I want Algorithm.clear method to clear memory and prepare for next calculations, but I'm not sure that it is the best way to do it.
To improve performance, I have implemented build_all_lines argument in the Algorithm.calculate method, because in Dot.check_connection at ! (4-question point) ! if statement depends on it. If this argument is True, then the code is three times slower than without. Actually, the auto_correct argument was added to auto-correct build_all_lines argument depending on the goal argument. I'm not sure if it's okay.

Gareth Rees · Accepted Answer · 2018-10-11 15:20:43Z

1. Dynamic problem

The post says:

I have a Python-Tkinter UI that has a field with dots. A user can choose some of them, and program must show points for currently chosen dots after every new click (so speed matters).

This kind of situation, where there are small changes to the input (here, adding a dot to the set of selected dots) and the algorithm must update its output (here, the score) in a timely manner, is known as a dynamic problem.

The code in the post starts again from scratch whenever the input changes. But this is inefficient: when a new dot is chosen, the code in Algorithm.init_vectors looks at the vector from each dot to every other dot. So if there are \$n\$ dots, it takes \$O(n^2)\$ to update the score.

However, as we'll see below, an algorithm that maintains a suitable data structure can process each new dot and update the total number of points in constant time. (Strictly speaking, in time that depends on the size of the field of dots and the goal value, but this is constant for any particular game.)

Accordingly, I won't review the code in the post, but instead present the better algorithm.

2. Algorithm sketch

The key observation is that there is a fixed set of lines through each dot that have the goal number of dots (or more). For example, if the field is an 8×8 checkerboard of dots, and the goal is four dots per line, then there are eight lines through the red dot that we might need to consider:

What this means is that we can keep a mapping from lines to the number of selected dots on each line, for example using collections.Counter:

from collections import Counter class Dots: """Object representing the state of a rectangular grid of dots.""" def __init__(self, width, height, goal): """Construct rectangular grid of dots with the given width and height, and the goal being to construct lines with the given number of dots. """ self.width = width self.height = height self.goal = goal self.selected = set() # Set of selected dots self.line_dots = Counter() # line -> count of selected dots on line self.goal_lines = 0 # Lines with selected dots >= goal

And then when the user selects a new dot, we can update the mapping and the score like this:

def select_dot(self, dot): """Select the dot with the given coordinates.""" assert dot not in self.selected self.selected.add(dot) for line in self.lines_through_dot(dot): self.line_dots[line] += 1 if self.line_dots[line] == goal: self.goal_lines += 1

3. Representing lines

What kind of objects are the line values generated by lines_through_dot?

A natural way to uniquely represent a line is by the pair \$n, n·p\$ where \$n\$ is a normal vector to the line, \$p\$ is any point on the line and \$·\$ is the dot product. (This works as a representation because the value of \$n·p\$ is the same for every point on the line.)

So lines_through_dot can be implemented like this:

def lines_through_dot(self, dot): """Generate the lines passing through dot that might have at least goal dots, in a rectangular grid of dots with the given width and height. Each line is a pair (n, n @ dot) where n is a normal vector to the line. """ for normal in self.line_normals: yield normal, sum(i * j for i, j in zip(normal, dot))

Note that because we're going to be using these line representations as dictionary keys, I've been careful to avoid the need for floating-point arithmetic, because then it might not be the case that \$n·p\$ is the same for every \$p\$ due to the limitations of floating point. This is why I've not required \$n\$ to be a unit normal.

4. Finding the lines

How are we going to implement line_normals? Well, it depends on how the field of dots is arranged, and the post doesn't explain this. But the code for the user interface suggests that it is a rectangular grid of dots. Where the dots form a regular grid it's possible to enumerate the lines by considering the possible slopes for the line. For example, if the field is an 8×8 checkerboard of dots, and the goal is four dots per line, then there are 74 lines altogether:

In the figure I've grouped the lines according to their direction, given as a vector \$x, y\$ below the group. The normals to these directions can be generated like this, using itertools.product and math.gcd:

from functools import lru_cache from itertools import product from math import gcd @property @lru_cache(maxsize=1) def line_normals(self): """List of distinct normal vectors (x, y) to lines that might have at least goal dots. """ x_max = (self.width - 1) // (self.goal - 1) y_max = (self.height - 1) // (self.goal - 1) result = [] for x, y in product(range(x_max + 1), range(-y_max, y_max + 1)): if gcd(x, y) == 1 and (x, y) != (0, -1): result.append((-y, x)) return result

Notes:

The reason for the call to gcd is that, for example, the direction \$1, 1\$ is the same as \$2, 2\$, but we need to generate each direction just once to avoid double-counting.
The reason for the special case (x, y) != (0, -1) is that the directions \$0, 1\$ and \$0, −1\$ both have greatest common divisor 1, but represent the same set of lines, so we need to drop one of them to avoid double-counting.
I've made this a cached property using functools.lru_cache so that we can call it multiple times without worrying about the cost of recomputing the result.

For example:

>>> Dots(8, 8, 4).line_normals [(-1, 0), (2, 1), (1, 1), (0, 1), (-1, 1), (-2, 1), (1, 2), (-1, 2)]

5. Summary

The select_dot method updates self.goal_lines in time \$O\left({wh \over g^2}\right)\$, where \$w\$ and \$h\$ are the width and height of the grid, and \$g\$ is the goal (dots per line). This does not depend on the number of dots and so is constant for any particular game.

Stack Exchange Network

Count lines with more the X dots on it

1 Answer 1

1. Dynamic problem

2. Algorithm sketch

3. Representing lines

4. Finding the lines

5. Summary

You must log in to answer this question.

Hot Network Questions

Count lines with more the X dots on it

1 Answer 1

1. Dynamic problem

2. Algorithm sketch

3. Representing lines

4. Finding the lines

5. Summary

You must log in to answer this question.

Related

Hot Network Questions