Skip to content

Core API

kmeanssa_ng.core

Core abstractions and algorithms for k-means on metric spaces.

Center

Bases: Point

Abstract base class for cluster centers.

A center is a special type of point that can move through the space using two mechanisms: - Brownian motion: Random exploration - Drift: Directed movement toward a target point

This class is used in simulated annealing for k-means clustering.

Source code in kmeanssa_ng/core/abstract.py
class Center(Point):
    """Abstract base class for cluster centers.

    A center is a special type of point that can move through the space
    using two mechanisms:
    - Brownian motion: Random exploration
    - Drift: Directed movement toward a target point

    This class is used in simulated annealing for k-means clustering.
    """

    @abstractmethod
    def brownian_motion(self, time_to_travel: float) -> None:
        """Perform random Brownian motion in the space.

        Args:
            time_to_travel: Time parameter controlling the magnitude of motion.
                Typical distance traveled is proportional to sqrt(time_to_travel).
        """
        raise NotImplementedError

    @abstractmethod
    def drift(self, target_point: Point, prop_to_travel: float) -> None:
        """Move toward a target point.

        Args:
            target_point: The point to move toward.
            prop_to_travel: Proportion of the distance to travel (between 0 and 1).
                0 means no movement, 1 means move all the way to target.
        """
        raise NotImplementedError

brownian_motion(time_to_travel) abstractmethod

Perform random Brownian motion in the space.

Parameters:

Name Type Description Default
time_to_travel float

Time parameter controlling the magnitude of motion. Typical distance traveled is proportional to sqrt(time_to_travel).

required
Source code in kmeanssa_ng/core/abstract.py
@abstractmethod
def brownian_motion(self, time_to_travel: float) -> None:
    """Perform random Brownian motion in the space.

    Args:
        time_to_travel: Time parameter controlling the magnitude of motion.
            Typical distance traveled is proportional to sqrt(time_to_travel).
    """
    raise NotImplementedError

drift(target_point, prop_to_travel) abstractmethod

Move toward a target point.

Parameters:

Name Type Description Default
target_point Point

The point to move toward.

required
prop_to_travel float

Proportion of the distance to travel (between 0 and 1). 0 means no movement, 1 means move all the way to target.

required
Source code in kmeanssa_ng/core/abstract.py
@abstractmethod
def drift(self, target_point: Point, prop_to_travel: float) -> None:
    """Move toward a target point.

    Args:
        target_point: The point to move toward.
        prop_to_travel: Proportion of the distance to travel (between 0 and 1).
            0 means no movement, 1 means move all the way to target.
    """
    raise NotImplementedError

Point

Bases: ABC

Abstract base class for points in a metric space.

A point is an element of a metric space with a fixed location. Concrete implementations must define which space the point belongs to.

Source code in kmeanssa_ng/core/abstract.py
class Point(ABC):
    """Abstract base class for points in a metric space.

    A point is an element of a metric space with a fixed location.
    Concrete implementations must define which space the point belongs to.
    """

    @property
    @abstractmethod
    def space(self) -> Space:
        """The metric space this point belongs to.

        Returns:
            The Space instance containing this point.
        """
        raise NotImplementedError

space abstractmethod property

The metric space this point belongs to.

Returns:

Type Description
Space

The Space instance containing this point.

SimulatedAnnealing

Simulated annealing for offline k-means clustering.

This algorithm solves the k-means problem on arbitrary metric spaces using simulated annealing. Centers perform Brownian motion (exploration) and drift toward observations (exploitation), with temperature controlled by an inhomogeneous Poisson process.

Attributes:

Name Type Description
space Space

The metric space containing the observations.

k int

Number of clusters.

observations list[Point]

List of points to cluster.

centers list[Center]

Current cluster centers.

Example
# Create observations and space
space = QuantumGraph(...)
points = space.sample_points(100)

# Run simulated annealing with the interleaved algorithm
sa = SimulatedAnnealing(points, k=5)
centers = sa.run_interleaved(robust_prop=0.1)
Source code in kmeanssa_ng/core/simulated_annealing.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
class SimulatedAnnealing:
    """Simulated annealing for offline k-means clustering.

    This algorithm solves the k-means problem on arbitrary metric spaces using
    simulated annealing. Centers perform Brownian motion (exploration) and drift
    toward observations (exploitation), with temperature controlled by an
    inhomogeneous Poisson process.

    Attributes:
        space: The metric space containing the observations.
        k: Number of clusters.
        observations: List of points to cluster.
        centers: Current cluster centers.

    Example:
        ```python
        # Create observations and space
        space = QuantumGraph(...)
        points = space.sample_points(100)

        # Run simulated annealing with the interleaved algorithm
        sa = SimulatedAnnealing(points, k=5)
        centers = sa.run_interleaved(robust_prop=0.1)
        ```
    """

    def __init__(
        self,
        observations: list[Point],
        k: int,
        lambda0: float = 1.0,
        beta0: float = 1.0,
        step_size: float = 0.1,
        energy_mode: str = "uniform",
        random_state: int | np.random.Generator | None = None,
    ) -> None:
        """Initialize the simulated annealing algorithm.

        Args:
            observations: List of points to cluster, all in the same metric space.
            k: Number of clusters.
            lambda0: Initial Brownian motion intensity parameter (must be > 0).
                Controls the magnitude of random exploration.

                Mathematical role: Scales the diffusion coefficient in the
                Brownian motion component. The standard deviation of each
                Brownian step is proportional to lambda0 * sqrt(step_size).

                Practical effect:
                - Higher values (1.5-3.0): More exploration, slower convergence,
                  better escape from local minima
                - Lower values (0.3-0.8): Less exploration, faster convergence,
                  higher risk of local minima
                - Recommended default: 1.0 for balanced exploration/exploitation

                TODO: Add reference to article on HAL/ArXiv when available.

            beta0: Initial drift intensity parameter (must be > 0).
                Controls how strongly centers are pulled toward observations.

                Mathematical role: The drift proportion at time t is computed as
                alpha(t) = min(h * beta0 * log(1 + t), 1) where h is the time
                interval. This controls the strength of attraction toward the
                nearest observation.

                Practical effect:
                - Higher values (2.0-5.0): Stronger drift, faster convergence,
                  more exploitation of current best positions
                - Lower values (0.3-0.8): Weaker drift, more exploration,
                  slower convergence
                - Recommended default: 1.0-2.0 for most cases

                TODO: Add reference to article on HAL/ArXiv when available.

            step_size: Time discretization step for the SDE solver (must be > 0).
                Controls the temporal resolution of the stochastic process.

                Mathematical role: Euler discretization step Δt for solving the
                stochastic differential equation. Smaller values give more
                accurate simulation at the cost of more computation.

                Practical effect:
                - Smaller values (0.001-0.01): More accurate simulation, slower
                - Larger values (0.05-0.1): Faster but less accurate
                - Recommended default: 0.01 for good accuracy/speed tradeoff
                - Rule of thumb: Use step_size much smaller than the typical
                  time scale of the Poisson process (~ 1/lambda0)

            energy_mode: Energy calculation mode, either "uniform" or "obs".
                TODO: Document the difference between these modes.

            random_state: Controls randomness for reproducibility.
                Determines random number generation for all random operations:
                - Shuffling observations
                - Poisson process time generation
                - Brownian motion (via centers)
                - Initialization strategies (KMeansPlusPlus, RandomInit)
                - Space-specific random operations

                Pass an int for reproducible results across multiple function calls.
                When an int is provided, both random.seed() and np.random.seed()
                are set globally to ensure full reproducibility across all components.

                Pass a Generator instance for fine-grained control without affecting
                global state (advanced usage).

                Pass None for non-deterministic behavior (default).

                Example:
                    >>> # Reproducible with seed (recommended)
                    >>> sa1 = SimulatedAnnealing(points, k=3, random_state=42)
                    >>> sa2 = SimulatedAnnealing(points, k=3, random_state=42)
                    >>> # sa1 and sa2 will produce identical results
                    >>>
                    >>> # Advanced: use Generator without global state
                    >>> rng = np.random.default_rng(42)
                    >>> sa = SimulatedAnnealing(points, k=3, random_state=rng)
                    >>> # Note: This only controls operations using self._rng
                    >>> # Other components may still use global random state

        Raises:
            ValueError: If observations is empty, k <= 0, points are in different spaces,
                or hyperparameters are invalid.

        References:
            TODO: Add reference to your article:
            [1] Your Name. "Title of your paper". HAL/ArXiv, 2025.
                URL: https://...

        Example:
            >>> # Quick convergence setup
            >>> sa = SimulatedAnnealing(
            ...     points, k=5,
            ...     lambda0=0.5,  # Less exploration
            ...     beta0=3.0,     # Stronger drift
            ...     step_size=0.01
            ... )
            >>>
            >>> # Thorough search setup (avoid local minima)
            >>> sa = SimulatedAnnealing(
            ...     points, k=5,
            ...     lambda0=2.0,   # More exploration
            ...     beta0=1.0,     # Gentler drift
            ...     step_size=0.01
            ... )
        """
        self._validate_constructor_parameters(
            observations, k, lambda0, beta0, step_size
        )
        self._initialize_random_generator(random_state)

        self._space = observations[0].space
        self._observations = observations.copy()
        self._k = k
        self._lambda = float(lambda0)
        self._beta = float(beta0)
        self._step_size = float(step_size)
        self._energy_mode = energy_mode

        # Use random.shuffle (global state) for consistency with rest of codebase
        # If we used self._rng.shuffle, it would desynchronize from global state
        random.shuffle(self._observations)
        self._centers: list[Center] = []

    def _validate_constructor_parameters(
        self,
        observations: list[Point],
        k: int,
        lambda0: float,
        beta0: float,
        step_size: float,
    ) -> None:
        """Validate parameters for the constructor."""
        if not observations:
            raise ValueError("Observations must be a non-empty list of points.")
        if k <= 0:
            raise ValueError("Number of clusters 'k' must be greater than zero.")
        if any(obs.space != observations[0].space for obs in observations):
            raise ValueError("All observations must belong to the same metric space.")

        self._validate_positive_float(lambda0, "lambda0")
        self._validate_positive_float(beta0, "beta0")
        self._validate_positive_float(step_size, "step_size")

    def _validate_positive_float(self, value: float, name: str) -> None:
        """Validate that a value is a positive float."""
        try:
            float_value = float(value)
        except (TypeError, ValueError) as e:
            raise ValueError(
                f"{name} must be a number, got {type(value).__name__}"
            ) from e
        if float_value <= 0:
            raise ValueError(f"{name} must be positive, got {float_value}")

    def _initialize_random_generator(
        self, random_state: int | np.random.Generator | None
    ) -> None:
        """Initialize the random number generator."""
        if isinstance(random_state, np.random.Generator):
            self._rng = random_state
        else:
            self._rng = np.random.default_rng(random_state)
            # Also set global random state for full reproducibility
            # This affects: initialization strategies, centers' brownian motion, space operations
            if isinstance(random_state, int):
                random.seed(random_state)
                np.random.seed(random_state)

    @property
    def n(self) -> int:
        """Number of observations."""
        return len(self._observations)

    @property
    def observations(self) -> list[Point]:
        """List of observation points."""
        return self._observations

    @property
    def centers(self) -> list[Center]:
        """Current cluster centers."""
        return self._centers

    @property
    def space(self) -> Space:
        """Metric space containing the observations."""
        return self._space

    @property
    def k(self) -> int:
        """Number of clusters."""
        return self._k

    def _clone_centers(self, centers: list[Center]) -> list[Center]:
        """Create independent copies of centers.

        Uses the clone() method if available (much faster than deepcopy),
        otherwise falls back to deepcopy for compatibility.

        Args:
            centers: List of centers to clone.

        Returns:
            List of cloned centers with independent state.
        """
        if hasattr(centers[0], "clone"):
            return [center.clone() for center in centers]
        else:
            # Fallback for custom Center implementations without clone()
            from copy import deepcopy

            return deepcopy(centers)

    def _initialize_times(self, n: int) -> np.ndarray:
        """Generate inhomogeneous Poisson times.

        Args:
            n: Number of time points to generate.

        Returns:
            Array of n+1 time points.
        """
        T = np.zeros(n + 1)
        poiss_sum = 0.0
        for i in range(n):
            # Use random.random() for consistency with global state
            poiss_sum += -1 / self._lambda * np.log(random.random())
            T[i + 1] = np.sqrt(poiss_sum + 1) - 1
        return T

    def calculate_energy_fallback(
        self, centers: list[Center], points: list[Point]
    ) -> float:
        """Calculate k-means energy for given centers.

        Args:
            centers: List of cluster centers.
            points: List of points.

        Returns:
            Average squared distance to nearest center.
        """
        energy = sum(
            min(self.space.distance(center, point) ** 2 for center in centers)
            for point in points
        )
        return energy / len(points)

    def calculate_energy(self, centers: list[Center]) -> float:
        """Calculate k-means energy for given centers based on the energy mode."""
        if (
            hasattr(self.space, "calculate_energy_numba")
            and self.space.calculate_energy_numba is not None
        ):
            return self.space.calculate_energy_numba(centers, how=self._energy_mode)
        return self.space.calculate_energy(centers, how=self._energy_mode)

    def _prepare_run(
        self,
        robust_prop: float,
        initialization_strategy: InitializationStrategy,
        robustification_strategy: RobustificationStrategy,
    ) -> tuple[int, RobustificationStrategy]:
        """Prepare the simulation by initializing centers and strategy."""
        if robust_prop < 0 or robust_prop > 1:
            raise ValueError("The proportion must be in [0,1]")

        i0 = int(np.floor((self.n - 1) * (1 - robust_prop)))

        self._centers = initialization_strategy.initialize_centers(self)

        robustification_strategy.initialize(self)
        return i0, robustification_strategy

    def run_interleaved(
        self,
        initialization_strategy: InitializationStrategy,
        robustification_strategy: RobustificationStrategy,
        robust_prop: float = 0.0,
    ):
        """Run SA with interleaved drift and brownian motion."""
        return self._run_algorithm(
            "interleaved",
            initialization_strategy,
            robustification_strategy,
            robust_prop,
        )

    def run_sequential(
        self,
        initialization_strategy: InitializationStrategy,
        robustification_strategy: RobustificationStrategy,
        robust_prop: float = 0.0,
    ):
        """Run SA with sequential brownian motion then drift."""
        return self._run_algorithm(
            "sequential",
            initialization_strategy,
            robustification_strategy,
            robust_prop,
        )

    def _run_algorithm(
        self,
        mode: str,
        initialization_strategy: InitializationStrategy,
        robustification_strategy: RobustificationStrategy,
        robust_prop: float = 0.0,
    ):
        """Core SA algorithm, supporting interleaved and sequential modes."""
        logger.info(
            "Starting %s SA: k=%d, n_obs=%d, lambda0=%.3f, beta0=%.3f, "
            "step_size=%.4f, robust_prop=%.2f",
            mode,
            self._k,
            self.n,
            self._lambda,
            self._beta,
            self._step_size,
            robust_prop,
        )

        i0, strategy = self._prepare_run(
            robust_prop, initialization_strategy, robustification_strategy
        )
        times = self._initialize_times(self.n)
        time = 0.0
        progress_interval = max(1, self.n // 10)

        for i, point in enumerate(self._observations):
            T = times[i] if mode == "interleaved" else times[i + 1]

            if i % progress_interval == 0 and i > 0:
                progress = 100 * i / self.n
                logger.info(
                    "Progress: %.1f%% (%d/%d observations processed)",
                    progress,
                    i,
                    self.n,
                )

            logger.debug("Processing observation %d, target time T=%.4f", i, T)

            if mode == "interleaved":
                while time <= T - self._step_size:
                    h = min(time + self._step_size, T) - time
                    prop = min(h * self._beta * np.log(1 + time), 1)
                    logger.debug(
                        "Time step: time=%.4f, h=%.4f, drift_prop=%.4f", time, h, prop
                    )
                    for center in self._centers:
                        center.brownian_motion(h)
                    distances = self.space.distances_from_centers(self._centers, point)
                    closest_idx = np.argmin(distances)
                    self._centers[closest_idx].drift(point, prop)
                    time += h
            else:  # sequential
                while time <= T - self._step_size:
                    h = min(time + self._step_size, T) - time
                    logger.debug("Brownian motion: time=%.4f, h=%.4f", time, h)
                    for center in self._centers:
                        center.brownian_motion(h)
                    time += h
                distances = self.space.distances_from_centers(self._centers, point)
                closest_idx = np.argmin(distances)
                prop = min((times[i + 1] - times[i]) * self._beta * np.log(1 + time), 1)
                logger.debug(
                    "Drift: closest_center=%d, drift_prop=%.4f", closest_idx, prop
                )
                self._centers[closest_idx].drift(point, prop)
                time = T

            if i >= i0:
                strategy.collect(self)
                logger.debug(
                    "Collected centers for robustification at observation %d", i
                )

        result = strategy.get_result()
        logger.info("%s SA completed successfully", mode.capitalize())
        return result

centers property

Current cluster centers.

k property

Number of clusters.

n property

Number of observations.

observations property

List of observation points.

space property

Metric space containing the observations.

__init__(observations, k, lambda0=1.0, beta0=1.0, step_size=0.1, energy_mode='uniform', random_state=None)

Initialize the simulated annealing algorithm.

Parameters:

Name Type Description Default
observations list[Point]

List of points to cluster, all in the same metric space.

required
k int

Number of clusters.

required
lambda0 float

Initial Brownian motion intensity parameter (must be > 0). Controls the magnitude of random exploration.

Mathematical role: Scales the diffusion coefficient in the Brownian motion component. The standard deviation of each Brownian step is proportional to lambda0 * sqrt(step_size).

Practical effect: - Higher values (1.5-3.0): More exploration, slower convergence, better escape from local minima - Lower values (0.3-0.8): Less exploration, faster convergence, higher risk of local minima - Recommended default: 1.0 for balanced exploration/exploitation

TODO: Add reference to article on HAL/ArXiv when available.

1.0
beta0 float

Initial drift intensity parameter (must be > 0). Controls how strongly centers are pulled toward observations.

Mathematical role: The drift proportion at time t is computed as alpha(t) = min(h * beta0 * log(1 + t), 1) where h is the time interval. This controls the strength of attraction toward the nearest observation.

Practical effect: - Higher values (2.0-5.0): Stronger drift, faster convergence, more exploitation of current best positions - Lower values (0.3-0.8): Weaker drift, more exploration, slower convergence - Recommended default: 1.0-2.0 for most cases

TODO: Add reference to article on HAL/ArXiv when available.

1.0
step_size float

Time discretization step for the SDE solver (must be > 0). Controls the temporal resolution of the stochastic process.

Mathematical role: Euler discretization step Δt for solving the stochastic differential equation. Smaller values give more accurate simulation at the cost of more computation.

Practical effect: - Smaller values (0.001-0.01): More accurate simulation, slower - Larger values (0.05-0.1): Faster but less accurate - Recommended default: 0.01 for good accuracy/speed tradeoff - Rule of thumb: Use step_size much smaller than the typical time scale of the Poisson process (~ 1/lambda0)

0.1
energy_mode str

Energy calculation mode, either "uniform" or "obs". TODO: Document the difference between these modes.

'uniform'
random_state int | Generator | None

Controls randomness for reproducibility. Determines random number generation for all random operations: - Shuffling observations - Poisson process time generation - Brownian motion (via centers) - Initialization strategies (KMeansPlusPlus, RandomInit) - Space-specific random operations

Pass an int for reproducible results across multiple function calls. When an int is provided, both random.seed() and np.random.seed() are set globally to ensure full reproducibility across all components.

Pass a Generator instance for fine-grained control without affecting global state (advanced usage).

Pass None for non-deterministic behavior (default).

Example: >>> # Reproducible with seed (recommended) >>> sa1 = SimulatedAnnealing(points, k=3, random_state=42) >>> sa2 = SimulatedAnnealing(points, k=3, random_state=42) >>> # sa1 and sa2 will produce identical results >>> >>> # Advanced: use Generator without global state >>> rng = np.random.default_rng(42) >>> sa = SimulatedAnnealing(points, k=3, random_state=rng) >>> # Note: This only controls operations using self._rng >>> # Other components may still use global random state

None

Raises:

Type Description
ValueError

If observations is empty, k <= 0, points are in different spaces, or hyperparameters are invalid.

References

TODO: Add reference to your article: [1] Your Name. "Title of your paper". HAL/ArXiv, 2025. URL: https://...

Example
Quick convergence setup

sa = SimulatedAnnealing( ... points, k=5, ... lambda0=0.5, # Less exploration ... beta0=3.0, # Stronger drift ... step_size=0.01 ... )

Thorough search setup (avoid local minima)

sa = SimulatedAnnealing( ... points, k=5, ... lambda0=2.0, # More exploration ... beta0=1.0, # Gentler drift ... step_size=0.01 ... )

Source code in kmeanssa_ng/core/simulated_annealing.py
def __init__(
    self,
    observations: list[Point],
    k: int,
    lambda0: float = 1.0,
    beta0: float = 1.0,
    step_size: float = 0.1,
    energy_mode: str = "uniform",
    random_state: int | np.random.Generator | None = None,
) -> None:
    """Initialize the simulated annealing algorithm.

    Args:
        observations: List of points to cluster, all in the same metric space.
        k: Number of clusters.
        lambda0: Initial Brownian motion intensity parameter (must be > 0).
            Controls the magnitude of random exploration.

            Mathematical role: Scales the diffusion coefficient in the
            Brownian motion component. The standard deviation of each
            Brownian step is proportional to lambda0 * sqrt(step_size).

            Practical effect:
            - Higher values (1.5-3.0): More exploration, slower convergence,
              better escape from local minima
            - Lower values (0.3-0.8): Less exploration, faster convergence,
              higher risk of local minima
            - Recommended default: 1.0 for balanced exploration/exploitation

            TODO: Add reference to article on HAL/ArXiv when available.

        beta0: Initial drift intensity parameter (must be > 0).
            Controls how strongly centers are pulled toward observations.

            Mathematical role: The drift proportion at time t is computed as
            alpha(t) = min(h * beta0 * log(1 + t), 1) where h is the time
            interval. This controls the strength of attraction toward the
            nearest observation.

            Practical effect:
            - Higher values (2.0-5.0): Stronger drift, faster convergence,
              more exploitation of current best positions
            - Lower values (0.3-0.8): Weaker drift, more exploration,
              slower convergence
            - Recommended default: 1.0-2.0 for most cases

            TODO: Add reference to article on HAL/ArXiv when available.

        step_size: Time discretization step for the SDE solver (must be > 0).
            Controls the temporal resolution of the stochastic process.

            Mathematical role: Euler discretization step Δt for solving the
            stochastic differential equation. Smaller values give more
            accurate simulation at the cost of more computation.

            Practical effect:
            - Smaller values (0.001-0.01): More accurate simulation, slower
            - Larger values (0.05-0.1): Faster but less accurate
            - Recommended default: 0.01 for good accuracy/speed tradeoff
            - Rule of thumb: Use step_size much smaller than the typical
              time scale of the Poisson process (~ 1/lambda0)

        energy_mode: Energy calculation mode, either "uniform" or "obs".
            TODO: Document the difference between these modes.

        random_state: Controls randomness for reproducibility.
            Determines random number generation for all random operations:
            - Shuffling observations
            - Poisson process time generation
            - Brownian motion (via centers)
            - Initialization strategies (KMeansPlusPlus, RandomInit)
            - Space-specific random operations

            Pass an int for reproducible results across multiple function calls.
            When an int is provided, both random.seed() and np.random.seed()
            are set globally to ensure full reproducibility across all components.

            Pass a Generator instance for fine-grained control without affecting
            global state (advanced usage).

            Pass None for non-deterministic behavior (default).

            Example:
                >>> # Reproducible with seed (recommended)
                >>> sa1 = SimulatedAnnealing(points, k=3, random_state=42)
                >>> sa2 = SimulatedAnnealing(points, k=3, random_state=42)
                >>> # sa1 and sa2 will produce identical results
                >>>
                >>> # Advanced: use Generator without global state
                >>> rng = np.random.default_rng(42)
                >>> sa = SimulatedAnnealing(points, k=3, random_state=rng)
                >>> # Note: This only controls operations using self._rng
                >>> # Other components may still use global random state

    Raises:
        ValueError: If observations is empty, k <= 0, points are in different spaces,
            or hyperparameters are invalid.

    References:
        TODO: Add reference to your article:
        [1] Your Name. "Title of your paper". HAL/ArXiv, 2025.
            URL: https://...

    Example:
        >>> # Quick convergence setup
        >>> sa = SimulatedAnnealing(
        ...     points, k=5,
        ...     lambda0=0.5,  # Less exploration
        ...     beta0=3.0,     # Stronger drift
        ...     step_size=0.01
        ... )
        >>>
        >>> # Thorough search setup (avoid local minima)
        >>> sa = SimulatedAnnealing(
        ...     points, k=5,
        ...     lambda0=2.0,   # More exploration
        ...     beta0=1.0,     # Gentler drift
        ...     step_size=0.01
        ... )
    """
    self._validate_constructor_parameters(
        observations, k, lambda0, beta0, step_size
    )
    self._initialize_random_generator(random_state)

    self._space = observations[0].space
    self._observations = observations.copy()
    self._k = k
    self._lambda = float(lambda0)
    self._beta = float(beta0)
    self._step_size = float(step_size)
    self._energy_mode = energy_mode

    # Use random.shuffle (global state) for consistency with rest of codebase
    # If we used self._rng.shuffle, it would desynchronize from global state
    random.shuffle(self._observations)
    self._centers: list[Center] = []

calculate_energy(centers)

Calculate k-means energy for given centers based on the energy mode.

Source code in kmeanssa_ng/core/simulated_annealing.py
def calculate_energy(self, centers: list[Center]) -> float:
    """Calculate k-means energy for given centers based on the energy mode."""
    if (
        hasattr(self.space, "calculate_energy_numba")
        and self.space.calculate_energy_numba is not None
    ):
        return self.space.calculate_energy_numba(centers, how=self._energy_mode)
    return self.space.calculate_energy(centers, how=self._energy_mode)

calculate_energy_fallback(centers, points)

Calculate k-means energy for given centers.

Parameters:

Name Type Description Default
centers list[Center]

List of cluster centers.

required
points list[Point]

List of points.

required

Returns:

Type Description
float

Average squared distance to nearest center.

Source code in kmeanssa_ng/core/simulated_annealing.py
def calculate_energy_fallback(
    self, centers: list[Center], points: list[Point]
) -> float:
    """Calculate k-means energy for given centers.

    Args:
        centers: List of cluster centers.
        points: List of points.

    Returns:
        Average squared distance to nearest center.
    """
    energy = sum(
        min(self.space.distance(center, point) ** 2 for center in centers)
        for point in points
    )
    return energy / len(points)

run_interleaved(initialization_strategy, robustification_strategy, robust_prop=0.0)

Run SA with interleaved drift and brownian motion.

Source code in kmeanssa_ng/core/simulated_annealing.py
def run_interleaved(
    self,
    initialization_strategy: InitializationStrategy,
    robustification_strategy: RobustificationStrategy,
    robust_prop: float = 0.0,
):
    """Run SA with interleaved drift and brownian motion."""
    return self._run_algorithm(
        "interleaved",
        initialization_strategy,
        robustification_strategy,
        robust_prop,
    )

run_sequential(initialization_strategy, robustification_strategy, robust_prop=0.0)

Run SA with sequential brownian motion then drift.

Source code in kmeanssa_ng/core/simulated_annealing.py
def run_sequential(
    self,
    initialization_strategy: InitializationStrategy,
    robustification_strategy: RobustificationStrategy,
    robust_prop: float = 0.0,
):
    """Run SA with sequential brownian motion then drift."""
    return self._run_algorithm(
        "sequential",
        initialization_strategy,
        robustification_strategy,
        robust_prop,
    )

Space

Bases: ABC

Abstract base class for metric spaces.

A metric space provides: - Distance computation between points - Sampling of random points and centers - Cluster computation and energy calculation

Source code in kmeanssa_ng/core/abstract.py
class Space(ABC):
    """Abstract base class for metric spaces.

    A metric space provides:
    - Distance computation between points
    - Sampling of random points and centers
    - Cluster computation and energy calculation
    """

    @abstractmethod
    def distance(self, p1: Point, p2: Point) -> float:
        """Compute the distance between two points.

        Args:
            p1: First point.
            p2: Second point.

        Returns:
            The distance between p1 and p2.
        """
        raise NotImplementedError

    def sample_points(self, n: int, strategy) -> list[Point]:
        """Sample n points using the specified sampling strategy.

        Args:
            n: Number of points to sample
            strategy: Sampling strategy defining the probability distribution.
                     Must be a SamplingStrategy instance specific to the space type.

        Returns:
            List of n sampled points

        Example:
            ```python
            # For quantum graphs
            from kmeanssa_ng.quantum_graph.sampling import UniformNodeSampling
            points = graph.sample_points(100, strategy=UniformNodeSampling())

            # For Riemannian manifolds
            from kmeanssa_ng.riemannian_manifold.sampling import UniformManifoldSampling
            points = manifold.sample_points(100, strategy=UniformManifoldSampling())
            ```

        Note:
            The strategy parameter is REQUIRED to avoid ambiguity about which
            probability distribution to use. Each space type has its own
            specific sampling strategies in space-specific modules.
        """
        return strategy.sample(self, n)

    @abstractmethod
    def compute_clusters(self, centers: list[Center]) -> None:
        """Assign points to their nearest center.

        This method typically updates internal state or annotations
        indicating which cluster each point belongs to.

        Args:
            centers: List of cluster centers.
        """
        raise NotImplementedError

    @abstractmethod
    def calculate_energy(self, centers: list[Center]) -> float:
        """Calculate the k-means energy (distortion) for given centers.

        The energy is the sum of squared distances from each point
        to its nearest center.

        Args:
            centers: List of cluster centers.

        Returns:
            The total energy (sum of squared distances).
        """
        raise NotImplementedError

    @abstractmethod
    def distances_from_centers(self, centers: list[Center], target: Point):
        """Compute distances from multiple centers to a single target point.

        This method is used by the simulated annealing algorithm to efficiently
        find the nearest center to a given observation point.

        Args:
            centers: List of k centers to compute distances from.
            target: The target point.

        Returns:
            Array of shape (k,) with distances from each center to target.

        Example:
            ```python
            centers = space.sample_centers(5)
            target = space.sample_points(1)[0]
            distances = space.distances_from_centers(centers, target)
            closest_idx = np.argmin(distances)
            ```
        """
        raise NotImplementedError

    @abstractmethod
    def center_from_point(self, point: Point) -> Center:
        """Create a Center object from a Point object."""
        raise NotImplementedError

calculate_energy(centers) abstractmethod

Calculate the k-means energy (distortion) for given centers.

The energy is the sum of squared distances from each point to its nearest center.

Parameters:

Name Type Description Default
centers list[Center]

List of cluster centers.

required

Returns:

Type Description
float

The total energy (sum of squared distances).

Source code in kmeanssa_ng/core/abstract.py
@abstractmethod
def calculate_energy(self, centers: list[Center]) -> float:
    """Calculate the k-means energy (distortion) for given centers.

    The energy is the sum of squared distances from each point
    to its nearest center.

    Args:
        centers: List of cluster centers.

    Returns:
        The total energy (sum of squared distances).
    """
    raise NotImplementedError

center_from_point(point) abstractmethod

Create a Center object from a Point object.

Source code in kmeanssa_ng/core/abstract.py
@abstractmethod
def center_from_point(self, point: Point) -> Center:
    """Create a Center object from a Point object."""
    raise NotImplementedError

compute_clusters(centers) abstractmethod

Assign points to their nearest center.

This method typically updates internal state or annotations indicating which cluster each point belongs to.

Parameters:

Name Type Description Default
centers list[Center]

List of cluster centers.

required
Source code in kmeanssa_ng/core/abstract.py
@abstractmethod
def compute_clusters(self, centers: list[Center]) -> None:
    """Assign points to their nearest center.

    This method typically updates internal state or annotations
    indicating which cluster each point belongs to.

    Args:
        centers: List of cluster centers.
    """
    raise NotImplementedError

distance(p1, p2) abstractmethod

Compute the distance between two points.

Parameters:

Name Type Description Default
p1 Point

First point.

required
p2 Point

Second point.

required

Returns:

Type Description
float

The distance between p1 and p2.

Source code in kmeanssa_ng/core/abstract.py
@abstractmethod
def distance(self, p1: Point, p2: Point) -> float:
    """Compute the distance between two points.

    Args:
        p1: First point.
        p2: Second point.

    Returns:
        The distance between p1 and p2.
    """
    raise NotImplementedError

distances_from_centers(centers, target) abstractmethod

Compute distances from multiple centers to a single target point.

This method is used by the simulated annealing algorithm to efficiently find the nearest center to a given observation point.

Parameters:

Name Type Description Default
centers list[Center]

List of k centers to compute distances from.

required
target Point

The target point.

required

Returns:

Type Description

Array of shape (k,) with distances from each center to target.

Example
centers = space.sample_centers(5)
target = space.sample_points(1)[0]
distances = space.distances_from_centers(centers, target)
closest_idx = np.argmin(distances)
Source code in kmeanssa_ng/core/abstract.py
@abstractmethod
def distances_from_centers(self, centers: list[Center], target: Point):
    """Compute distances from multiple centers to a single target point.

    This method is used by the simulated annealing algorithm to efficiently
    find the nearest center to a given observation point.

    Args:
        centers: List of k centers to compute distances from.
        target: The target point.

    Returns:
        Array of shape (k,) with distances from each center to target.

    Example:
        ```python
        centers = space.sample_centers(5)
        target = space.sample_points(1)[0]
        distances = space.distances_from_centers(centers, target)
        closest_idx = np.argmin(distances)
        ```
    """
    raise NotImplementedError

sample_points(n, strategy)

Sample n points using the specified sampling strategy.

Parameters:

Name Type Description Default
n int

Number of points to sample

required
strategy

Sampling strategy defining the probability distribution. Must be a SamplingStrategy instance specific to the space type.

required

Returns:

Type Description
list[Point]

List of n sampled points

Example
# For quantum graphs
from kmeanssa_ng.quantum_graph.sampling import UniformNodeSampling
points = graph.sample_points(100, strategy=UniformNodeSampling())

# For Riemannian manifolds
from kmeanssa_ng.riemannian_manifold.sampling import UniformManifoldSampling
points = manifold.sample_points(100, strategy=UniformManifoldSampling())
Note

The strategy parameter is REQUIRED to avoid ambiguity about which probability distribution to use. Each space type has its own specific sampling strategies in space-specific modules.

Source code in kmeanssa_ng/core/abstract.py
def sample_points(self, n: int, strategy) -> list[Point]:
    """Sample n points using the specified sampling strategy.

    Args:
        n: Number of points to sample
        strategy: Sampling strategy defining the probability distribution.
                 Must be a SamplingStrategy instance specific to the space type.

    Returns:
        List of n sampled points

    Example:
        ```python
        # For quantum graphs
        from kmeanssa_ng.quantum_graph.sampling import UniformNodeSampling
        points = graph.sample_points(100, strategy=UniformNodeSampling())

        # For Riemannian manifolds
        from kmeanssa_ng.riemannian_manifold.sampling import UniformManifoldSampling
        points = manifold.sample_points(100, strategy=UniformManifoldSampling())
        ```

    Note:
        The strategy parameter is REQUIRED to avoid ambiguity about which
        probability distribution to use. Each space type has its own
        specific sampling strategies in space-specific modules.
    """
    return strategy.sample(self, n)

run_parallel(space, n_points, k, sampling_strategy, initialization_strategy, robustification_strategy, n_runs=10, algorithm='interleaved', lambda0=1, beta0=1.0, step_size=0.1, energy_mode='uniform', robust_prop=0.0, n_jobs=-1, seeds=None, return_all=False, mp_context=None)

Run simulated annealing multiple times in parallel with different seeds.

This function executes n_runs independent simulated annealing runs in parallel, each with a different random seed. Each run samples its own observations, generates its own Poisson process, and initializes differently, ensuring complete independence between runs.

Parameters:

Name Type Description Default
space 'Space'

The metric space to sample points from.

required
n_points int

Number of points to sample for each run.

required
k int

Number of clusters.

required
n_runs int

Number of parallel runs to execute.

10
algorithm Literal['interleaved', 'sequential']

Which algorithm variant to use ("interleaved" or "sequential").

'interleaved'
lambda_param

Poisson process intensity parameter (must be > 0).

required
beta

Inverse temperature parameter (must be > 0).

required
step_size float

Time step for updating centers (must be > 0).

0.1
sampling_strategy SamplingStrategy

Strategy for sampling points from the space (required).

required
initialization_strategy InitializationStrategy

Strategy for initializing centers (required).

required
robustification_strategy RobustificationStrategy

Strategy for robustifying results (required).

required
robust_prop float

Proportion of final observations to use for robustification (0-1).

0.0
n_jobs int

Number of parallel jobs. -1 uses all available cores.

-1
seeds list[int] | None

Optional list of specific seeds to use. If None, generates random seeds.

None
return_all bool

If True, return all results; if False, return only the best.

False
mp_context Literal['fork', 'spawn', 'forkserver'] | None

Multiprocessing context to use ('fork', 'spawn', 'forkserver'). If None, uses the system default. Use 'fork' for Jupyter/Quarto compatibility.

None

Returns:

Type Description
list[Center] | tuple[list[Center], list[tuple[list[Center], float, int]]]

If return_all is False: List of best centers (lowest energy).

list[Center] | tuple[list[Center], list[tuple[list[Center], float, int]]]

If return_all is True: Tuple of (best_centers, all_results) where all_results is a list of (centers, energy, seed) tuples sorted by energy.

Raises:

Type Description
ValueError

If n_runs <= 0 or other parameters are invalid.

Example
from kmeanssa_ng import run_parallel

# Generate a graph
graph = QuantumGraph(...)

# Run 10 parallel executions, each sampling its own 100 points
best_centers = run_parallel(graph, n_points=100, k=5, n_runs=10)

# Get all results for analysis
best, all_results = run_parallel(graph, n_points=100, k=5, n_runs=10, return_all=True)
for centers, energy, seed in all_results:
    print(f"Seed {seed}: energy = {energy:.4f}")
Source code in kmeanssa_ng/core/parallel.py
def run_parallel(
    space: "Space",
    n_points: int,
    k: int,
    sampling_strategy: SamplingStrategy,
    initialization_strategy: InitializationStrategy,
    robustification_strategy: RobustificationStrategy,
    n_runs: int = 10,
    algorithm: Literal["interleaved", "sequential"] = "interleaved",
    lambda0: float = 1,
    beta0: float = 1.0,
    step_size: float = 0.1,
    energy_mode: str = "uniform",
    robust_prop: float = 0.0,
    n_jobs: int = -1,
    seeds: list[int] | None = None,
    return_all: bool = False,
    mp_context: Literal["fork", "spawn", "forkserver"] | None = None,
) -> list[Center] | tuple[list[Center], list[tuple[list[Center], float, int]]]:
    """Run simulated annealing multiple times in parallel with different seeds.

    This function executes n_runs independent simulated annealing runs in parallel,
    each with a different random seed. Each run samples its own observations,
    generates its own Poisson process, and initializes differently, ensuring
    complete independence between runs.

    Args:
        space: The metric space to sample points from.
        n_points: Number of points to sample for each run.
        k: Number of clusters.
        n_runs: Number of parallel runs to execute.
        algorithm: Which algorithm variant to use ("interleaved" or "sequential").
        lambda_param: Poisson process intensity parameter (must be > 0).
        beta: Inverse temperature parameter (must be > 0).
        step_size: Time step for updating centers (must be > 0).
        sampling_strategy: Strategy for sampling points from the space (required).
        initialization_strategy: Strategy for initializing centers (required).
        robustification_strategy: Strategy for robustifying results (required).
        robust_prop: Proportion of final observations to use for robustification (0-1).
        n_jobs: Number of parallel jobs. -1 uses all available cores.
        seeds: Optional list of specific seeds to use. If None, generates random seeds.
        return_all: If True, return all results; if False, return only the best.
        mp_context: Multiprocessing context to use ('fork', 'spawn', 'forkserver').
            If None, uses the system default. Use 'fork' for Jupyter/Quarto compatibility.

    Returns:
        If return_all is False: List of best centers (lowest energy).
        If return_all is True: Tuple of (best_centers, all_results) where all_results
            is a list of (centers, energy, seed) tuples sorted by energy.

    Raises:
        ValueError: If n_runs <= 0 or other parameters are invalid.

    Example:
        ```python
        from kmeanssa_ng import run_parallel

        # Generate a graph
        graph = QuantumGraph(...)

        # Run 10 parallel executions, each sampling its own 100 points
        best_centers = run_parallel(graph, n_points=100, k=5, n_runs=10)

        # Get all results for analysis
        best, all_results = run_parallel(graph, n_points=100, k=5, n_runs=10, return_all=True)
        for centers, energy, seed in all_results:
            print(f"Seed {seed}: energy = {energy:.4f}")
        ```
    """
    if n_runs <= 0:
        raise ValueError(f"n_runs must be positive, got {n_runs}")

    # Check n_jobs and issue a warning if it's too high
    import os
    import warnings

    cpu_count = os.cpu_count() or 1
    if n_jobs > cpu_count:
        warnings.warn(
            f"n_jobs={n_jobs} is greater than the number of available CPUs ({cpu_count}). "
            "This may lead to performance degradation.",
            UserWarning,
        )

    # Determine number of workers
    if n_jobs == -1:
        n_jobs = cpu_count
    elif n_jobs == -2:
        n_jobs = max(1, cpu_count - 1)

    # Generate seeds if not provided
    if seeds is None:
        rng = np.random.default_rng()
        seeds = rng.integers(0, 2**31, size=n_runs).tolist()
    elif len(seeds) != n_runs:
        raise ValueError(f"Length of seeds ({len(seeds)}) must match n_runs ({n_runs})")

    # Run all jobs in parallel
    results: list[tuple[list[Center], float, int]] = []

    # Set up multiprocessing context if specified
    executor_kwargs = {"max_workers": n_jobs}
    if mp_context is not None:
        if mp_context not in mp.get_all_start_methods():
            raise ValueError(
                f"Multiprocessing context '{mp_context}' not available on this platform. "
                f"Available: {mp.get_all_start_methods()}"
            )
        executor_kwargs["mp_context"] = mp.get_context(mp_context)

    with ProcessPoolExecutor(**executor_kwargs) as executor:
        # Submit all jobs
        futures = [
            executor.submit(
                _run_with_seed,
                space,
                n_points,
                k,
                seed,
                algorithm,
                lambda0,
                beta0,
                step_size,
                energy_mode,
                robust_prop,
                sampling_strategy,
                initialization_strategy,
                robustification_strategy,
            )
            for seed in seeds
        ]

        # Collect results as they complete
        for future in as_completed(futures):
            centers, energy, seed = future.result()
            results.append((centers, energy, seed))

    # Sort by energy (best first)
    results.sort(key=lambda x: x[1])

    # Return results
    if return_all:
        return results[0][0], results
    else:
        return results[0][0]

run_parallel_with_callback(space, n_points, k, sampling_strategy, initialization_strategy, robustification_strategy, n_runs=10, algorithm='interleaved', lambda0=1.0, beta0=1.0, step_size=0.1, energy_mode='uniform', robust_prop=0.0, n_jobs=-1, seeds=None, callback=None, mp_context=None)

Run parallel simulated annealing with progress callback.

Similar to run_parallel but calls a callback function after each run completes, useful for progress tracking and real-time monitoring. Each run samples its own observations with its specific seed.

Parameters:

Name Type Description Default
space 'Space'

The metric space to sample points from.

required
n_points int

Number of points to sample for each run.

required
k int

Number of clusters.

required
n_runs int

Number of parallel runs to execute.

10
algorithm Literal['interleaved', 'sequential']

Which algorithm variant to use.

'interleaved'
lambda_param

Poisson process intensity parameter.

required
beta

Inverse temperature parameter.

required
step_size float

Time step for updating centers.

0.1
robust_prop float

Proportion for robustification.

0.0
n_jobs int

Number of parallel jobs (-1 = all cores).

-1
seeds list[int] | None

Optional list of specific seeds.

None
callback Callable[[int, int, float], None] | None

Optional function(run_index, seed, energy) called after each run.

None
mp_context Literal['fork', 'spawn', 'forkserver'] | None

Multiprocessing context to use ('fork', 'spawn', 'forkserver'). If None, uses the system default. Use 'fork' for Jupyter/Quarto compatibility.

None

Returns:

Type Description
list[Center]

List of best centers (lowest energy).

Example
def progress_callback(run_idx, seed, energy):
    print(f"Run {run_idx+1}/{n_runs}: energy = {energy:.4f} (seed={seed})")

graph = QuantumGraph(...)
centers = run_parallel_with_callback(
    graph, n_points=100, k=5, n_runs=10, callback=progress_callback
)
Source code in kmeanssa_ng/core/parallel.py
def run_parallel_with_callback(
    space: "Space",
    n_points: int,
    k: int,
    sampling_strategy: SamplingStrategy,
    initialization_strategy: InitializationStrategy,
    robustification_strategy: RobustificationStrategy,
    n_runs: int = 10,
    algorithm: Literal["interleaved", "sequential"] = "interleaved",
    lambda0: float = 1.0,
    beta0: float = 1.0,
    step_size: float = 0.1,
    energy_mode: str = "uniform",
    robust_prop: float = 0.0,
    n_jobs: int = -1,
    seeds: list[int] | None = None,
    callback: Callable[[int, int, float], None] | None = None,
    mp_context: Literal["fork", "spawn", "forkserver"] | None = None,
) -> list[Center]:
    """Run parallel simulated annealing with progress callback.

    Similar to run_parallel but calls a callback function after each run completes,
    useful for progress tracking and real-time monitoring. Each run samples its own
    observations with its specific seed.

    Args:
        space: The metric space to sample points from.
        n_points: Number of points to sample for each run.
        k: Number of clusters.
        n_runs: Number of parallel runs to execute.
        algorithm: Which algorithm variant to use.
        lambda_param: Poisson process intensity parameter.
        beta: Inverse temperature parameter.
        step_size: Time step for updating centers.
        robust_prop: Proportion for robustification.
        n_jobs: Number of parallel jobs (-1 = all cores).
        seeds: Optional list of specific seeds.
        callback: Optional function(run_index, seed, energy) called after each run.
        mp_context: Multiprocessing context to use ('fork', 'spawn', 'forkserver').
            If None, uses the system default. Use 'fork' for Jupyter/Quarto compatibility.

    Returns:
        List of best centers (lowest energy).

    Example:
        ```python
        def progress_callback(run_idx, seed, energy):
            print(f"Run {run_idx+1}/{n_runs}: energy = {energy:.4f} (seed={seed})")

        graph = QuantumGraph(...)
        centers = run_parallel_with_callback(
            graph, n_points=100, k=5, n_runs=10, callback=progress_callback
        )
        ```
    """
    if n_runs <= 0:
        raise ValueError(f"n_runs must be positive, got {n_runs}")

    # Check n_jobs and issue a warning if it's too high
    import os
    import warnings

    cpu_count = os.cpu_count() or 1
    if n_jobs > cpu_count:
        warnings.warn(
            f"n_jobs={n_jobs} is greater than the number of available CPUs ({cpu_count}). "
            "This may lead to performance degradation.",
            UserWarning,
        )

    # Determine number of workers
    if n_jobs == -1:
        n_jobs = cpu_count
    elif n_jobs == -2:
        n_jobs = max(1, cpu_count - 1)

    # Generate seeds if not provided
    if seeds is None:
        rng = np.random.default_rng()
        seeds = rng.integers(0, 2**31, size=n_runs).tolist()
    elif len(seeds) != n_runs:
        raise ValueError(f"Length of seeds ({len(seeds)}) must match n_runs ({n_runs})")

    # Run all jobs in parallel with progress tracking
    results: list[tuple[list[Center], float, int]] = []
    completed_count = 0

    # Set up multiprocessing context if specified
    executor_kwargs = {"max_workers": n_jobs}
    if mp_context is not None:
        if mp_context not in mp.get_all_start_methods():
            raise ValueError(
                f"Multiprocessing context '{mp_context}' not available on this platform. "
                f"Available: {mp.get_all_start_methods()}"
            )
        executor_kwargs["mp_context"] = mp.get_context(mp_context)

    with ProcessPoolExecutor(**executor_kwargs) as executor:
        # Submit all jobs
        future_to_index = {
            executor.submit(
                _run_with_seed,
                space,
                n_points,
                k,
                seed,
                algorithm,
                lambda0,
                beta0,
                step_size,
                energy_mode,
                robust_prop,
                sampling_strategy,
                initialization_strategy,
                robustification_strategy,
            ): (idx, seed)
            for idx, seed in enumerate(seeds)
        }

        # Collect results as they complete
        for future in as_completed(future_to_index):
            idx, seed = future_to_index[future]
            centers, energy, result_seed = future.result()
            results.append((centers, energy, result_seed))
            completed_count += 1

            # Call callback if provided
            if callback is not None:
                callback(idx, result_seed, energy)

    # Sort by energy and return best
    results.sort(key=lambda x: x[1])
    return results[0][0]