Extending mySpellChecker: Plugin Architecture
mySpellChecker’s plugin architecture lets you replace or extend every major component — segmenters, dictionary providers, validation strategies, and POS taggers — by implementing the corresponding base class and passing your instance to SpellChecker.

Custom Segmenter

If you have a proprietary segmentation model or want to use a different library:
  1. Subclass Segmenter.
  2. Implement segment_syllables and segment_words.
  3. Pass it to SpellChecker.
from myspellchecker.segmenters import Segmenter
from typing import List

class SpaceSegmenter(Segmenter):
    """Segmenter that treats a single space as the word boundary.

    Syllables are approximated as individual characters; words are the
    space-separated tokens of the input text.
    """

    def segment_syllables(self, text: str) -> List[str]:
        # Character-level split stands in for real syllable segmentation.
        return [ch for ch in text]

    def segment_words(self, text: str) -> List[str]:
        # Split on a literal space (consecutive spaces yield empty tokens,
        # matching str.split(" ") semantics).
        words = text.split(" ")
        return words

# Plug the custom segmenter into the checker (SpellChecker is imported elsewhere).
checker = SpellChecker(segmenter=SpaceSegmenter())

BiLSTM Segmenter Example

from myspellchecker.segmenters import Segmenter
import torch

class BiLSTMSegmenter(Segmenter):
    """Segmenter backed by a trained BiLSTM model.

    The file at ``model_path`` must be loadable with ``torch.load``.  The
    decoding helpers (``_decode_predictions``, ``_group_into_words``) are
    model-specific and must be supplied by the implementer.
    """

    def __init__(self, model_path: str):
        self.model = torch.load(model_path)
        # Inference-only usage: switch off dropout/batch-norm training
        # behavior, which torch.load does NOT do automatically.
        self.model.eval()

    def segment_syllables(self, text: str) -> List[str]:
        """Return the syllables of *text* predicted by the model."""
        with torch.no_grad():  # no gradient bookkeeping needed at inference
            # Your model inference logic
            predictions = self.model(text)
            return self._decode_predictions(predictions)

    def segment_words(self, text: str) -> List[str]:
        """Group model-predicted syllables into words."""
        syllables = self.segment_syllables(text)
        # Grouping is model-specific (e.g. decoding BIO-style tags).
        return self._group_into_words(syllables)

Custom Dictionary Provider

To load data from a Redis cache, API, or other source:
  1. Subclass DictionaryProvider.
  2. Implement required abstract methods.

Redis Provider Example

from myspellchecker.providers import DictionaryProvider
from typing import List, Optional, Tuple
import redis

class RedisProvider(DictionaryProvider):
    """Dictionary provider backed by a Redis cache.

    Key layout (all under ``prefix``, default ``spell:``):
      - ``word:<word>``            presence marks a valid word
      - ``syl:<syllable>``         presence marks a valid syllable
      - ``freq:<word>``            word frequency (int)
      - ``bigram:<w1>:<w2>``       bigram probability (float)
      - ``trigram:<w1>:<w2>:<w3>`` trigram probability (float)
    """

    def __init__(self, redis_client, prefix: str = "spell:"):
        self.redis = redis_client
        self.prefix = prefix

    def is_valid_word(self, word: str) -> bool:
        # EXISTS returns an int count; coerce to the declared bool.
        return bool(self.redis.exists(f"{self.prefix}word:{word}"))

    def is_valid_syllable(self, syllable: str) -> bool:
        return bool(self.redis.exists(f"{self.prefix}syl:{syllable}"))

    def get_word_frequency(self, word: str) -> int:
        freq = self.redis.get(f"{self.prefix}freq:{word}")
        return int(freq) if freq else 0

    def get_bigram_probability(self, word1: str, word2: str) -> float:
        prob = self.redis.get(f"{self.prefix}bigram:{word1}:{word2}")
        return float(prob) if prob else 0.0

    def get_trigram_probability(self, word1: str, word2: str, word3: str) -> float:
        prob = self.redis.get(f"{self.prefix}trigram:{word1}:{word2}:{word3}")
        return float(prob) if prob else 0.0

    def get_top_continuations(self, prev_word: str, limit: int = 20) -> List[Tuple[str, float]]:
        """Return up to *limit* ``(word, frequency)`` pairs for words starting
        with *prev_word*, sorted by descending frequency.

        Uses incremental SCAN with a MATCH pattern so the server is never
        blocked by a full keyspace walk.
        """
        key_ns = f"{self.prefix}word:"
        # BUG FIX: the original MATCH pattern referenced an undefined name
        # `prefix`; the intended prefix is the `prev_word` argument.
        pattern = f"{key_ns}{prev_word}*"
        cursor = 0
        results: List[Tuple[str, float]] = []
        while len(results) < limit:
            cursor, keys = self.redis.scan(cursor, pattern, count=100)
            for key in keys:
                word = key.decode().replace(key_ns, "")
                # Cast to float to honor the declared return type.
                results.append((word, float(self.get_word_frequency(word))))
            if cursor == 0:  # SCAN cursor 0 means the iteration is complete
                break
        return sorted(results, key=lambda x: -x[1])[:limit]

    def close(self) -> None:
        """Release the underlying Redis connection."""
        self.redis.close()

# Usage: connect to a local Redis instance and wire the provider into the checker.
redis_client = redis.Redis(host='localhost', port=6379, db=0)
provider = RedisProvider(redis_client)
checker = SpellChecker(provider=provider)

REST API Provider Example

import requests
from myspellchecker.providers import DictionaryProvider

class APIProvider(DictionaryProvider):
    """Dictionary provider using a REST API backend.

    Expected endpoints:
      - ``GET /words/<word>``      -> 200 when the word exists
      - ``GET /frequency/<word>``  -> JSON ``{"frequency": <int>}``
    """

    # Fail fast instead of hanging forever: requests has NO default timeout.
    REQUEST_TIMEOUT = 5.0

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()  # reuse connections across calls
        if api_key:
            self.session.headers["Authorization"] = f"Bearer {api_key}"

    def is_valid_word(self, word: str) -> bool:
        response = self.session.get(
            f"{self.base_url}/words/{word}", timeout=self.REQUEST_TIMEOUT
        )
        return response.status_code == 200

    def get_word_frequency(self, word: str) -> int:
        response = self.session.get(
            f"{self.base_url}/frequency/{word}", timeout=self.REQUEST_TIMEOUT
        )
        if response.status_code == 200:
            return response.json().get("frequency", 0)
        return 0

    # ... implement other methods similarly

Custom Validation Strategies

The ContextValidator uses a strategy pattern for extensible validation.

Creating a Custom Strategy

from myspellchecker.core.validation_strategies import ValidationStrategy, ValidationContext
from myspellchecker.core.response import Error
from typing import List

class ProfanityFilterStrategy(ValidationStrategy):
    """Strategy that flags words found on a configured block list.

    Matching is case-insensitive: both the block list and the words being
    validated are lowercased before comparison.
    """

    def __init__(self, blocked_words: List[str]):
        # BUG FIX: normalize the block list to lowercase so mixed-case
        # entries (e.g. "Bad_Word") still match the lowercased lookup
        # performed in validate().
        self.blocked_words = {w.lower() for w in blocked_words}

    def priority(self) -> int:
        """Lower values run first. Default strategies use 10-70."""
        return 25  # Run after tone validation (10) but before POS (30)

    def validate(self, context: ValidationContext) -> List[Error]:
        """Return one Error (suggesting redaction) per blocked word found."""
        errors = []
        for i, word in enumerate(context.words):
            if word.lower() in self.blocked_words:
                errors.append(Error(
                    text=word,
                    position=context.word_positions[i],
                    suggestions=["[redacted]"],
                    error_type="profanity",
                    confidence=1.0,
                ))
        return errors

# Register the strategy with a ContextValidator.
from myspellchecker.core.context_validator import ContextValidator

strategies = [
    ProfanityFilterStrategy(["bad_word1", "bad_word2"]),
    # ... other strategies
]
# NOTE: `config` and `segmenter` are assumed to exist (see earlier examples).
validator = ContextValidator(config, segmenter, strategies=strategies)

Strategy Priority Guidelines

| Priority | Category    | Description                               |
|----------|-------------|-------------------------------------------|
| 10       | Tone        | Tone mark disambiguation                  |
| 15       | Orthography | Medial order and compatibility validation |
| 20       | Syntactic   | Grammar rule validation                   |
| 25       | Custom      | Custom validation strategies              |
| 30       | POS         | Part-of-speech sequence validation        |
| 40       | Question    | Question particle validation              |
| 45       | Homophone   | Homophone confusion detection             |
| 50       | N-gram      | Statistical context validation            |
| 70       | Semantic    | AI-powered semantic validation            |

Factory Pattern Usage

mySpellChecker uses factories for creating configured components.

Using Component Factory

from myspellchecker.core.component_factory import ComponentFactory
from myspellchecker.core.config import SpellCheckerConfig

config = SpellCheckerConfig(
    max_edit_distance=2,
    use_phonetic=True,
    use_context_checker=True,
)

factory = ComponentFactory(config)

# Create individual components from the shared config.
symspell = factory.create_symspell(provider)  # requires a dictionary provider (see above)
components = factory.create_all(provider, segmenter)  # builds the full component set

POS Tagger Factory

from myspellchecker.algorithms.pos_tagger_factory import POSTaggerFactory
from myspellchecker.core.config import POSTaggerConfig

# Create rule-based tagger (fast, no dependencies)
tagger = POSTaggerFactory.create("rule_based")

# Create Viterbi tagger (better accuracy; needs a provider for statistics)
tagger = POSTaggerFactory.create("viterbi", provider=provider)

# Create transformer tagger (best accuracy, requires torch)
tagger = POSTaggerFactory.create(
    "transformer",
    model_name="chuuhtetnaing/myanmar-pos-model",
    device=0  # GPU device index — presumably -1 selects CPU; confirm against factory docs
)

Advanced Configuration Patterns

Environment-Based Configuration

from myspellchecker.core.config.loader import ConfigLoader

loader = ConfigLoader()

# Load from a named profile, letting environment variables override fields.
config = loader.load(
    profile="production",
    use_env=True,  # Read MYSPELL_* environment variables
)

# Available environment variables (override the matching config fields):
# MYSPELL_MAX_EDIT_DISTANCE=3
# MYSPELL_MAX_SUGGESTIONS=10
# MYSPELL_USE_CONTEXT_CHECKER=true
# MYSPELL_DATABASE_PATH=/path/to/custom.db

Profile-Based Configuration

from myspellchecker.core.config.loader import load_config

# Fast profile - optimized for speed
config = load_config(profile="fast")

# Accurate profile - maximum accuracy
config = load_config(profile="accurate")

# Production profile - balanced speed/accuracy trade-off
config = load_config(profile="production")

Programmatic Configuration

from myspellchecker.core.config import (
    SpellCheckerConfig,
    SymSpellConfig,
    NgramContextConfig,
    POSTaggerConfig,
)

# Build the full configuration in code; nested configs mirror the YAML layout.
config = SpellCheckerConfig(
    max_edit_distance=2,
    max_suggestions=10,
    use_phonetic=True,
    use_context_checker=True,
    symspell=SymSpellConfig(
        prefix_length=10,
        beam_width=150,
    ),
    ngram_context=NgramContextConfig(
        # Probabilities below these thresholds are flagged as context errors
        # — presumably; confirm semantics against NgramContextConfig docs.
        bigram_threshold=0.0005,
        trigram_threshold=0.00005,
    ),
    pos_tagger=POSTaggerConfig(
        tagger_type="transformer",
        model_name="chuuhtetnaing/myanmar-pos-model",
    ),
)

checker = SpellChecker(config=config)

Configuration from File

# myspellchecker.yaml — file-based configuration.
# Top-level and nested keys mirror the programmatic SpellCheckerConfig fields.
preset: production  # start from the "production" profile defaults
max_edit_distance: 2
max_suggestions: 10

symspell:
  prefix_length: 10
  beam_width: 150

ngram_context:
  bigram_threshold: 0.0005
  trigram_threshold: 0.00005

pos_tagger:
  tagger_type: transformer
  model_name: chuuhtetnaing/myanmar-pos-model
from myspellchecker.core.config.loader import load_config

# Load the YAML file shown above into a config object.
config = load_config(config_file="myspellchecker.yaml")