Introduction to error-correcting codes#

FCCT 1990 Hill, Raymond. A First Course in Coding Theory. Clarendon Press: Oxford Applied Mathematics and Computing Science Series.


Programming Environment#

Hide code cell source
import numpy  as np
import pandas as pd

import matplotlib        as mpl
import matplotlib.pyplot as plt
plt.style.use('ggplot');
plt.rcParams.update({'text.usetex' : True});
%matplotlib inline

from   collections import defaultdict
from   itertools   import combinations,product
import itertools

from typing import Set

from IPython.display import display, Math

from   datetime import datetime as d
import locale                   as l
import platform                 as p
import sys                      as s

pad = 20
print(f"{'Executed'.upper():<{pad}}: {d.now()}")
print()
print(f"{'Platform'   :<{pad}}: "
      f"{p.mac_ver()[0]} | "
      f"{p.system()} | "
      f"{p.release()} | "
      f"{p.machine()}")
print(f"{''           :<{pad}}: {l.getpreferredencoding()}")
print()
print(f"{'Python'     :<{pad}}: {s.version}")
print(f"{''           :<{pad}}: {s.version_info}")
print(f"{''           :<{pad}}: {p.python_implementation()}")
print()
print(f"{'Matplotlib' :<{pad}}: {mpl.__version__}")
print(f"{'NumPy'      :<{pad}}: {np .__version__}")
EXECUTED            : 2024-05-21 15:46:16.115980

Platform            : 14.4.1 | Darwin | 23.4.0 | arm64
                    : UTF-8

Python              : 3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:34:54) [Clang 16.0.6 ]
                    : sys.version_info(major=3, minor=11, micro=9, releaselevel='final', serial=0)
                    : CPython

Matplotlib          : 3.8.4
NumPy               : 1.26.4
Hide code cell source
def rc (q : int,
        n : int) -> Set[str]:
  """Repetition Code
  Generate a q-ary repetition block code of length n.
  """
  S=set()
  for i in range(q):
    S.add(str(i)*n)
  return S

def fqn (q : int,
         n : int,
         g : int = 0) -> Set[str]:
  """Construct a linear space of dimension n over a finite field of order q.
  
  Parameters
  ==========
  g : If the space is very large, opt for the first g elements of a generator object.
  """
  if bool(g):
    f=itertools.product(range(q),repeat=n)
    return set(''.join(str(i) for i in next(f)) for _ in range(g))
  else:
    return {''.join(str(bit) for bit in word) for word in itertools.product(range(q),repeat=n)}

def qarycode_to_nbitstring (code={'3121','2101'},k=4):
  """Convert a q-ary code """
  for n in code:
    print(' '.join(format(int(i),f'0{k}b') for i in n))

def hd (a : str = '1001',
        b : str = '0101') -> int:
  """HAMMING DISTANCE
  
  Parameters
  ==========
  x : str
  y : str

  Return
  ======
  int
  """
  assert len(a) == len(b), 'x and y must have the same length'
  return sum(x!=y for x,y in zip(a,b))

def nbfmd (c  : Set[str],
           pr : bool = False) -> np.float16:
  """NAIVE BRUTE FORCE MINIMUM DISTANCE d(C)

  Computes the pairwise Hamming distance for all codewords and returns the minimum value.

  This is a naive (i.e., non vectorized) implementation using nested for loops.
  
  Parameters
  ==========
  c  : code
  pr : Print intermediate steps.

  Returns
  =======
  d(C)
  """

  # convert a set of string vectors to a 2D NumPy array of integers
  c=np.array([list(codeword) for codeword in c],dtype=np.float16)

  # intialize empty hamming distance matrix
  hamming = np.empty([c.shape[0]]*2,dtype=np.float16)
  for i,x in enumerate(c):
    for j,y in enumerate(c):
      hamming[i,j]=(x!=y).sum()
  # the diagonal represents the Hamming distance of a codeword with itself, which is always 0.
  np.fill_diagonal(hamming,np.inf)

  if pr == True:
    print(hamming)

  return hamming.min().astype(np.int8)

def one_error_detecting (q    : int,
                         code : Set[str],
                         p    : bool = False) -> bool:
  """Verify that a code is one-error detecting.
  No one-bit error equals a codeword.
  """
  flag=True
  alphabet=set(str(i) for i in range(q))
  for codeword in code:
    if p:
      print()
      print(f"{'orig cw : ':10}{codeword}")
    for i in range(len(codeword)):
      a,b,c=codeword[:i],codeword[i],codeword[i+1:]
      symbols=alphabet-set(codeword[i])
      for symbol in symbols:
        cw=codeword[:i]+symbol+codeword[i+1:] # SINGLE ERROR
        if cw in code:
          flag=False
          if p:
            print(f"{'ERROR':10}{cw}")
        else:
          if p:
            print(f"{'':10}{cw}")
  return flag

# set(''.join(l for l in i) for i in itertools.product('10',repeat=3))
# set(''.join(l for l in i) for i in itertools.combinations_with_replacement('012',r=3))
# set(''.join(l for l in i) for i in itertools.combinations('01',r=2))

Redundant symbols add protection to the message, but are themselves subject to error; so there is no way to guarantee accuracy.

The goal is to maximize the probability of accuracy.

A good code is one in which the codewords resemble each other as little as possible.


Q-Ary Code#

[Q-ARY CODE]

A q-ary code is a set of sequences of symbols where each symbol \(\lambda_i\) is chosed from a set

\( F_q=\{\lambda_1,\lambda_2,...,\lambda_q\} \)

of \(q\) distinct elements called the alphabet which is often the set

\( Z_q=\{0,1,2,...,q-1\} \)

If \(q\) is a prime power (if \(q=p^h\) for some prime number \(p\) and some positive integer \(h\)) then the alphabet \(F_q\) is the finite field of order \(q\).

[EXAMPLE]

The set of all words in the English language is a code over the 26-letter alphabet \(F_{26}=\{\text{A},\text{B},...,\text{Z}\}\)

[EXAMPLE]

The set of all street names is a code over the 27-ary alphabet \(F_{27}=\{\text{A},\text{B},...,\text{Z},\langle\text{SPACE}\rangle\}\)

Binary Code#

[BINARY CODE or 2-ARY CODE]

A binary code is a set of sequences of 0s and 1s called codewords.

Since \(2=2^1\) is a prime power, the alphabet \(F_2=\{0,1\}=Z_2\) is the finite field of order \(2\).

Ternary Code#

[TERNARY CODE or 3-ARY CODE]

A ternary code is a set of sequences of 0s, 1s, and 2s.

Since \(3=3^1\) is a prime power, the alphabet \(F_3=\{0,1,2\}=Z_3\) is the finite field of order \(3\).


Block Code of length n#

[BLOCK CODE OF LENGTH N]

A block code of length \(n\) is a code in which each codeword is a sequence consisting of a fixed number \(n\) of symbols.

A block code \(C\) with \(M\) codewords of length \(n\) is written as an \(M\times n\) array whose rows are the codewords of \(C\).


Repetition Code of length n#

[REPETITION CODE OF LENGTH N]

A repetition code is a code in which each codeword is the repetition of some symbol \(n\) times.

The \(q\)-ary repetition code of length \(n\) whose codewords are

\( \begin{matrix} 0 & 0 & \dots & 0\\ 1 & 1 & \dots & 1\\ \vdots & \vdots & \ddots & \vdots\\ (q-1) & (q-1) & \dots & (q-1)\\ \end{matrix} \)

is an \((n,q,n)\)-code.


\((F_q)^n\)#

Let \((F_q)^n\) denote the set of all ordered \(n\)-tuples \(\mathbf{a}=a_1a_2...a_n\) where each \(a_i\in F_q\).

The elements of \((F_q)^n\) are called vectors or words.

The order of the set \((F_q)^n\) is \(q^n\).

A \(q\)-ary block code of length \(n\) is a subset of \((F_q)^n\).

[EXAMPLE]

\( (F_2)^3 \)

fqn(2,3)
{'000', '001', '010', '011', '100', '101', '110', '111'}
rc(2,3)
{'000', '111'}
rc(2,3).issubset(fqn(2,3))
True

[EXAMPLE]

The set of all \(10\)-digit telephone numbers in the UK is a \(10\)-ary block code of length \(10\).

Little thought appears to have been given to allocating numbers so that the frequency of wrong numbers is minimized.

Even so, this code of about \(82\) million \(10\)-digit telephone numbers such that if just one digit of a number is misdialled the correct connection can still be made.

q=10
n=10
display(Math(
  r"\begin{aligned}"
  r"q&=10\\"
  r"n&=10\\"
  f"q^n&={q**n:,}"
  r"\end{aligned}"
))
\[\begin{split}\displaystyle \begin{aligned}q&=10\\n&=10\\q^n&=10,000,000,000\end{aligned}\end{split}\]

Hamming distance#

[HAMMING DISTANCE]

The Hamming distance \(d(\mathbf{x,y})\) between two vectors \(\mathbf{x}\) and \(\mathbf{y}\) of \((F_q)^n\) is the number of places in which they differ.

The Hamming distance is a metric because it satisfies the following three conditions.

\( \begin{aligned} d(\mathbf{x,y})&=0\iff\mathbf{x=y} \\ d(\mathbf{x,y})&=d(\mathbf{y,x})\,\,\,\forall\mathbf{x,y}\in(F_q)^n \\ d(\mathbf{x,y})&\le d(\mathbf{x,z})+d(\mathbf{z,y})\,\,\,\forall\mathbf{x,y,z}\in(F_q)^n \,\,\,\text{triangle inequality} \end{aligned} \)

Triangle Inequality

  • \(d(\mathbf{x,y})\) is the minimum number of changes of digits (or bit flips) required to change \(\mathbf{x}\) to \(\mathbf{y}\).

  • We can also change \(\mathbf{x}\) to \(\mathbf{y}\) by first making \(d(\mathbf{x,z})\) changes and then \(d(\mathbf{z,y})\) changes.

hd('00111','11001')
4
hd('0122','1220')
3

Nearest Neighbor Decoding#

[NEAREST NEIGHBOR DECODING]

Suppose a codeword \(\mathbf{x}\) has been transmitted and that a vector \(\mathbf{y}\) has been received, which may’ve been distorted by noise.

It seems reasonable to decode \(\mathbf{y}\) as that codeword \(\mathbf{x'}\), hopefully \(\mathbf{x}\), such that \(d(\mathbf{x',y})\) is as small as possible.

NND maximizes the decoder’s likelihood of correcting errors assuming a q-ary symmetric channel.

DECODE A VECTOR TO ITS CLOSEST CODEWORD.

Brute Force Nearest Neighbor Decoding#

[BRUTE-FORCE DECODING]

Compare the received vector with all codewords and to decode to the nearest.


Q-Ary Symmetric Communication Channel#

[Q-ARY SYMMETRIC COMMUNICATION CHANNEL]

Let \(p\) be called the symbol error probability.

(i) Each symbol transmitted has the same probability \(p(\lt\frac{1}{2})\) of being received in error.

(ii) If a symbol is received in error, then each of the \(q-1\) possible errors is equally likely.

Binary Symmetric Communication Channel#

Symbol error probability#

\( \begin{aligned} \text{symbol error probability}\,\,\, p &=P[\text{symbol}\,\,\,i\,\,\,\text{received in error}] \lt\frac{1}{2} \\ (1-p) &=P[\text{symbol}\,\,\,i\,\,\,\text{not received in error}] \gt\frac{1}{2} \end{aligned} \)

A binary codeword of length \(n\) is transmitted.

The probability that no errors occur is

\( \underbrace{(1-p)(1-p)...(1-p)}_{n\,\,\,\text{times}} =(1-p)^n \)

The probability that a single error occurs is

\( \underbrace{p}_{1\,\,\,\text{time}}\underbrace{(1-p)(1-p)...(1-p)}_{n-1\,\,\,\text{times}} =p(1-p)^{n-1} \)

The probability that \(i\) errors occur is

\( \underbrace{pp...p}_{i\,\,\,\text{times}}\underbrace{(1-p)(1-p)...(1-p)}_{n-i\,\,\,\text{times}} =p^i(1-p)^{n-i} \)

\( \begin{aligned} p\lt\frac{1}{2} \implies (1-p)^n \gt p(1-p)^{n-1} \gt p^i(1-p)^{n-i} \end{aligned} \)

This last fact means that for a binary symmetric channel, nearest neighbor decoding is also maximum likelihood decoding.

Word error probability#

Word error probability \(P_{err}=1-P_\text{correct}\) is the probability that a received vector is not correctly decoded as the transmitted codeword.

Certain codes, such as the class of linear codes, have the property that \(P_{err}(C)\) is independent of the actual codeword transmitted.


Minimum distance of a code#

[MINIMUM DISTANCE OF A CODE]

The minimum distance \(d(C)\) of a code \(C\) is the smallest of the distances between distinct codewords.

\(d(C)=\min\{d(\mathbf{x,y})\mid\mathbf{x,y}\in C,\mathbf{x\ne y}\}\)

\(d(C)\) is a measure of how good a code is at error-correcting.

Minimum distance and error-detection and -correction#

[THEOREM]

(i) A code \(C\) can detect up to \(s\) errors in any codeword if \(d(C)\ge s+1\).

(ii) A code \(C\) can correct up to \(t\) errors in any codeword if \(d(C)\ge2t+1\).

PROOF (i)

Suppose \(d(C)\ge s+1\).

[In fact we are working with a code \(C\) whose minimum distance \(d(C)\) is no smaller than \(s+1\) for some nonnegative integer \(s\).]

Suppose a codeword \(\mathbf{x}\) is transmitted and the vector \(\mathbf{y}\) is received in which \(s\) or fewer errors have occurred, so that \(d(\mathbf{x,y})\le s\).

[In fact we receive a vector \(\mathbf{y}\) in which no more than \(s\) errors have actually occurred.]

Then the received vector cannot be a different codeword, and so the errors are detected.

[Since the distance from each codeword to every other codeword is in fact at least one larger than \(s\), \(\mathbf{y}\) cannot be a codeword.]

\(\blacksquare\)

PROOF (ii)

Suppose \(d(C)\ge2t+1\).

[In fact we are working with a code \(C\) whose minimum distance \(d(C)\) is no smaller than \(2t+1\) for some nonnegative integer \(t\).]

Suppose a codeword \(\mathbf{x}\) is transmitted and the vector \(\mathbf{y}\) is received in which \(t\) or fewer errors have occurred, so that \(d(\mathbf{x,y})\le t\).

[In fact we receive a vector \(\mathbf{y}\) in which no more than \(t\) errors have actually occurred.]

If \(\mathbf{x'}\) is any codeword other than \(\mathbf{x}\), then \(d(\mathbf{x',y})\ge t+1\).

[

The distance between \(\mathbf{x}\) and \(\mathbf{y}\) is no greater than \(t\).

By \(d(C)\), the distance between \(\mathbf{x}\) and any other codeword \(\mathbf{x'}\) is no smaller than \(2t+1\).

Thus by the triangle inequality,

\( \begin{aligned} d(\mathbf{x,x'})&\le d(\mathbf{x,y})+d(\mathbf{y,x'}) \\ 2t+1&\le t+d(\mathbf{y,x'}) \\ t+1&\le d(\mathbf{y,x'}) \end{aligned} \)

]

For otherwise, \(d(\mathbf{x',y})\le t\), which implies by the triangle inequality that \(d(\mathbf{x,x'})\le d(\mathbf{x,y})+d(\mathbf{y,x'})\le2t\), which contradicts \(d(C)\ge 2t+1\).

Why does a logical negation work here?

[

If \(d(\mathbf{x',y})\not\ge t+1\) then \(d(\mathbf{x',y})\le t\)

(If a statement if false then its negation is true.)

If \(d(\mathbf{x',y})\le t\) then by the triangle inequality

\( \begin{aligned} d(\mathbf{x,x'})&\le d(\mathbf{x,y})+d(\mathbf{y,x'}) \\ 2t+1&\le d(\mathbf{x,y})+d(\mathbf{y,x'})\le2t \end{aligned} \)

]

Therefore, \(\mathbf{x}\) is the nearest codeword to \(\mathbf{y}\) and nearest neighbor decoding corrects the errors.

\(\blacksquare\)

[COROLLARY]

If a code \(C\) has minimum distance \(d(C)\) then \(C\) can be used either

(i) to detect up to \(d-1\) errors or

(ii) to correct up to \(\lfloor\frac{d-1}{2}\rfloor\) errors in any codeword.

PROOF (i)

\( \begin{aligned} d(C)&\ge s+1 \iff d(C)-1\ge s \end{aligned} \)

[

The number of errors \(s\) must be no larger than one less than the minimum distance of the code.

When there are no more than \(s\) errors in a vector, that vector will not be any codeword and can thus be detected as an error.

]

\(\blacksquare\)

PROOF (ii)

\( \begin{aligned} d(C)\ge2t+1 \iff \frac{d(C)-1}{2}&\ge t \\ \to \left\lfloor\frac{d(C)-1}{2}\right\rfloor&\ge t \end{aligned} \)

The number of errors detected or corrected for a given minimum distance#

print(f"{'d(C)':6}{'s':6}{'t':6}")
print()
for d in range(1,51):
  s = d-1
  t = int(np.floor((d-1)/2))
  print(f"{d:<6}{s:<6}{t:<6}")
d(C)  s     t     

1     0     0     
2     1     0     
3     2     1     
4     3     1     
5     4     2     
6     5     2     
7     6     3     
8     7     3     
9     8     4     
10    9     4     
11    10    5     
12    11    5     
13    12    6     
14    13    6     
15    14    7     
16    15    7     
17    16    8     
18    17    8     
19    18    9     
20    19    9     
21    20    10    
22    21    10    
23    22    11    
24    23    11    
25    24    12    
26    25    12    
27    26    13    
28    27    13    
29    28    14    
30    29    14    
31    30    15    
32    31    15    
33    32    16    
34    33    16    
35    34    17    
36    35    17    
37    36    18    
38    37    18    
39    38    19    
40    39    19    
41    40    20    
42    41    20    
43    42    21    
44    43    21    
45    44    22    
46    45    22    
47    46    23    
48    47    23    
49    48    24    
50    49    24    

(n,M,d)-code#

An \((n,M,d)\)-code is a code of length \(n\), containing \(M\) codewords and having minimum distance \(d\).