Repetition Codes#




Programming Environment#

Hide code cell source
import numpy  as np
import pandas as pd

import matplotlib        as mpl
import matplotlib.pyplot as plt
plt.style.use('ggplot');
plt.rcParams.update({'text.usetex' : True});
%matplotlib inline

from   collections import defaultdict
from   itertools   import combinations,product
import itertools

from typing import Set

from IPython.display import display, Math

from   datetime import datetime as d
import locale                   as l
import platform                 as p
import sys                      as s

pad = 20
print(f"{'Executed'.upper():<{pad}}: {d.now()}")
print()
print(f"{'Platform'   :<{pad}}: "
      f"{p.mac_ver()[0]} | "
      f"{p.system()} | "
      f"{p.release()} | "
      f"{p.machine()}")
print(f"{''           :<{pad}}: {l.getpreferredencoding()}")
print()
print(f"{'Python'     :<{pad}}: {s.version}")
print(f"{''           :<{pad}}: {s.version_info}")
print(f"{''           :<{pad}}: {p.python_implementation()}")
print()
print(f"{'Matplotlib' :<{pad}}: {mpl.__version__}")
print(f"{'NumPy'      :<{pad}}: {np .__version__}")
EXECUTED            : 2025-01-19 12:50:36.044123

Platform            : 15.2 | Darwin | 24.2.0 | arm64
                    : UTF-8

Python              : 3.11.11 | packaged by conda-forge | (main, Dec  5 2024, 14:21:42) [Clang 18.1.8 ]
                    : sys.version_info(major=3, minor=11, micro=11, releaselevel='final', serial=0)
                    : CPython

Matplotlib          : 3.9.3
NumPy               : 2.0.2
Hide code cell source
def rc (q : int,
        n : int) -> Set[str]:
  """Repetition Code
  Generate a q-ary repetition block code of length n.
  """
  S=set()
  for i in range(q):
    S.add(str(i)*n)
  return S

def fqn (q : int,
         n : int,
         g : int = 0) -> Set[str]:
  """Construct a linear space of dimension n over a finite field of order q.
  
  Parameters
  ==========
  g : If the space is very large, opt for the first g elements of a generator object.
  """
  if bool(g):
    f=itertools.product(range(q),repeat=n)
    return set(''.join(str(i) for i in next(f)) for _ in range(g))
  else:
    return {''.join(str(bit) for bit in word) for word in itertools.product(range(q),repeat=n)}

def qarycode_to_nbitstring (code={'3121','2101'},k=4):
  """Convert a q-ary code """
  for n in code:
    print(' '.join(format(int(i),f'0{k}b') for i in n))

def hd (a : str = '1001',
        b : str = '0101') -> int:
  """HAMMING DISTANCE
  
  Parameters
  ==========
  x : str
  y : str

  Return
  ======
  int
  """
  assert len(a) == len(b), 'x and y must have the same length'
  return sum(x!=y for x,y in zip(a,b))

def nbfmd (c  : Set[str],
           pr : bool = False) -> np.float16:
  """NAIVE BRUTE FORCE MINIMUM DISTANCE d(C)

  Computes the pairwise Hamming distance for all codewords and returns the minimum value.

  This is a naive (i.e., non vectorized) implementation using nested for loops.
  
  Parameters
  ==========
  c  : code
  pr : Print intermediate steps.

  Returns
  =======
  d(C)
  """

  # convert a set of string vectors to a 2D NumPy array of integers
  c=np.array([list(codeword) for codeword in c],dtype=np.float16)

  # intialize empty hamming distance matrix
  hamming = np.empty([c.shape[0]]*2,dtype=np.float16)
  for i,x in enumerate(c):
    for j,y in enumerate(c):
      hamming[i,j]=(x!=y).sum()
  # the diagonal represents the Hamming distance of a codeword with itself, which is always 0.
  np.fill_diagonal(hamming,np.inf)

  if pr == True:
    print(hamming)

  return hamming.min().astype(np.int8)

def one_error_detecting (q    : int,
                         code : Set[str],
                         p    : bool = False) -> bool:
  """Verify that a code is one-error detecting.
  No one-bit error equals a codeword.
  """
  flag=True
  alphabet=set(str(i) for i in range(q))
  for codeword in code:
    if p:
      print()
      print(f"{'orig cw : ':10}{codeword}")
    for i in range(len(codeword)):
      a,b,c=codeword[:i],codeword[i],codeword[i+1:]
      symbols=alphabet-set(codeword[i])
      for symbol in symbols:
        cw=codeword[:i]+symbol+codeword[i+1:] # SINGLE ERROR
        if cw in code:
          flag=False
          if p:
            print(f"{'ERROR':10}{cw}")
        else:
          if p:
            print(f"{'':10}{cw}")
  return flag

# set(''.join(l for l in i) for i in itertools.product('10',repeat=3))
# set(''.join(l for l in i) for i in itertools.combinations_with_replacement('012',r=3))
# set(''.join(l for l in i) for i in itertools.combinations('01',r=2))



Binary Codes#


\(n = 3\)#

The binary repetition code of length \(3\)

SUMMARY OF PROPERTIES

block length           n = 3
rank                   m = 1
rate                 m/n = 1/3
minimum distance    d(C) = 3

s = 2, t = 0    double-error detection, no error correction
s = 0, t = 1    no error detection, single-error correction

Upon receiving the message \(101\) the receiver has two options: it can ask for a retransmission, or it can guess that it is more likely that a single \(1\) flipped to \(0\) rather than the case in which two \(0\) s flipped to \(1\) s.

\( \begin{array}{r|r} \text{symbol} & \text{encoding} \\ \hline 0 & 000 \\ 1 & 111 \\ \end {array} \)

  • the code is uneconomical: the encoded message is \(3\) times as long as the original

  • the code can correct single errors in a block of three (the error probability can be moderate); or alternatively where retransmission is possible, the code can be used to detect single or double errors in a block of three (the error probability can be quite high)

Encoder: repeat each bit three times

Error Processor: (assume that the receiver adopts a correction strategy where possible) take the majority vote in each block of three and make all three equal to that

Decoder: strip the last two bits

\( \begin{aligned} &(3,2,3)\text{-code} \\ C_{[2\times3]}&= \begin{cases} 000\\111\\ \end{cases} \end{aligned} \)

\( \begin{aligned} q&=2 \\ Z_q&=\{0,1\} \\ C_{[2\times3]} &\subseteq(F_2)^3 \\ n&=3 \\ M&=2 \\ d(C)&=3 \\ d(C)&\ge3=(s=2)+1 &&\text{two-error detecting} \\ d(C)&\ge3=2(t=1)+1 &&\text{one-error correcting} \end{aligned} \)

q=2
n=3
code=rc(q,n)
code
{'000', '111'}
f=fqn(2,3)
f
{'000', '001', '010', '011', '100', '101', '110', '111'}
hd(*code)
3
code.issubset(f)
True
dC=nbfmd(code,True)
dC
[[inf  3.]
 [ 3. inf]]
np.int8(3)

Suppose codeword \(\mathbf{x}=000\) is transmitted.

Under nearest neighbors decoding, the vectors that will be decoded as the codeword

\(\mathbf{x'}=000\)

are

\( \underbrace{ 000 }_{0}, \underbrace{ 100, 010, 001, }_{1} \)

Under the assumption of a binary symmetric channel the probability that the received vector \(\mathbf{y}\) is decoded as the transmitted codeword \(000\) is

\( \begin{aligned} &P_\text{correct} \\ &=P[\text{0 errors occurred}]+P[\text{1 error occurred}] \\ &=(1-p)^3+3p(1-p)^2 \\ &=(1-p)^2((1-p)+3p) \\ &=(1-p)^2(1+2p) \\ &=(1-2p+p^2)(1+2p) \\ &=1+2p-2p-4p^2+p^2+2p^3 \\ &=1-3p^2+2p^3 \end{aligned} \)

This probability is the same when we assume that the transmitted codeword is

\(\mathbf{x'}=111\)

\( \underbrace{ 111 }_{0}, \underbrace{ 011, 101, 110, }_{1} \)

Therefore, the code \(C\) has a word error probability \(P_{err}(C)\) that is independent of the codeword transmitted.

\( \begin{aligned} &P_{err}(C) \\ &=1-P_\text{correct} \\ &=1-(1-3p^2+2p^3) \\ &=1-1+3p^2-2p^3 \\ &=3p^2-2p^3 \end{aligned} \)

Let’s say that, on average, the channel causes one symbol in a hundred to be received in error

\(p=0.01\)

p   =0.01
Perr=3*p**2-2*p**3

print(f"{'symbol error p = ':15}{p}")
print(f"{'(1 - p)        = ':15}{1-p}")
print(f"{'word   error   = ':15}{Perr:.6f}")
for k in range(0,n+1):
  k_error_probability=p**k * (1-p)**(n-k)
  print(f"{'P['}{k}{' errors]    = '}{k_error_probability:.6f}")
symbol error p = 0.01
(1 - p)        = 0.99
word   error   = 0.000298
P[0 errors]    = 0.970299
P[1 errors]    = 0.009801
P[2 errors]    = 0.000099
P[3 errors]    = 0.000001

\(n = 5\)#

The binary repetition code of length \(5\)

\( \begin{aligned} &(5,2,5)\text{-code} \\ C_{[2\times5]}&= \begin{cases} 00000\\11111\\ \end{cases} \end{aligned} \)

\( \begin{aligned} q&=2 \\ Z_q&=\{0,1\} \\ C_{[2\times5]} &\subseteq(F_2)^5 \\ n&=5 \\ M&=2 \\ d(C)&=5 \\ d(C)&\ge5=(s=4)+1 &&\text{four-error detecting} \\ d(C)&\ge5=2(t=2)+1 &&\text{two-error correcting} \end{aligned} \)

q=2
n=5
code=rc(q,n)
code
{'00000', '11111'}
f=fqn(2,5)
f
{'00000',
 '00001',
 '00010',
 '00011',
 '00100',
 '00101',
 '00110',
 '00111',
 '01000',
 '01001',
 '01010',
 '01011',
 '01100',
 '01101',
 '01110',
 '01111',
 '10000',
 '10001',
 '10010',
 '10011',
 '10100',
 '10101',
 '10110',
 '10111',
 '11000',
 '11001',
 '11010',
 '11011',
 '11100',
 '11101',
 '11110',
 '11111'}
hd(*code)
5
code.issubset(f)
True
dC=nbfmd(code,True)
dC
[[inf  5.]
 [ 5. inf]]
np.int8(5)

Suppose codeword \(\mathbf{x}=00000\) is transmitted.

Under nearest neighbors decoding, the vectors that will be decoded as the codeword

\(\mathbf{x'}=00000\)

are

\( \underbrace{ 00000 }_{0}, \underbrace{ 10000, 01000, 00100, 00010, 00001 }_{1}, \underbrace{ 11000, 10100, 10010, 10001, 01100, 01010, 01001, 00110, 00101, 00011 }_{2} \)

Under the assumption of a binary symmetric channel the probability that the received vector \(\mathbf{y}\) is decoded as the transmitted codeword \(00000\) is

\( \begin{aligned} &P_\text{correct} \\ &=P[\text{0 errors occurred}]+P[\text{1 error occurred}]+P[\text{2 errors occurred}] \\ &=(1-p)^5+5p(1-p)^4+10p^2(1-p)^3 \\ &=(1-p)(1-p)^2((1-p)^2+5p(1-p)+10p^2) \\ &=(1-p)(1-2p+p^2)(1-2p+p^2+5p-5p^2+10p^2) \\ &=(1-2p+p^2-p+2p^2-p^3)(1+3p+6p^2) \\ &=(1-3p+3p^2-p^3)(1+3p+6p^2) \\ &=1+3p+6p^2-3p-9p^2-18p^3+3p^2+9p^3+18p^4-p^3-3p^4-6p^5 \\ &=1-10p^3+15p^4-6p^5 \end{aligned} \)

This probability is the same when we assume that the transmitted codeword is

\(\mathbf{x'}=11111\)

\( \underbrace{ 11111 }_{0}, \underbrace{ 01111, 10111, 11011, 11101, 11110 }_{1}, \underbrace{ 00111, 01011, 01101, 01110, 10011, 10101, 10110, 11001, 11010, 11100 }_{2} \)

Therefore, the code \(C\) has a word error probability \(P_{err}(C)\) that is independent of the codeword transmitted.

\( \begin{aligned} &P_{err}(C) \\ &=1-P_\text{correct} \\ &=1-(1-10p^3+15p^4-6p^5) \\ &=10p^3-15p^4+6p^5 \end{aligned} \)

Let’s say that, on average, the channel causes one symbol in a hundred to be received in error

\(p=0.01\)

p   =0.01
Perr=10*p**3 - 15*p**4 + 6*p**5

print(f"{'symbol error p = ':15}{p}")
print(f"{'(1 - p)        = ':15}{1-p}")
print(f"{'word   error   = ':15}{Perr:.12f}")
for k in range(0,n+1):
  k_error_probability=p**k * (1-p)**(n-k)
  print(f"{'P['}{k}{' errors]    = '}{k_error_probability:.12f}")
symbol error p = 0.01
(1 - p)        = 0.99
word   error   = 0.000009850600
P[0 errors]    = 0.950990049900
P[1 errors]    = 0.009605960100
P[2 errors]    = 0.000097029900
P[3 errors]    = 0.000000980100
P[4 errors]    = 0.000000009900
P[5 errors]    = 0.000000000100



Ternary#


\(n = 3\)#

The ternary repetition code of length \(3\)

\( C= \begin{cases} 000\\ 111\\ 222\\ \end{cases} \)

\((3,3,3)\)-code

Equivalent codes

\( \begin{aligned} C= \begin{cases} 012\\ 120\\ 201\\ \end{cases} \,\,\,\,\, \underset{p_2}{ \left( \begin{matrix} 0&1&2\\ \downarrow&\downarrow&\downarrow\\ 2&0&1\\ \end{matrix} \right) } \,\,\,\,\, C= \begin{cases} 002\\ 110\\ 221\\ \end{cases} \,\,\,\,\, \underset{p_3}{ \left( \begin{matrix} 0&1&2\\ \downarrow&\downarrow&\downarrow\\ 1&2&0\\ \end{matrix} \right) } \,\,\,\,\, C= \begin{cases} 000\\ 111\\ 222\\ \end{cases} \end{aligned} \)

q=3
n=3
print(f"{q**n}")
f=fqn(q,n)
code=rc(q,n)
code
27
{'000', '111', '222'}
f
{'000',
 '001',
 '002',
 '010',
 '011',
 '012',
 '020',
 '021',
 '022',
 '100',
 '101',
 '102',
 '110',
 '111',
 '112',
 '120',
 '121',
 '122',
 '200',
 '201',
 '202',
 '210',
 '211',
 '212',
 '220',
 '221',
 '222'}