Repetition Codes#
Programming Environment#
Show code cell source
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.style.use('ggplot');
plt.rcParams.update({'text.usetex' : True});
%matplotlib inline
from collections import defaultdict
from itertools import combinations,product
import itertools
from typing import Set
from IPython.display import display, Math
from datetime import datetime as d
import locale as l
import platform as p
import sys as s
pad = 20
print(f"{'Executed'.upper():<{pad}}: {d.now()}")
print()
print(f"{'Platform' :<{pad}}: "
f"{p.mac_ver()[0]} | "
f"{p.system()} | "
f"{p.release()} | "
f"{p.machine()}")
print(f"{'' :<{pad}}: {l.getpreferredencoding()}")
print()
print(f"{'Python' :<{pad}}: {s.version}")
print(f"{'' :<{pad}}: {s.version_info}")
print(f"{'' :<{pad}}: {p.python_implementation()}")
print()
print(f"{'Matplotlib' :<{pad}}: {mpl.__version__}")
print(f"{'NumPy' :<{pad}}: {np .__version__}")
EXECUTED : 2025-01-19 12:50:36.044123
Platform : 15.2 | Darwin | 24.2.0 | arm64
: UTF-8
Python : 3.11.11 | packaged by conda-forge | (main, Dec 5 2024, 14:21:42) [Clang 18.1.8 ]
: sys.version_info(major=3, minor=11, micro=11, releaselevel='final', serial=0)
: CPython
Matplotlib : 3.9.3
NumPy : 2.0.2
Show code cell source
def rc (q : int,
n : int) -> Set[str]:
"""Repetition Code
Generate a q-ary repetition block code of length n.
"""
S=set()
for i in range(q):
S.add(str(i)*n)
return S
def fqn (q : int,
n : int,
g : int = 0) -> Set[str]:
"""Construct a linear space of dimension n over a finite field of order q.
Parameters
==========
g : If the space is very large, opt for the first g elements of a generator object.
"""
if bool(g):
f=itertools.product(range(q),repeat=n)
return set(''.join(str(i) for i in next(f)) for _ in range(g))
else:
return {''.join(str(bit) for bit in word) for word in itertools.product(range(q),repeat=n)}
def qarycode_to_nbitstring (code={'3121','2101'},k=4):
"""Convert a q-ary code """
for n in code:
print(' '.join(format(int(i),f'0{k}b') for i in n))
def hd (a : str = '1001',
b : str = '0101') -> int:
"""HAMMING DISTANCE
Parameters
==========
x : str
y : str
Return
======
int
"""
assert len(a) == len(b), 'x and y must have the same length'
return sum(x!=y for x,y in zip(a,b))
def nbfmd (c : Set[str],
pr : bool = False) -> np.float16:
"""NAIVE BRUTE FORCE MINIMUM DISTANCE d(C)
Computes the pairwise Hamming distance for all codewords and returns the minimum value.
This is a naive (i.e., non vectorized) implementation using nested for loops.
Parameters
==========
c : code
pr : Print intermediate steps.
Returns
=======
d(C)
"""
# convert a set of string vectors to a 2D NumPy array of integers
c=np.array([list(codeword) for codeword in c],dtype=np.float16)
# intialize empty hamming distance matrix
hamming = np.empty([c.shape[0]]*2,dtype=np.float16)
for i,x in enumerate(c):
for j,y in enumerate(c):
hamming[i,j]=(x!=y).sum()
# the diagonal represents the Hamming distance of a codeword with itself, which is always 0.
np.fill_diagonal(hamming,np.inf)
if pr == True:
print(hamming)
return hamming.min().astype(np.int8)
def one_error_detecting (q : int,
code : Set[str],
p : bool = False) -> bool:
"""Verify that a code is one-error detecting.
No one-bit error equals a codeword.
"""
flag=True
alphabet=set(str(i) for i in range(q))
for codeword in code:
if p:
print()
print(f"{'orig cw : ':10}{codeword}")
for i in range(len(codeword)):
a,b,c=codeword[:i],codeword[i],codeword[i+1:]
symbols=alphabet-set(codeword[i])
for symbol in symbols:
cw=codeword[:i]+symbol+codeword[i+1:] # SINGLE ERROR
if cw in code:
flag=False
if p:
print(f"{'ERROR':10}{cw}")
else:
if p:
print(f"{'':10}{cw}")
return flag
# set(''.join(l for l in i) for i in itertools.product('10',repeat=3))
# set(''.join(l for l in i) for i in itertools.combinations_with_replacement('012',r=3))
# set(''.join(l for l in i) for i in itertools.combinations('01',r=2))
Binary Codes#
\(n = 3\)#
The binary repetition code of length \(3\)
SUMMARY OF PROPERTIES
block length n = 3
rank m = 1
rate m/n = 1/3
minimum distance d(C) = 3
s = 2, t = 0 double-error detection, no error correction
s = 0, t = 1 no error detection, single-error correction
Upon receiving the message \(101\) the receiver has two options: it can ask for a retransmission, or it can guess that it is more likely that a single \(1\) flipped to \(0\) rather than the case in which two \(0\) s flipped to \(1\) s.
\( \begin{array}{r|r} \text{symbol} & \text{encoding} \\ \hline 0 & 000 \\ 1 & 111 \\ \end {array} \)
the code is uneconomical: the encoded message is \(3\) times as long as the original
the code can correct single errors in a block of three (the error probability can be moderate); or alternatively where retransmission is possible, the code can be used to detect single or double errors in a block of three (the error probability can be quite high)
Encoder: repeat each bit three times
Error Processor: (assume that the receiver adopts a correction strategy where possible) take the majority vote in each block of three and make all three equal to that
Decoder: strip the last two bits
\( \begin{aligned} &(3,2,3)\text{-code} \\ C_{[2\times3]}&= \begin{cases} 000\\111\\ \end{cases} \end{aligned} \)
\( \begin{aligned} q&=2 \\ Z_q&=\{0,1\} \\ C_{[2\times3]} &\subseteq(F_2)^3 \\ n&=3 \\ M&=2 \\ d(C)&=3 \\ d(C)&\ge3=(s=2)+1 &&\text{two-error detecting} \\ d(C)&\ge3=2(t=1)+1 &&\text{one-error correcting} \end{aligned} \)
q=2
n=3
code=rc(q,n)
code
{'000', '111'}
f=fqn(2,3)
f
{'000', '001', '010', '011', '100', '101', '110', '111'}
hd(*code)
3
code.issubset(f)
True
dC=nbfmd(code,True)
dC
[[inf 3.]
[ 3. inf]]
np.int8(3)
Suppose codeword \(\mathbf{x}=000\) is transmitted.
Under nearest neighbors decoding, the vectors that will be decoded as the codeword
\(\mathbf{x'}=000\)
are
\( \underbrace{ 000 }_{0}, \underbrace{ 100, 010, 001, }_{1} \)
Under the assumption of a binary symmetric channel the probability that the received vector \(\mathbf{y}\) is decoded as the transmitted codeword \(000\) is
\( \begin{aligned} &P_\text{correct} \\ &=P[\text{0 errors occurred}]+P[\text{1 error occurred}] \\ &=(1-p)^3+3p(1-p)^2 \\ &=(1-p)^2((1-p)+3p) \\ &=(1-p)^2(1+2p) \\ &=(1-2p+p^2)(1+2p) \\ &=1+2p-2p-4p^2+p^2+2p^3 \\ &=1-3p^2+2p^3 \end{aligned} \)
This probability is the same when we assume that the transmitted codeword is
\(\mathbf{x'}=111\)
\( \underbrace{ 111 }_{0}, \underbrace{ 011, 101, 110, }_{1} \)
Therefore, the code \(C\) has a word error probability \(P_{err}(C)\) that is independent of the codeword transmitted.
\( \begin{aligned} &P_{err}(C) \\ &=1-P_\text{correct} \\ &=1-(1-3p^2+2p^3) \\ &=1-1+3p^2-2p^3 \\ &=3p^2-2p^3 \end{aligned} \)
Let’s say that, on average, the channel causes one symbol in a hundred to be received in error
\(p=0.01\)
p =0.01
Perr=3*p**2-2*p**3
print(f"{'symbol error p = ':15}{p}")
print(f"{'(1 - p) = ':15}{1-p}")
print(f"{'word error = ':15}{Perr:.6f}")
for k in range(0,n+1):
k_error_probability=p**k * (1-p)**(n-k)
print(f"{'P['}{k}{' errors] = '}{k_error_probability:.6f}")
symbol error p = 0.01
(1 - p) = 0.99
word error = 0.000298
P[0 errors] = 0.970299
P[1 errors] = 0.009801
P[2 errors] = 0.000099
P[3 errors] = 0.000001
\(n = 5\)#
The binary repetition code of length \(5\)
\( \begin{aligned} &(5,2,5)\text{-code} \\ C_{[2\times5]}&= \begin{cases} 00000\\11111\\ \end{cases} \end{aligned} \)
\( \begin{aligned} q&=2 \\ Z_q&=\{0,1\} \\ C_{[2\times5]} &\subseteq(F_2)^5 \\ n&=5 \\ M&=2 \\ d(C)&=5 \\ d(C)&\ge5=(s=4)+1 &&\text{four-error detecting} \\ d(C)&\ge5=2(t=2)+1 &&\text{two-error correcting} \end{aligned} \)
q=2
n=5
code=rc(q,n)
code
{'00000', '11111'}
f=fqn(2,5)
f
{'00000',
'00001',
'00010',
'00011',
'00100',
'00101',
'00110',
'00111',
'01000',
'01001',
'01010',
'01011',
'01100',
'01101',
'01110',
'01111',
'10000',
'10001',
'10010',
'10011',
'10100',
'10101',
'10110',
'10111',
'11000',
'11001',
'11010',
'11011',
'11100',
'11101',
'11110',
'11111'}
hd(*code)
5
code.issubset(f)
True
dC=nbfmd(code,True)
dC
[[inf 5.]
[ 5. inf]]
np.int8(5)
Suppose codeword \(\mathbf{x}=00000\) is transmitted.
Under nearest neighbors decoding, the vectors that will be decoded as the codeword
\(\mathbf{x'}=00000\)
are
\( \underbrace{ 00000 }_{0}, \underbrace{ 10000, 01000, 00100, 00010, 00001 }_{1}, \underbrace{ 11000, 10100, 10010, 10001, 01100, 01010, 01001, 00110, 00101, 00011 }_{2} \)
Under the assumption of a binary symmetric channel the probability that the received vector \(\mathbf{y}\) is decoded as the transmitted codeword \(00000\) is
\( \begin{aligned} &P_\text{correct} \\ &=P[\text{0 errors occurred}]+P[\text{1 error occurred}]+P[\text{2 errors occurred}] \\ &=(1-p)^5+5p(1-p)^4+10p^2(1-p)^3 \\ &=(1-p)(1-p)^2((1-p)^2+5p(1-p)+10p^2) \\ &=(1-p)(1-2p+p^2)(1-2p+p^2+5p-5p^2+10p^2) \\ &=(1-2p+p^2-p+2p^2-p^3)(1+3p+6p^2) \\ &=(1-3p+3p^2-p^3)(1+3p+6p^2) \\ &=1+3p+6p^2-3p-9p^2-18p^3+3p^2+9p^3+18p^4-p^3-3p^4-6p^5 \\ &=1-10p^3+15p^4-6p^5 \end{aligned} \)
This probability is the same when we assume that the transmitted codeword is
\(\mathbf{x'}=11111\)
\( \underbrace{ 11111 }_{0}, \underbrace{ 01111, 10111, 11011, 11101, 11110 }_{1}, \underbrace{ 00111, 01011, 01101, 01110, 10011, 10101, 10110, 11001, 11010, 11100 }_{2} \)
Therefore, the code \(C\) has a word error probability \(P_{err}(C)\) that is independent of the codeword transmitted.
\( \begin{aligned} &P_{err}(C) \\ &=1-P_\text{correct} \\ &=1-(1-10p^3+15p^4-6p^5) \\ &=10p^3-15p^4+6p^5 \end{aligned} \)
Let’s say that, on average, the channel causes one symbol in a hundred to be received in error
\(p=0.01\)
p =0.01
Perr=10*p**3 - 15*p**4 + 6*p**5
print(f"{'symbol error p = ':15}{p}")
print(f"{'(1 - p) = ':15}{1-p}")
print(f"{'word error = ':15}{Perr:.12f}")
for k in range(0,n+1):
k_error_probability=p**k * (1-p)**(n-k)
print(f"{'P['}{k}{' errors] = '}{k_error_probability:.12f}")
symbol error p = 0.01
(1 - p) = 0.99
word error = 0.000009850600
P[0 errors] = 0.950990049900
P[1 errors] = 0.009605960100
P[2 errors] = 0.000097029900
P[3 errors] = 0.000000980100
P[4 errors] = 0.000000009900
P[5 errors] = 0.000000000100
Ternary#
\(n = 3\)#
The ternary repetition code of length \(3\)
\( C= \begin{cases} 000\\ 111\\ 222\\ \end{cases} \)
\((3,3,3)\)-code
Equivalent codes
\( \begin{aligned} C= \begin{cases} 012\\ 120\\ 201\\ \end{cases} \,\,\,\,\, \underset{p_2}{ \left( \begin{matrix} 0&1&2\\ \downarrow&\downarrow&\downarrow\\ 2&0&1\\ \end{matrix} \right) } \,\,\,\,\, C= \begin{cases} 002\\ 110\\ 221\\ \end{cases} \,\,\,\,\, \underset{p_3}{ \left( \begin{matrix} 0&1&2\\ \downarrow&\downarrow&\downarrow\\ 1&2&0\\ \end{matrix} \right) } \,\,\,\,\, C= \begin{cases} 000\\ 111\\ 222\\ \end{cases} \end{aligned} \)
q=3
n=3
print(f"{q**n}")
f=fqn(q,n)
code=rc(q,n)
code
27
{'000', '111', '222'}
f
{'000',
'001',
'002',
'010',
'011',
'012',
'020',
'021',
'022',
'100',
'101',
'102',
'110',
'111',
'112',
'120',
'121',
'122',
'200',
'201',
'202',
'210',
'211',
'212',
'220',
'221',
'222'}