Files#


Table of Contents#


Programming Environment#

from   html.entities import codepoint2name
import string
from   typing import List, Union
import unicodedata

from   datetime import datetime
import locale   as l
import platform as p
import sys
# Timestamp of this run, followed by one blank separator line.
print(datetime.now(), end="\n\n")
# Platform and interpreter details, one item per output line; the empty
# string renders the blank line between the two groups.
print(
  f"{'Platform':<20}: {p.mac_ver()[0]} | {p.system()} | {p.release()} | {p.machine()}",
  f"{'':<20}: {l.getpreferredencoding()}",
  "",
  f"{'Python':<20}: {sys.version}",
  f"{      '':<20}: {sys.version_info}",
  f"{      '':<20}: {p.python_implementation()}",
  sep="\n",
)
2024-05-21 15:42:47.215239

Platform            : 14.4.1 | Darwin | 23.4.0 | arm64
                    : UTF-8

Python              : 3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:34:54) [Clang 16.0.6 ]
                    : sys.version_info(major=3, minor=11, micro=9, releaselevel='final', serial=0)
                    : CPython

Auxiliary#

Hide code cell source
def dec_to_hex (dec : int = 2**16 - 1) -> str:
  """ Compose the hexadecimal representation
      as a string
      of a nonnegative integer.

  The digits are uppercase and zero-padded on the left to a minimum of
  six characters; larger values use as many digits as they need.

  params: int (dec)
  return: str
  raises: AssertionError when dec is negative
  """
  # Raise explicitly instead of using an `assert` statement: asserts are
  # removed under `python -O`, which would silently disable this check.
  # AssertionError is kept because callers catch exactly that type.
  if dec < 0:
    raise AssertionError('Try again with a nonnegative integer.')
  return format(dec, '06X')

# Demonstration inputs: one rejected (negative) value, then valid values
# up to and beyond the six-digit padding width.
test_cases = [-1, 0, 1, 0xFFFF, 0xFFFF_FFFF]
for test_case in test_cases:
  try:
    # repr() keeps the quotes so the zero padding is visible.
    print(f"Case {test_case:<10}: {repr(dec_to_hex(test_case))}")
  except AssertionError as e:
    # A rejected input is reported through its assertion message.
    print(f"Case {test_case:<10}: {e}")
Hide code cell output
Case -1        : Try again with a nonnegative integer.
Case 0         : '000000'
Case 1         : '000001'
Case 65535     : '00FFFF'
Case 4294967295: 'FFFFFFFF'
Hide code cell source
def to_codepoint (hexa : str = '10FFFF') -> str:
  """ Compose a Unicode code point
      as a string.

  The hexadecimal text is parsed to an integer and rendered again as a
  raw escape: a backslash, a capital U and exactly eight uppercase
  hexadecimal digits. Normalising through the integer means short,
  lowercase or prefixed input still yields a well-formed escape, while
  six-digit uppercase input gives the same result as before.

  params: str (hexa)
  return: str
  raises: AssertionError when the value lies outside 0..0x10FFFF
          ValueError     when hexa is not hexadecimal text
  """
  value = int(hexa, base=16)
  # Raised explicitly (not via `assert`) so the check survives `python -O`;
  # AssertionError is the type callers already catch. The lower bound
  # rejects signed input such as '-1'.
  if not 0 <= value <= 0x10FFFF:
    raise AssertionError('Try again with a valid code point.')
  return fr'\U{value:08X}'

# Demonstration inputs: the six-digit hexadecimal strings for 0 through 4.
test_cases = list(map(dec_to_hex, range(5)))
for test_case in test_cases:
  try:
    # repr() doubles the backslash, making the raw escape text explicit.
    print(f"Case {test_case:<10}: {repr(to_codepoint(test_case))}")
  except AssertionError as e:
    # A rejected input is reported through its assertion message.
    print(f"Case {test_case:<10}: {e}")
Hide code cell output
Case 000000    : '\\U00000000'
Case 000001    : '\\U00000001'
Case 000002    : '\\U00000002'
Case 000003    : '\\U00000003'
Case 000004    : '\\U00000004'
Hide code cell source
# Alias for the built-in `chr`: converts an integer code point into the
# corresponding one-character string.
# Valid input is a nonnegative integer no greater than 0x10FFFF (1_114_111).
dec_to_glyph = chr

# Example: 0xFF (decimal 255); repr() shows the character in quotes.
print(repr(dec_to_glyph(0xFF)))
Hide code cell output
'ÿ'
Hide code cell source
def to_glyph (code_point : str = '\\u00FF') -> str:
  """ Convert a raw Unicode code point to its non raw (graphical) form.

  Backslash escape sequences contained in the text are interpreted and
  replaced by the characters they denote. Characters that are already
  literal (including non-ASCII ones) pass through unchanged.

  params: str (code_point)
  return: str
  """
  # The unicode-escape codec reads its input bytes as Latin-1, so encoding
  # with UTF-8 first would corrupt literal non-ASCII characters. Encoding
  # to ASCII with backslashreplace turns them into escapes instead, which
  # the decode step then restores exactly.
  return code_point.encode('ascii', 'backslashreplace').decode('unicode-escape')

print(to_glyph())
Hide code cell output
ÿ
Hide code cell source
def print_code_point_information (points : str = 'hello world') -> None:
  """ Prints information about Unicode code points.

      Prints the sequence of code points
             the number of code points in the sequence
             the following information for each Unicode code point in a sequence of code points:
               * glyph
               * raw code point
               * hexadecimal repr
               * byte repr (UTF-8)
               * Unicode category
               * Named entity repr
               * Unicode name

      Placeholder text is printed when a code point has no named entity
      or no Unicode name. If a helper rejects a code point with an
      AssertionError, a diagnostic line is printed and the table stops.

  params: str (points)
  return: None
  """
  print(points)
  print(len(points))
  print()
  print(f"{'Glyph':<10} "
        f"{'Code Point':<10} "
        f"{'Hex':<10} "
        f"{'Bytes':<20} "
        f"{'Category':<10} "
        f"{'Named Entity':<20} "
        f"{'Name':<10}")
  for point in points:
    ordinal = ord(point)
    # Only the helper calls can raise AssertionError, so only they are guarded.
    try:
      hex_rep    = dec_to_hex(ordinal)
      code_point = to_codepoint(hex_rep)
    except AssertionError as e:
      print(f"Case {point}: {e}")
      break
    # Lookups with a default replace the former try/except fallbacks.
    entity = codepoint2name.get(ordinal, 'NO NAMED ENTITY')
    name   = unicodedata.name(point, 'NO UNICODE NAME')
    print(f"{point:<10} "
          f"{code_point:<10} "
          f"{hex_rep:<10} "
          f"{str(point.encode('utf-8')):<20} "
          f"{unicodedata.category(point):<10} "
          f"{entity:<20} "
          f"{name}")

print_code_point_information()
Hide code cell output
hello world
11

Glyph      Code Point Hex        Bytes                Category   Named Entity         Name      
h          \U00000068 000068     b'h'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER H
e          \U00000065 000065     b'e'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER E
l          \U0000006C 00006C     b'l'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER L
l          \U0000006C 00006C     b'l'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER L
o          \U0000006F 00006F     b'o'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER O
           \U00000020 000020     b' '                 Zs         NO NAMED ENTITY      SPACE
w          \U00000077 000077     b'w'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER W
o          \U0000006F 00006F     b'o'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER O
r          \U00000072 000072     b'r'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER R
l          \U0000006C 00006C     b'l'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER L
d          \U00000064 000064     b'd'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER D

Text Encoding#

A text encoding is a text serialization codec encoding text to bytes and decoding bytes to text.

Encoding is the serialization of a string into a sequence of bytes and decoding is the deserialization of a sequence of bytes into a string.

ASCII#

[ w ] Basic Latin

The 128 code points 0-127 are mapped to the single bytes 0x00-0x7F: the most significant bit is always 0 and the remaining 7 bits encode the code point.

Code Points

Encoding

0...127

0x00...0x7F

33 control codes

95 printable characters

  • 26 uppercase letters

  • 26 lowercase letters

  • 10 digits

  • 32 punctuation

  • 1 whitespace

26 * 2 + 10 + 32 + 1 + 33
128

Control Codes#

Control Characters

  • [ u ] Control Pictures

  • [ w ] NUL

  • [ w ] ETX

  • [ w ] EOT

  • [ w ] ENQ

  • [ w ] ACK

  • [ w ] BEL

  • [ w ] BS

  • [ w ] HT

  • [ w ] LF

  • [ w ] FF

  • [ w ] CR

  • [ w ] SO

  • [ w ] SI

  • [ w ] NAK

  • [ w ] SYN

  • [ w ] ETB

  • [ w ] CAN

  • [ w ] SUB

  • [ w ] ESC

  • [ w ] DEL

ASCII

Abbreviation

Caret Notation

Signal

Escape

HTML Entity

Percent Code

Unicode

Unicode Name

0

NUL

^@

\0

&#0000;, &#x0000;

%00

U+0000

NULL

1

SOH

^A

&#0001;, &#x0001;

U+0001

START OF HEADING

2

STX

^B

&#0002;, &#x0002;

U+0002

START OF TEXT

3

ETX

^C

Ctrl-C

&#0003;, &#x0003;

U+0003

END OF TEXT

4

EOT

^D

Ctrl-D

&#0004;, &#x0004;

U+0004

END OF TRANSMISSION

5

ENQ

^E

&#0005;, &#x0005;

U+0005

ENQUIRY

6

ACK

^F

&#0006;, &#x0006;

U+0006

ACKNOWLEDGE

7

BEL

^G

\a

&#0007;, &#x0007;

U+0007

BELL

8

BS

^H

\b

&#0008;, &#x0008;

U+0008

BACKSPACE

9

HT

^I

\t

&#0009;, &#x0009;

U+0009

CHARACTER TABULATION (horizontal tabulation, tab)

10

LF

^J

\n

&#0010;, &#x000A;

U+000A

LINE FEED (new line NL, end of line EOL)

11

VT

^K

\v

&#0011;, &#x000B;

U+000B

LINE TABULATION (vertical tabulation)

12

FF

^L

Ctrl-L

\f

&#0012;, &#x000C;

U+000C

FORM FEED

13

CR

^M

\r

&#0013;, &#x000D;

U+000D

CARRIAGE RETURN

14

SO

^N

&#0014;, &#x000E;

U+000E

SHIFT OUT (locking-shift one)

15

SI

^O

&#0015;, &#x000F;

U+000F

SHIFT IN (locking-shift zero)

16

DLE

^P

&#0016;, &#x0010;

U+0010

DATA LINK ESCAPE

17

DC1

^Q

&#0017;, &#x0011;

U+0011

DEVICE CONTROL ONE

18

DC2

^R

&#0018;, &#x0012;

U+0012

DEVICE CONTROL TWO

19

DC3

^S

&#0019;, &#x0013;

U+0013

DEVICE CONTROL THREE

20

DC4

^T

&#0020;, &#x0014;

U+0014

DEVICE CONTROL FOUR

21

NAK

^U

&#0021;, &#x0015;

U+0015

NEGATIVE ACKNOWLEDGE

22

SYN

^V

&#0022;, &#x0016;

U+0016

SYNCHRONOUS IDLE

23

ETB

^W

&#0023;, &#x0017;

U+0017

END OF TRANSMISSION BLOCK

24

CAN

^X

&#0024;, &#x0018;

U+0018

CANCEL

25

EM

^Y

&#0025;, &#x0019;

U+0019

END OF MEDIUM

26

SUB

^Z

Ctrl-Z

&#0026;, &#x001A;

U+001A

SUBSTITUTE

27

ESC

^[

\e

&#0027;, &#x001B;

U+001B

ESCAPE

28

FS

^\

&#0028;, &#x001C;

U+001C

INFORMATION SEPARATOR FOUR (file separator)

29

GS

^]

&#0029;, &#x001D;

U+001D

INFORMATION SEPARATOR THREE (group separator)

30

RS

^^

&#0030;, &#x001E;

U+001E

INFORMATION SEPARATOR TWO (record separator)

31

US

^_

&#0031;, &#x001F;

U+001F

INFORMATION SEPARATOR ONE (unit separator)

127

DEL

^?

&#0127;, &#x007F;

U+007F

DELETE

ASCII

Control Picture

HTML Entity

Unicode

Unicode Name

0

&#9216;, &#x2400;

U+2400

SYMBOL FOR NULL

1

&#9217;, &#x2401;

U+2401

SYMBOL FOR START OF HEADING

2

&#9218;, &#x2402;

U+2402

SYMBOL FOR START OF TEXT

3

&#9219;, &#x2403;

U+2403

SYMBOL FOR END OF TEXT

4

&#9220;, &#x2404;

U+2404

SYMBOL FOR END OF TRANSMISSION

5

&#9221;, &#x2405;

U+2405

SYMBOL FOR ENQUIRY

6

&#9222;, &#x2406;

U+2406

SYMBOL FOR ACKNOWLEDGE

7

&#9223;, &#x2407;

U+2407

SYMBOL FOR BELL

8

&#9224;, &#x2408;

U+2408

SYMBOL FOR BACKSPACE

9

&#9225;, &#x2409;

U+2409

SYMBOL FOR HORIZONTAL TABULATION

10

&#9226;, &#x240A;

U+240A

SYMBOL FOR LINE FEED

11

&#9227;, &#x240B;

U+240B

SYMBOL FOR VERTICAL TABULATION

12

&#9228;, &#x240C;

U+240C

SYMBOL FOR FORM FEED

13

&#9229;, &#x240D;

U+240D

SYMBOL FOR CARRIAGE RETURN

14

&#9230;, &#x240E;

U+240E

SYMBOL FOR SHIFT OUT

15

&#9231;, &#x240F;

U+240F

SYMBOL FOR SHIFT IN

16

&#9232;, &#x2410;

U+2410

SYMBOL FOR DATA LINK ESCAPE

17

&#9233;, &#x2411;

U+2411

SYMBOL FOR DEVICE CONTROL ONE

18

&#9234;, &#x2412;

U+2412

SYMBOL FOR DEVICE CONTROL TWO

19

&#9235;, &#x2413;

U+2413

SYMBOL FOR DEVICE CONTROL THREE

20

&#9236;, &#x2414;

U+2414

SYMBOL FOR DEVICE CONTROL FOUR

21

&#9237;, &#x2415;

U+2415

SYMBOL FOR NEGATIVE ACKNOWLEDGE

22

&#9238;, &#x2416;

U+2416

SYMBOL FOR SYNCHRONOUS IDLE

23

&#9239;, &#x2417;

U+2417

SYMBOL FOR END OF TRANSMISSION BLOCK

24

&#9240;, &#x2418;

U+2418

SYMBOL FOR CANCEL

25

&#9241;, &#x2419;

U+2419

SYMBOL FOR END OF MEDIUM

26

&#9242;, &#x241A;

U+241A

SYMBOL FOR SUBSTITUTE

27

&#9243;, &#x241B;

U+241B

SYMBOL FOR ESCAPE

28

&#9244;, &#x241C;

U+241C

SYMBOL FOR FILE SEPARATOR

29

&#9245;, &#x241D;

U+241D

SYMBOL FOR GROUP SEPARATOR

30

&#9246;, &#x241E;

U+241E

SYMBOL FOR RECORD SEPARATOR

31

&#9247;, &#x241F;

U+241F

SYMBOL FOR UNIT SEPARATOR

32

&#9248;, &#x2420;

U+2420

SYMBOL FOR SPACE

127

&#9249;, &#x2421;

U+2421

SYMBOL FOR DELETE


Uppercase Letters#

ASCII

Symbol

HTML Entity

Unicode Code Point

Unicode Name

65

A

&#65;, &#x41;

U+0041

LATIN CAPITAL LETTER A

66

B

&#66;, &#x42;

U+0042

LATIN CAPITAL LETTER B

67

C

&#67;, &#x43;

U+0043

LATIN CAPITAL LETTER C

68

D

&#68;, &#x44;

U+0044

LATIN CAPITAL LETTER D

69

E

&#69;, &#x45;

U+0045

LATIN CAPITAL LETTER E

70

F

&#70;, &#x46;

U+0046

LATIN CAPITAL LETTER F

71

G

&#71;, &#x47;

U+0047

LATIN CAPITAL LETTER G

72

H

&#72;, &#x48;

U+0048

LATIN CAPITAL LETTER H

73

I

&#73;, &#x49;

U+0049

LATIN CAPITAL LETTER I

74

J

&#74;, &#x4A;

U+004A

LATIN CAPITAL LETTER J

75

K

&#75;, &#x4B;

U+004B

LATIN CAPITAL LETTER K

76

L

&#76;, &#x4C;

U+004C

LATIN CAPITAL LETTER L

77

M

&#77;, &#x4D;

U+004D

LATIN CAPITAL LETTER M

78

N

&#78;, &#x4E;

U+004E

LATIN CAPITAL LETTER N

79

O

&#79;, &#x4F;

U+004F

LATIN CAPITAL LETTER O

80

P

&#80;, &#x50;

U+0050

LATIN CAPITAL LETTER P

81

Q

&#81;, &#x51;

U+0051

LATIN CAPITAL LETTER Q

82

R

&#82;, &#x52;

U+0052

LATIN CAPITAL LETTER R

83

S

&#83;, &#x53;

U+0053

LATIN CAPITAL LETTER S

84

T

&#84;, &#x54;

U+0054

LATIN CAPITAL LETTER T

85

U

&#85;, &#x55;

U+0055

LATIN CAPITAL LETTER U

86

V

&#86;, &#x56;

U+0056

LATIN CAPITAL LETTER V

87

W

&#87;, &#x57;

U+0057

LATIN CAPITAL LETTER W

88

X

&#88;, &#x58;

U+0058

LATIN CAPITAL LETTER X

89

Y

&#89;, &#x59;

U+0059

LATIN CAPITAL LETTER Y

90

Z

&#90;, &#x5A;

U+005A

LATIN CAPITAL LETTER Z

print_code_point_information(string.ascii_uppercase)
ABCDEFGHIJKLMNOPQRSTUVWXYZ
26

Glyph      Code Point Hex        Bytes                Category   Named Entity         Name      
A          \U00000041 000041     b'A'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER A
B          \U00000042 000042     b'B'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER B
C          \U00000043 000043     b'C'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER C
D          \U00000044 000044     b'D'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER D
E          \U00000045 000045     b'E'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER E
F          \U00000046 000046     b'F'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER F
G          \U00000047 000047     b'G'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER G
H          \U00000048 000048     b'H'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER H
I          \U00000049 000049     b'I'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER I
J          \U0000004A 00004A     b'J'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER J
K          \U0000004B 00004B     b'K'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER K
L          \U0000004C 00004C     b'L'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER L
M          \U0000004D 00004D     b'M'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER M
N          \U0000004E 00004E     b'N'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER N
O          \U0000004F 00004F     b'O'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER O
P          \U00000050 000050     b'P'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER P
Q          \U00000051 000051     b'Q'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER Q
R          \U00000052 000052     b'R'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER R
S          \U00000053 000053     b'S'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER S
T          \U00000054 000054     b'T'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER T
U          \U00000055 000055     b'U'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER U
V          \U00000056 000056     b'V'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER V
W          \U00000057 000057     b'W'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER W
X          \U00000058 000058     b'X'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER X
Y          \U00000059 000059     b'Y'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER Y
Z          \U0000005A 00005A     b'Z'                 Lu         NO NAMED ENTITY      LATIN CAPITAL LETTER Z

Lowercase Letters#

ASCII

Symbol

HTML Entity

Unicode Code Point

Unicode Name

97

a

&#097;, &#x61;

U+0061

LATIN SMALL LETTER A

98

b

&#098;, &#x62;

U+0062

LATIN SMALL LETTER B

99

c

&#099;, &#x63;

U+0063

LATIN SMALL LETTER C

100

d

&#100;, &#x64;

U+0064

LATIN SMALL LETTER D

101

e

&#101;, &#x65;

U+0065

LATIN SMALL LETTER E

102

f

&#102;, &#x66;

U+0066

LATIN SMALL LETTER F

103

g

&#103;, &#x67;

U+0067

LATIN SMALL LETTER G

104

h

&#104;, &#x68;

U+0068

LATIN SMALL LETTER H

105

i

&#105;, &#x69;

U+0069

LATIN SMALL LETTER I

106

j

&#106;, &#x6A;

U+006A

LATIN SMALL LETTER J

107

k

&#107;, &#x6B;

U+006B

LATIN SMALL LETTER K

108

l

&#108;, &#x6C;

U+006C

LATIN SMALL LETTER L

109

m

&#109;, &#x6D;

U+006D

LATIN SMALL LETTER M

110

n

&#110;, &#x6E;

U+006E

LATIN SMALL LETTER N

111

o

&#111;, &#x6F;

U+006F

LATIN SMALL LETTER O

112

p

&#112;, &#x70;

U+0070

LATIN SMALL LETTER P

113

q

&#113;, &#x71;

U+0071

LATIN SMALL LETTER Q

114

r

&#114;, &#x72;

U+0072

LATIN SMALL LETTER R

115

s

&#115;, &#x73;

U+0073

LATIN SMALL LETTER S

116

t

&#116;, &#x74;

U+0074

LATIN SMALL LETTER T

117

u

&#117;, &#x75;

U+0075

LATIN SMALL LETTER U

118

v

&#118;, &#x76;

U+0076

LATIN SMALL LETTER V

119

w

&#119;, &#x77;

U+0077

LATIN SMALL LETTER W

120

x

&#120;, &#x78;

U+0078

LATIN SMALL LETTER X

121

y

&#121;, &#x79;

U+0079

LATIN SMALL LETTER Y

122

z

&#122;, &#x7A;

U+007A

LATIN SMALL LETTER Z

print_code_point_information(string.ascii_lowercase)
abcdefghijklmnopqrstuvwxyz
26

Glyph      Code Point Hex        Bytes                Category   Named Entity         Name      
a          \U00000061 000061     b'a'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER A
b          \U00000062 000062     b'b'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER B
c          \U00000063 000063     b'c'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER C
d          \U00000064 000064     b'd'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER D
e          \U00000065 000065     b'e'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER E
f          \U00000066 000066     b'f'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER F
g          \U00000067 000067     b'g'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER G
h          \U00000068 000068     b'h'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER H
i          \U00000069 000069     b'i'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER I
j          \U0000006A 00006A     b'j'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER J
k          \U0000006B 00006B     b'k'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER K
l          \U0000006C 00006C     b'l'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER L
m          \U0000006D 00006D     b'm'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER M
n          \U0000006E 00006E     b'n'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER N
o          \U0000006F 00006F     b'o'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER O
p          \U00000070 000070     b'p'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER P
q          \U00000071 000071     b'q'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER Q
r          \U00000072 000072     b'r'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER R
s          \U00000073 000073     b's'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER S
t          \U00000074 000074     b't'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER T
u          \U00000075 000075     b'u'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER U
v          \U00000076 000076     b'v'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER V
w          \U00000077 000077     b'w'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER W
x          \U00000078 000078     b'x'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER X
y          \U00000079 000079     b'y'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER Y
z          \U0000007A 00007A     b'z'                 Ll         NO NAMED ENTITY      LATIN SMALL LETTER Z
# print_code_point_information(string.ascii_letters)

Digits#

ASCII

Symbol

HTML Entity

Unicode Code Point

Unicode Name

48

0

&#48;, &#x30;

U+0030

DIGIT ZERO

49

1

&#49;, &#x31;

U+0031

DIGIT ONE

50

2

&#50;, &#x32;

U+0032

DIGIT TWO

51

3

&#51;, &#x33;

U+0033

DIGIT THREE

52

4

&#52;, &#x34;

U+0034

DIGIT FOUR

53

5

&#53;, &#x35;

U+0035

DIGIT FIVE

54

6

&#54;, &#x36;

U+0036

DIGIT SIX

55

7

&#55;, &#x37;

U+0037

DIGIT SEVEN

56

8

&#56;, &#x38;

U+0038

DIGIT EIGHT

57

9

&#57;, &#x39;

U+0039

DIGIT NINE

print_code_point_information(string.digits)
0123456789
10

Glyph      Code Point Hex        Bytes                Category   Named Entity         Name      
0          \U00000030 000030     b'0'                 Nd         NO NAMED ENTITY      DIGIT ZERO
1          \U00000031 000031     b'1'                 Nd         NO NAMED ENTITY      DIGIT ONE
2          \U00000032 000032     b'2'                 Nd         NO NAMED ENTITY      DIGIT TWO
3          \U00000033 000033     b'3'                 Nd         NO NAMED ENTITY      DIGIT THREE
4          \U00000034 000034     b'4'                 Nd         NO NAMED ENTITY      DIGIT FOUR
5          \U00000035 000035     b'5'                 Nd         NO NAMED ENTITY      DIGIT FIVE
6          \U00000036 000036     b'6'                 Nd         NO NAMED ENTITY      DIGIT SIX
7          \U00000037 000037     b'7'                 Nd         NO NAMED ENTITY      DIGIT SEVEN
8          \U00000038 000038     b'8'                 Nd         NO NAMED ENTITY      DIGIT EIGHT
9          \U00000039 000039     b'9'                 Nd         NO NAMED ENTITY      DIGIT NINE

Punctuation#

ASCII

Symbol

HTML Entity

Unicode Code Point

Unicode Name

32

&#32;, &#x20;

U+0020

SPACE [ w ]

33

!

&#33;, &#x21;, &excl;

U+0021

EXCLAMATION MARK (factorial, bang) [ w ]

34

&#34;, &#x22;, &quot;

U+0022

QUOTATION MARK (double quote) [ w ]

35

#

&#35;, &#x23;

U+0023

NUMBER SIGN (pound sign, hash) [ w ]

36

$

&#36;, &#x24;, &dollar;

U+0024

DOLLAR SIGN [ w ]

37

%

&#37;, &#x25;, &percnt;

U+0025

PERCENT SIGN [ w ]

38

&

&#38;, &#x26;, &amp;

U+0026

AMPERSAND (and) [ w ]

39

&#39;, &#x27;, &apos;

U+0027

APOSTROPHE (single quote)

40

(

&#40;, &#x28;

U+0028

LEFT PARENTHESIS (opening parenthesis) [ w ]

41

)

&#41;, &#x29;

U+0029

RIGHT PARENTHESIS (closing parenthesis) [ w ]

42

*

&#42;, &#x2A;

U+002A

ASTERISK (star) [ w ] (ἀστερίσκος “little star”)

43

+

&#43;, &#x2B;, &plus;

U+002B

PLUS SIGN [ w ]

44

,

&#44;, &#x2C;, &comma;

U+002C

COMMA [ w ]

45

-

&#45;, &#x2D;, &hyphen;

U+002D

HYPHEN-MINUS [ w ] (hyphen [ w ], dash [ w ], minus sign [ w ])

46

.

&#46;, &#x2E;, &period;

U+002E

FULL STOP (period, dot, decimal point) [ w ]

47

/

&#47;, &#x2F;

U+002F

SOLIDUS (slash, forward slash) [ w ]

58

:

&#58;, &#x3A;, &colon;

U+003A

COLON [ w ]

59

;

&#59;, &#x3B;, &semi;

U+003B

SEMICOLON [ w ]

60

<

&#60;, &#x3C;, &lt;

U+003C

LESS-THAN SIGN [ w ]

61

=

&#61;, &#x3D;, &equals;

U+003D

EQUALS SIGN [ w ]

62

>

&#62;, &#x3E;, &gt;

U+003E

GREATER-THAN SIGN [ w ]

63

?

&#63;, &#x3F;, &quest;

U+003F

QUESTION MARK [ w ]

64

@

&#64;, &#x40;

U+0040

COMMERCIAL AT (at sign) [ w ]

91

[

&#91;, &#x5B;

U+005B

LEFT SQUARE BRACKET (opening square bracket) [ w ]

92

\

&#92;, &#x5C;

U+005C

REVERSE SOLIDUS (backslash) [ w ]

93

]

&#93;, &#x5D;

U+005D

RIGHT SQUARE BRACKET (closing square bracket) [ w ]

94

^

&#94;, &#x5E;

U+005E

CIRCUMFLEX ACCENT (“caret”, “hat”) [ w ]

95

_

&#95;, &#x5F;

U+005F

LOW LINE (“underscore”) [ w ]

96

`

&#96;, &#x60;

U+0060

GRAVE ACCENT (backtick, backquote) [ w ]

123

{

&#123;, &#x7B;

U+007B

LEFT CURLY BRACKET (opening curly bracket, left brace) [ w ]

124

|

&#124;, &#x7C;

U+007C

VERTICAL LINE (vertical bar, pipe) [ w ]

125

}

&#125;, &#x7D;

U+007D

RIGHT CURLY BRACKET (closing curly bracket, right brace) [ w ]

126

~

&#126;, &#x7E;

U+007E

TILDE [ w ]

Dashes

Symbol

HTML Entity

Unicode Code Point

Unicode Name

-

&#x002D;

U+002D

HYPHEN-MINUS (hyphen, dash, minus sign)

&#x2010;

U+2010

HYPHEN

&#x2012;

U+2012

FIGURE DASH

&#x2013;

U+2013

EN DASH

&#x2014;

U+2014

EM DASH

&#x2015;

U+2015

HORIZONTAL BAR

&#x2212;

U+2212

MINUS SIGN

Symbol

HTML Entity

Unicode Code Point

Unicode Name

­

&#x00AD;

U+00AD

SOFT HYPHEN

˗

&#x02D7;

U+02D7

MODIFIER LETTER MINUS SIGN

&#x2011;

U+2011

NON-BREAKING HYPHEN

&#x2027;

U+2027

HYPHENATION POINT

&#x2043;

U+2043

HYPHEN BULLET

𐆑

&#x10191;

U+10191

ROMAN UNCIA SIGN

Quotation Marks

Symbol

HTML Entity

Unicode Code Point

Unicode Name

&#x2018;

U+2018

LEFT SINGLE QUOTATION MARK

&#x2019;

U+2019

RIGHT SINGLE QUOTATION MARK

&#x201C;

U+201C

LEFT DOUBLE QUOTATION MARK

&#x201D;

U+201D

RIGHT DOUBLE QUOTATION MARK

Symbol

HTML Entity

Unicode Code Point

Unicode Name

&#x0022;

U+0022

QUOTATION MARK (double quote)

ʹ

&#x02B9;

U+02B9

MODIFIER LETTER PRIME

ʺ

&#x02BA;

U+02BA

MODIFIER LETTER DOUBLE PRIME

ʼ

&#x02BC;

U+02BC

MODIFIER LETTER APOSTROPHE

ˈ

&#x02C8;

U+02C8

MODIFIER LETTER VERTICAL LINE

˝

&#x02DD;

U+02DD

DOUBLE ACUTE ACCENT

ˮ

&#x02EE;

U+02EE

MODIFIER LETTER DOUBLE APOSTROPHE

́

&#x0301;

U+0301

COMBINING ACUTE ACCENT

̋

&#x030B;

U+030B

COMBINING DOUBLE ACUTE ACCENT

̍

&#x030D;

U+030D

COMBINING VERTICAL LINE ABOVE

̎

&#x030E;

U+030E

COMBINING DOUBLE VERTICAL LINE ABOVE

׳

&#x05F3;

U+05F3

HEBREW PUNCTUATION GERESH

״

&#x05F4;

U+05F4

HEBREW PUNCTUATION GERSHAYIM

&#x2032;

U+2032

PRIME

&#x2033;

U+2033

DOUBLE PRIME

&#x3003;

U+3003

DITTO MARK

&#xA78C;

U+A78C

LATIN SMALL LETTER SALTILLO

print_code_point_information(string.punctuation)
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
32

Glyph      Code Point Hex        Bytes                Category   Named Entity         Name      
!          \U00000021 000021     b'!'                 Po         NO NAMED ENTITY      EXCLAMATION MARK
"          \U00000022 000022     b'"'                 Po         quot                 QUOTATION MARK
#          \U00000023 000023     b'#'                 Po         NO NAMED ENTITY      NUMBER SIGN
$          \U00000024 000024     b'$'                 Sc         NO NAMED ENTITY      DOLLAR SIGN
%          \U00000025 000025     b'%'                 Po         NO NAMED ENTITY      PERCENT SIGN
&          \U00000026 000026     b'&'                 Po         amp                  AMPERSAND
'          \U00000027 000027     b"'"                 Po         NO NAMED ENTITY      APOSTROPHE
(          \U00000028 000028     b'('                 Ps         NO NAMED ENTITY      LEFT PARENTHESIS
)          \U00000029 000029     b')'                 Pe         NO NAMED ENTITY      RIGHT PARENTHESIS
*          \U0000002A 00002A     b'*'                 Po         NO NAMED ENTITY      ASTERISK
+          \U0000002B 00002B     b'+'                 Sm         NO NAMED ENTITY      PLUS SIGN
,          \U0000002C 00002C     b','                 Po         NO NAMED ENTITY      COMMA
-          \U0000002D 00002D     b'-'                 Pd         NO NAMED ENTITY      HYPHEN-MINUS
.          \U0000002E 00002E     b'.'                 Po         NO NAMED ENTITY      FULL STOP
/          \U0000002F 00002F     b'/'                 Po         NO NAMED ENTITY      SOLIDUS
:          \U0000003A 00003A     b':'                 Po         NO NAMED ENTITY      COLON
;          \U0000003B 00003B     b';'                 Po         NO NAMED ENTITY      SEMICOLON
<          \U0000003C 00003C     b'<'                 Sm         lt                   LESS-THAN SIGN
=          \U0000003D 00003D     b'='                 Sm         NO NAMED ENTITY      EQUALS SIGN
>          \U0000003E 00003E     b'>'                 Sm         gt                   GREATER-THAN SIGN
?          \U0000003F 00003F     b'?'                 Po         NO NAMED ENTITY      QUESTION MARK
@          \U00000040 000040     b'@'                 Po         NO NAMED ENTITY      COMMERCIAL AT
[          \U0000005B 00005B     b'['                 Ps         NO NAMED ENTITY      LEFT SQUARE BRACKET
\          \U0000005C 00005C     b'\\'                Po         NO NAMED ENTITY      REVERSE SOLIDUS
]          \U0000005D 00005D     b']'                 Pe         NO NAMED ENTITY      RIGHT SQUARE BRACKET
^          \U0000005E 00005E     b'^'                 Sk         NO NAMED ENTITY      CIRCUMFLEX ACCENT
_          \U0000005F 00005F     b'_'                 Pc         NO NAMED ENTITY      LOW LINE
`          \U00000060 000060     b'`'                 Sk         NO NAMED ENTITY      GRAVE ACCENT
{          \U0000007B 00007B     b'{'                 Ps         NO NAMED ENTITY      LEFT CURLY BRACKET
|          \U0000007C 00007C     b'|'                 Sm         NO NAMED ENTITY      VERTICAL LINE
}          \U0000007D 00007D     b'}'                 Pe         NO NAMED ENTITY      RIGHT CURLY BRACKET
~          \U0000007E 00007E     b'~'                 Sm         NO NAMED ENTITY      TILDE

Whitespace#

print_code_point_information(string.whitespace[0])
 
1

Glyph      Code Point Hex        Bytes                Category   Named Entity         Name      
           \U00000020 000020     b' '                 Zs         NO NAMED ENTITY      SPACE
string.whitespace[1:]
'\t\n\r\x0b\x0c'

# print_code_point_information(string.printable)

# The `ascii` codec can encode only the first 128 code points (0-127).
# Walk the first 256 characters and print, for each one, the glyph, its
# ASCII-encoded bytes and its Unicode name.
for i in map(chr, range(256)):
  try:
    print(f"{i:<10} {str(i.encode('ascii')):<10} {unicodedata.name(i)}")
  except (UnicodeEncodeError, ValueError) as e:
    # UnicodeEncodeError: the character cannot be encoded as ASCII.
    # ValueError: the character has no Unicode name.
    print(e)
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
           b' '       SPACE
!          b'!'       EXCLAMATION MARK
"          b'"'       QUOTATION MARK
#          b'#'       NUMBER SIGN
$          b'$'       DOLLAR SIGN
%          b'%'       PERCENT SIGN
&          b'&'       AMPERSAND
'          b"'"       APOSTROPHE
(          b'('       LEFT PARENTHESIS
)          b')'       RIGHT PARENTHESIS
*          b'*'       ASTERISK
+          b'+'       PLUS SIGN
,          b','       COMMA
-          b'-'       HYPHEN-MINUS
.          b'.'       FULL STOP
/          b'/'       SOLIDUS
0          b'0'       DIGIT ZERO
1          b'1'       DIGIT ONE
2          b'2'       DIGIT TWO
3          b'3'       DIGIT THREE
4          b'4'       DIGIT FOUR
5          b'5'       DIGIT FIVE
6          b'6'       DIGIT SIX
7          b'7'       DIGIT SEVEN
8          b'8'       DIGIT EIGHT
9          b'9'       DIGIT NINE
:          b':'       COLON
;          b';'       SEMICOLON
<          b'<'       LESS-THAN SIGN
=          b'='       EQUALS SIGN
>          b'>'       GREATER-THAN SIGN
?          b'?'       QUESTION MARK
@          b'@'       COMMERCIAL AT
A          b'A'       LATIN CAPITAL LETTER A
B          b'B'       LATIN CAPITAL LETTER B
C          b'C'       LATIN CAPITAL LETTER C
D          b'D'       LATIN CAPITAL LETTER D
E          b'E'       LATIN CAPITAL LETTER E
F          b'F'       LATIN CAPITAL LETTER F
G          b'G'       LATIN CAPITAL LETTER G
H          b'H'       LATIN CAPITAL LETTER H
I          b'I'       LATIN CAPITAL LETTER I
J          b'J'       LATIN CAPITAL LETTER J
K          b'K'       LATIN CAPITAL LETTER K
L          b'L'       LATIN CAPITAL LETTER L
M          b'M'       LATIN CAPITAL LETTER M
N          b'N'       LATIN CAPITAL LETTER N
O          b'O'       LATIN CAPITAL LETTER O
P          b'P'       LATIN CAPITAL LETTER P
Q          b'Q'       LATIN CAPITAL LETTER Q
R          b'R'       LATIN CAPITAL LETTER R
S          b'S'       LATIN CAPITAL LETTER S
T          b'T'       LATIN CAPITAL LETTER T
U          b'U'       LATIN CAPITAL LETTER U
V          b'V'       LATIN CAPITAL LETTER V
W          b'W'       LATIN CAPITAL LETTER W
X          b'X'       LATIN CAPITAL LETTER X
Y          b'Y'       LATIN CAPITAL LETTER Y
Z          b'Z'       LATIN CAPITAL LETTER Z
[          b'['       LEFT SQUARE BRACKET
\          b'\\'      REVERSE SOLIDUS
]          b']'       RIGHT SQUARE BRACKET
^          b'^'       CIRCUMFLEX ACCENT
_          b'_'       LOW LINE
`          b'`'       GRAVE ACCENT
a          b'a'       LATIN SMALL LETTER A
b          b'b'       LATIN SMALL LETTER B
c          b'c'       LATIN SMALL LETTER C
d          b'd'       LATIN SMALL LETTER D
e          b'e'       LATIN SMALL LETTER E
f          b'f'       LATIN SMALL LETTER F
g          b'g'       LATIN SMALL LETTER G
h          b'h'       LATIN SMALL LETTER H
i          b'i'       LATIN SMALL LETTER I
j          b'j'       LATIN SMALL LETTER J
k          b'k'       LATIN SMALL LETTER K
l          b'l'       LATIN SMALL LETTER L
m          b'm'       LATIN SMALL LETTER M
n          b'n'       LATIN SMALL LETTER N
o          b'o'       LATIN SMALL LETTER O
p          b'p'       LATIN SMALL LETTER P
q          b'q'       LATIN SMALL LETTER Q
r          b'r'       LATIN SMALL LETTER R
s          b's'       LATIN SMALL LETTER S
t          b't'       LATIN SMALL LETTER T
u          b'u'       LATIN SMALL LETTER U
v          b'v'       LATIN SMALL LETTER V
w          b'w'       LATIN SMALL LETTER W
x          b'x'       LATIN SMALL LETTER X
y          b'y'       LATIN SMALL LETTER Y
z          b'z'       LATIN SMALL LETTER Z
{          b'{'       LEFT CURLY BRACKET
|          b'|'       VERTICAL LINE
}          b'}'       RIGHT CURLY BRACKET
~          b'~'       TILDE
no such name
'ascii' codec can't encode character '\x80' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x81' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x82' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x83' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x84' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x85' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x86' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x87' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x88' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x89' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x8a' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x8b' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x8c' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x8d' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x8e' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x8f' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x90' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x91' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x92' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x93' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x94' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x95' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x96' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x97' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x98' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x99' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x9a' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x9b' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x9c' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x9d' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x9e' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\x9f' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa0' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa1' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa2' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa3' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa4' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa5' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa6' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa7' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa8' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xa9' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xaa' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xab' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xac' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xad' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xae' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xaf' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb0' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb1' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb2' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb3' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb4' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb5' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb6' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb7' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb8' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xb9' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xba' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xbb' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xbc' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xbd' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xbe' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xbf' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc0' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc1' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc2' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc3' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc4' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc5' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc6' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc7' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc8' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xc9' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xca' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xcb' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xcc' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xcd' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xce' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xcf' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd0' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd1' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd2' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd3' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd4' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd5' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd6' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd7' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd8' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xd9' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xda' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xdb' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xdc' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xdd' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xde' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xdf' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe0' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe1' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe2' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe3' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe4' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe6' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe7' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe8' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xe9' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xea' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xeb' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xec' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xed' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xee' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xef' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf0' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf1' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf2' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf3' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf4' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf5' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf6' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf7' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf8' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xf9' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xfa' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xfb' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xfc' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xfd' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xfe' in position 0: ordinal not in range(128)
'ascii' codec can't encode character '\xff' in position 0: ordinal not in range(128)
# Each of the 128 ASCII code points encodes to exactly one byte.
{len(chr(code_point).encode('ascii')) for code_point in range(128)} == {1}
True

Extended ASCII#

Code points 0-255 are mapped to bytes 0x0-0xff.

# The `latin-1` codec only covers the first 256 code points (0-255);
# the loop includes 256 on purpose to show the first encoding failure.

for code_point in range(257):
  char = chr(code_point)
  try:
    # Encode first, then look up the name, so a character that cannot be
    # encoded reports the codec error rather than a missing-name error.
    encoded = str(char.encode('latin-1'))
    name = unicodedata.name(char)
  except (UnicodeEncodeError, ValueError) as error:
    # ValueError ("no such name") comes from unicodedata.name for
    # characters that have no Unicode name.
    print(error)
  else:
    print(f"{char:<10} {encoded:<10} {name}")
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
           b' '       SPACE
!          b'!'       EXCLAMATION MARK
"          b'"'       QUOTATION MARK
#          b'#'       NUMBER SIGN
$          b'$'       DOLLAR SIGN
%          b'%'       PERCENT SIGN
&          b'&'       AMPERSAND
'          b"'"       APOSTROPHE
(          b'('       LEFT PARENTHESIS
)          b')'       RIGHT PARENTHESIS
*          b'*'       ASTERISK
+          b'+'       PLUS SIGN
,          b','       COMMA
-          b'-'       HYPHEN-MINUS
.          b'.'       FULL STOP
/          b'/'       SOLIDUS
0          b'0'       DIGIT ZERO
1          b'1'       DIGIT ONE
2          b'2'       DIGIT TWO
3          b'3'       DIGIT THREE
4          b'4'       DIGIT FOUR
5          b'5'       DIGIT FIVE
6          b'6'       DIGIT SIX
7          b'7'       DIGIT SEVEN
8          b'8'       DIGIT EIGHT
9          b'9'       DIGIT NINE
:          b':'       COLON
;          b';'       SEMICOLON
<          b'<'       LESS-THAN SIGN
=          b'='       EQUALS SIGN
>          b'>'       GREATER-THAN SIGN
?          b'?'       QUESTION MARK
@          b'@'       COMMERCIAL AT
A          b'A'       LATIN CAPITAL LETTER A
B          b'B'       LATIN CAPITAL LETTER B
C          b'C'       LATIN CAPITAL LETTER C
D          b'D'       LATIN CAPITAL LETTER D
E          b'E'       LATIN CAPITAL LETTER E
F          b'F'       LATIN CAPITAL LETTER F
G          b'G'       LATIN CAPITAL LETTER G
H          b'H'       LATIN CAPITAL LETTER H
I          b'I'       LATIN CAPITAL LETTER I
J          b'J'       LATIN CAPITAL LETTER J
K          b'K'       LATIN CAPITAL LETTER K
L          b'L'       LATIN CAPITAL LETTER L
M          b'M'       LATIN CAPITAL LETTER M
N          b'N'       LATIN CAPITAL LETTER N
O          b'O'       LATIN CAPITAL LETTER O
P          b'P'       LATIN CAPITAL LETTER P
Q          b'Q'       LATIN CAPITAL LETTER Q
R          b'R'       LATIN CAPITAL LETTER R
S          b'S'       LATIN CAPITAL LETTER S
T          b'T'       LATIN CAPITAL LETTER T
U          b'U'       LATIN CAPITAL LETTER U
V          b'V'       LATIN CAPITAL LETTER V
W          b'W'       LATIN CAPITAL LETTER W
X          b'X'       LATIN CAPITAL LETTER X
Y          b'Y'       LATIN CAPITAL LETTER Y
Z          b'Z'       LATIN CAPITAL LETTER Z
[          b'['       LEFT SQUARE BRACKET
\          b'\\'      REVERSE SOLIDUS
]          b']'       RIGHT SQUARE BRACKET
^          b'^'       CIRCUMFLEX ACCENT
_          b'_'       LOW LINE
`          b'`'       GRAVE ACCENT
a          b'a'       LATIN SMALL LETTER A
b          b'b'       LATIN SMALL LETTER B
c          b'c'       LATIN SMALL LETTER C
d          b'd'       LATIN SMALL LETTER D
e          b'e'       LATIN SMALL LETTER E
f          b'f'       LATIN SMALL LETTER F
g          b'g'       LATIN SMALL LETTER G
h          b'h'       LATIN SMALL LETTER H
i          b'i'       LATIN SMALL LETTER I
j          b'j'       LATIN SMALL LETTER J
k          b'k'       LATIN SMALL LETTER K
l          b'l'       LATIN SMALL LETTER L
m          b'm'       LATIN SMALL LETTER M
n          b'n'       LATIN SMALL LETTER N
o          b'o'       LATIN SMALL LETTER O
p          b'p'       LATIN SMALL LETTER P
q          b'q'       LATIN SMALL LETTER Q
r          b'r'       LATIN SMALL LETTER R
s          b's'       LATIN SMALL LETTER S
t          b't'       LATIN SMALL LETTER T
u          b'u'       LATIN SMALL LETTER U
v          b'v'       LATIN SMALL LETTER V
w          b'w'       LATIN SMALL LETTER W
x          b'x'       LATIN SMALL LETTER X
y          b'y'       LATIN SMALL LETTER Y
z          b'z'       LATIN SMALL LETTER Z
{          b'{'       LEFT CURLY BRACKET
|          b'|'       VERTICAL LINE
}          b'}'       RIGHT CURLY BRACKET
~          b'~'       TILDE
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
no such name
           b'\xa0'    NO-BREAK SPACE
¡          b'\xa1'    INVERTED EXCLAMATION MARK
¢          b'\xa2'    CENT SIGN
£          b'\xa3'    POUND SIGN
¤          b'\xa4'    CURRENCY SIGN
¥          b'\xa5'    YEN SIGN
¦          b'\xa6'    BROKEN BAR
§          b'\xa7'    SECTION SIGN
¨          b'\xa8'    DIAERESIS
©          b'\xa9'    COPYRIGHT SIGN
ª          b'\xaa'    FEMININE ORDINAL INDICATOR
«          b'\xab'    LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
¬          b'\xac'    NOT SIGN
­          b'\xad'    SOFT HYPHEN
®          b'\xae'    REGISTERED SIGN
¯          b'\xaf'    MACRON
°          b'\xb0'    DEGREE SIGN
±          b'\xb1'    PLUS-MINUS SIGN
²          b'\xb2'    SUPERSCRIPT TWO
³          b'\xb3'    SUPERSCRIPT THREE
´          b'\xb4'    ACUTE ACCENT
µ          b'\xb5'    MICRO SIGN
¶          b'\xb6'    PILCROW SIGN
·          b'\xb7'    MIDDLE DOT
¸          b'\xb8'    CEDILLA
¹          b'\xb9'    SUPERSCRIPT ONE
º          b'\xba'    MASCULINE ORDINAL INDICATOR
»          b'\xbb'    RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
¼          b'\xbc'    VULGAR FRACTION ONE QUARTER
½          b'\xbd'    VULGAR FRACTION ONE HALF
¾          b'\xbe'    VULGAR FRACTION THREE QUARTERS
¿          b'\xbf'    INVERTED QUESTION MARK
À          b'\xc0'    LATIN CAPITAL LETTER A WITH GRAVE
Á          b'\xc1'    LATIN CAPITAL LETTER A WITH ACUTE
Â          b'\xc2'    LATIN CAPITAL LETTER A WITH CIRCUMFLEX
Ã          b'\xc3'    LATIN CAPITAL LETTER A WITH TILDE
Ä          b'\xc4'    LATIN CAPITAL LETTER A WITH DIAERESIS
Å          b'\xc5'    LATIN CAPITAL LETTER A WITH RING ABOVE
Æ          b'\xc6'    LATIN CAPITAL LETTER AE
Ç          b'\xc7'    LATIN CAPITAL LETTER C WITH CEDILLA
È          b'\xc8'    LATIN CAPITAL LETTER E WITH GRAVE
É          b'\xc9'    LATIN CAPITAL LETTER E WITH ACUTE
Ê          b'\xca'    LATIN CAPITAL LETTER E WITH CIRCUMFLEX
Ë          b'\xcb'    LATIN CAPITAL LETTER E WITH DIAERESIS
Ì          b'\xcc'    LATIN CAPITAL LETTER I WITH GRAVE
Í          b'\xcd'    LATIN CAPITAL LETTER I WITH ACUTE
Î          b'\xce'    LATIN CAPITAL LETTER I WITH CIRCUMFLEX
Ï          b'\xcf'    LATIN CAPITAL LETTER I WITH DIAERESIS
Ð          b'\xd0'    LATIN CAPITAL LETTER ETH
Ñ          b'\xd1'    LATIN CAPITAL LETTER N WITH TILDE
Ò          b'\xd2'    LATIN CAPITAL LETTER O WITH GRAVE
Ó          b'\xd3'    LATIN CAPITAL LETTER O WITH ACUTE
Ô          b'\xd4'    LATIN CAPITAL LETTER O WITH CIRCUMFLEX
Õ          b'\xd5'    LATIN CAPITAL LETTER O WITH TILDE
Ö          b'\xd6'    LATIN CAPITAL LETTER O WITH DIAERESIS
×          b'\xd7'    MULTIPLICATION SIGN
Ø          b'\xd8'    LATIN CAPITAL LETTER O WITH STROKE
Ù          b'\xd9'    LATIN CAPITAL LETTER U WITH GRAVE
Ú          b'\xda'    LATIN CAPITAL LETTER U WITH ACUTE
Û          b'\xdb'    LATIN CAPITAL LETTER U WITH CIRCUMFLEX
Ü          b'\xdc'    LATIN CAPITAL LETTER U WITH DIAERESIS
Ý          b'\xdd'    LATIN CAPITAL LETTER Y WITH ACUTE
Þ          b'\xde'    LATIN CAPITAL LETTER THORN
ß          b'\xdf'    LATIN SMALL LETTER SHARP S
à          b'\xe0'    LATIN SMALL LETTER A WITH GRAVE
á          b'\xe1'    LATIN SMALL LETTER A WITH ACUTE
â          b'\xe2'    LATIN SMALL LETTER A WITH CIRCUMFLEX
ã          b'\xe3'    LATIN SMALL LETTER A WITH TILDE
ä          b'\xe4'    LATIN SMALL LETTER A WITH DIAERESIS
å          b'\xe5'    LATIN SMALL LETTER A WITH RING ABOVE
æ          b'\xe6'    LATIN SMALL LETTER AE
ç          b'\xe7'    LATIN SMALL LETTER C WITH CEDILLA
è          b'\xe8'    LATIN SMALL LETTER E WITH GRAVE
é          b'\xe9'    LATIN SMALL LETTER E WITH ACUTE
ê          b'\xea'    LATIN SMALL LETTER E WITH CIRCUMFLEX
ë          b'\xeb'    LATIN SMALL LETTER E WITH DIAERESIS
ì          b'\xec'    LATIN SMALL LETTER I WITH GRAVE
í          b'\xed'    LATIN SMALL LETTER I WITH ACUTE
î          b'\xee'    LATIN SMALL LETTER I WITH CIRCUMFLEX
ï          b'\xef'    LATIN SMALL LETTER I WITH DIAERESIS
ð          b'\xf0'    LATIN SMALL LETTER ETH
ñ          b'\xf1'    LATIN SMALL LETTER N WITH TILDE
ò          b'\xf2'    LATIN SMALL LETTER O WITH GRAVE
ó          b'\xf3'    LATIN SMALL LETTER O WITH ACUTE
ô          b'\xf4'    LATIN SMALL LETTER O WITH CIRCUMFLEX
õ          b'\xf5'    LATIN SMALL LETTER O WITH TILDE
ö          b'\xf6'    LATIN SMALL LETTER O WITH DIAERESIS
÷          b'\xf7'    DIVISION SIGN
ø          b'\xf8'    LATIN SMALL LETTER O WITH STROKE
ù          b'\xf9'    LATIN SMALL LETTER U WITH GRAVE
ú          b'\xfa'    LATIN SMALL LETTER U WITH ACUTE
û          b'\xfb'    LATIN SMALL LETTER U WITH CIRCUMFLEX
ü          b'\xfc'    LATIN SMALL LETTER U WITH DIAERESIS
ý          b'\xfd'    LATIN SMALL LETTER Y WITH ACUTE
þ          b'\xfe'    LATIN SMALL LETTER THORN
ÿ          b'\xff'    LATIN SMALL LETTER Y WITH DIAERESIS
'latin-1' codec can't encode character '\u0100' in position 0: ordinal not in range(256)
# Each of the 256 Latin-1 (extended ASCII) code points encodes to exactly one byte.
{len(chr(code_point).encode('latin-1')) for code_point in range(256)} == {1}
True

UTF-8#

  • 8-bit encoding: this means that there are no issues with byte order and no BOM is required

  • each byte consists of two parts

    • marker bits (most significant bits): a sequence of zero to four 1 bits followed by a 0 bit

    • payload bits

  • the LSB of the Unicode character is the rightmost x bit

Range

Encoding

U-00000000...U-0000007F

0xxxxxxx

U-00000080...U-000007FF

110xxxxx 10xxxxxx

U-00000800...U-0000FFFF

1110xxxx 10xxxxxx 10xxxxxx

U-00010000...U-0010FFFF

11110xxx 10xxxxxx 10xxxxxx 10xxxxxx


File Formats#

File Type

Magic Number (hex)

Magic Number (ASCII)

File Offset [bytes]

File Name Extension

DOS executable

4D 5A

MZ

0

.exe

ELF

7F 45 4C 46

\x7fELF (␡ELF)

0

.elf

GIF

GIF87a

.gif

.

GIF89a

.gif

HDF

\211HDF\r\n\032\n

.hd5, .hdf5

Java Class

CA FE BA BE

Êþº¾

.class

JAR

50 4B 03 04

PK\x03\x04

.jar

JPEG

FF D8 FF DB

ÿØÿÛ

0

.jpg, .jpeg

Linux/Unix Script

23 21

#!

.sh

MIDI

4D 54 68 64

MThd (“MIDI Track Header”)

PDF

25 50 44 46

%PDF

.pdf

PNG

89 50 4E 47 0D 0A 1A 0A

\x89PNG\r\n\x1a\n (‰PNG␍␊␚␊)

.png

PS

25 21 (50 53)

%!(PS)

.ps

TIFF (Intel little end)

49 49 2A 00

II*

.tif, .tiff

TIFF (Motorola big end)

4D 4D 00 2A

MM*

XML

<?xml

.xml

Zip

50 4B 03 04

PK\x03\x04

0

.

50 4B 05 06

.

50 4B 07 08

# Print each file type next to its magic number rendered as characters.
# The bytes are written out raw, so control characters (NUL, CR, LF, SUB, DEL)
# reach the terminal unescaped.
pad = 30
magic_numbers = [
  ('DOS',                     (0x4d, 0x5a)),
  ('ELF',                     (0x7f, 0x45, 0x4c, 0x46)),
  ('Java Archive',            (0x50, 0x4b, 0x03, 0x04)),
  ('Java Class',              (0xca, 0xfe, 0xba, 0xbe)),
  ('JPEG',                    (0xff, 0xd8, 0xff, 0xdb)),
  ('Linux/Unix Script',       (0x23, 0x21)),
  ('MIDI',                    (0x4D, 0x54, 0x68, 0x64)),
  ('PDF',                     (0x25, 0x50, 0x44, 0x46)),
  ('PNG',                     (0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a)),
  ('PS',                      (0x25, 0x21, 0x50, 0x53)),
  ('TIFF (Intel little end)', (0x49, 0x49, 0x2A, 0x00)),
  ('TIFF (Motorola big end)', (0x4D, 0x4D, 0x00, 0x2A)),
  ('Zip',                     (0x50, 0x4b, 0x03, 0x04)),
]
for file_type, magic in magic_numbers:
  # Map every byte value to the code point of the same number.
  signature = ''.join(map(chr, magic))
  print(f"{file_type:<{pad}} {signature}")
DOS                            MZ
ELF                            ELF
Java Archive                   PK
Java Class                     Êþº¾
JPEG                           ÿØÿÛ
Linux/Unix Script              #!
MIDI                           MThd
PDF                            %PDF
PNG                            ‰PNG


PS                             %!PS
TIFF (Intel little end)        II*
TIFF (Motorola big end)        MM*
Zip                            PK

Figures#

  • [ h ][ y ][ w ] Crockford, Douglas

  • [ w ] Zbikowski, Mark (1956-)


Resources#

  • Named Character References HTML

  • [ h ][ w ] Bitmap (BMP) .bmp

  • [ h ][ w ] Comma-Separated Values (CSV) .csv

  • [ h ][ w ] DOS MZ “Mark Zbikowski” Executable .exe

  • [ h ][ w ] Extensible Markup Language (XML) .xml

  • [ h ][ w ] Graphics Interchange Format (GIF) .gif

  • [ h ][ w ] Hierarchical Data Format (HDF) .hd5, .hdf5

  • [ h ][ w ] Joint Photographic Experts Group (JPEG) .jpg, .jpeg

  • [ h ][ w ] Java Archive File (JAR) .jar

  • [ h ][ w ] JavaScript Object Notation (JSON) .json

  • [ h ][ w ] GeoJSON

  • [ h ][ w ] MP3

  • [ h ][ w ] Musical Instrument Digital Interface (MIDI)

  • [ h ][ w ] Parquet

  • [ h ][ w ] Portable Document Format (PDF) .pdf

  • [ h ][ w ] Portable Network Graphics (PNG) .png

  • [ h ][ w ] Scalable Vector Graphics (SVG) .svg

  • [ h ][ w ] Tab-Separated Values (TSV) .tsv

  • [ h ][ w ] Tag(ged) Image File Format (TIFF)

  • [ h ][ w ] Tom’s Obvious Minimal Language (TOML)

  • [ h ][ w ] Unicode

  • [ h ][ w ] Waveform Audio File Format (WAV) .wav, .wave

  • [ h ][ w ] YAML Ain’t Markup Language (YAML) .yml, .yaml

  • [ h ][ w ] xz


Terms#

  • [ w ] .exe

  • [ w ] ANSI Escape Sequences

  • [ w ] Archive File

    • [ w ] list of archive formats

  • [ w ] American Standard Code for Information Interchange (ASCII)

  • [ w ] Audio File Format

  • [ w ] Big Endian

  • [ w ] Binary-Coded Decimal Interchange Code (BCDIC)

  • [ w ] Binary-to-Text Encoding

  • [ w ] Binary Code

  • [ w ] Binary File

  • [ w ] Bit Numbering

  • [ w ] Bit String

  • [ w ] Byte

  • [ w ] Byte Order Mark (BOM)

  • [ w ] C0 & C1 Control Codes

  • [ w ] Caps Lock

  • [ w ] Caret Notation

  • [ w ] Character

  • [ w ] Character Encoding

  • [ w ] Character Entities

  • [ w ] Code

  • [ w ] Code Page

  • [ w ] Code Point

  • [ w ] Codec

  • [ w ] Combining Character

  • [ w ] Complex Text Layout (CTL)

  • [ w ] Container Format

  • [ w ] Control Character

  • [ w ] Control Picture

  • [ w ] Ctrl-Alt-Del

  • [ w ] Ctrl-C

  • [ w ] Ctrl-D

  • [ w ] Ctrl-Z

  • [ w ] Data Compression

  • [ w ] Data Compression Ratio

  • [ w ] Data Conversion

  • [ w ] Data File

  • [ w ] Deflate

  • [ w ] Diacritic

  • [ w ] Diaeresis

  • [ w ] Dictionary Coder

  • [ w ] Disk Image

  • [ w ] Document Template

  • [ w ] Electronic Data Interchange (EDI)

  • [ w ] End of File (EOF)

  • [ w ] End of Line (EOL)

  • [ w ] Endianness

  • [ w ] Enriched Text

  • [ w ] Escape Character

  • [ w ] Escape Sequence

  • [ w ] Escape Sequence in C

  • [ w ] Executable File

    • [ w ] list of executable file formats

  • [ w ] Executable and Linkable Format (ELF)

  • [ w ] Executable Compression

  • [ w ] Extended Binary Coded Decimal Interchange Code (EBCDIC)

  • [ w ] Extended ASCII

  • [ w ] File Archiver

  • [ w ] File Archivers

  • [ w ] File Format

    • [ w ] list of file formats

  • [ w ] File Name Extension

    • [ w ] list of file name extensions

  • [ w ] Glyph

  • [ w ] Grapheme

  • [ w ] Grave Accent

  • [ w ] Graphics Interchange Format (GIF)

  • [ w ] Guillemet

  • [ w ] gzip

  • [ w ] Hexadecimal

  • [ w ] Huffman Coding

  • [ w ] Image File Format

    • [ w ] list of image file formats

  • [ w ] Interchange File Format (IFF)

  • [ w ] International Phonetic Alphabet (IPA)

  • [ w ] Internationalization and Localization

  • [ w ] Java Class File

  • [ w ] Language-Independent Specification (LIS)

  • [ w ] Lempel-Ziv-Oberhumer (LZO)

  • [ w ] Letter Case

  • [ w ] Ligature

  • [ w ] Line

  • [ w ] Linear Predictive Coding (LPC)

  • [ w ] Little Endian

  • [ w ] Lossless Compression

  • [ w ] Lossy Compression

  • [ w ] Lower Case (Minuscule)

  • [ w ] LZ77 LZ78

  • [ w ] Magic Number

  • [ w ] Manifest File

  • [ w ] Metacharacter

  • [ w ] Mojibake

  • [ w ] Named Character Reference

  • [ w ] Newline

  • [ w ] Nibble

  • [ w ] Null-Terminated String

  • [ w ] Number Sign

  • [ w ] Numeric Character Reference

  • [ w ] Object File

  • [ w ] Octet

  • [ w ] Open File Format

  • [ w ] OpenDocument

  • [ w ] OpenType

  • [ w ] Page Break

  • [ w ] pax

  • [ w ] Percent Encoding

  • [ w ] Plain Text

  • [ w ] Plane

  • [ w ] PostScript (PS)

  • [ w ] Pound Sign

  • [ w ] Precomposed Character

  • [ w ] Punctuation

  • [ w ] Raster Graphics

  • [ w ] Rich Text

  • [ w ] Ring

  • [ w ] Run-Length Encoding (RLE)

  • [ w ] Self-Synchronizing Code

  • [ w ] Serialization

  • [ w ] Shebang

  • [ w ] Simple Data Format (SDF)

  • [ w ] Software Flow Control

  • [ w ] Specials

  • [ w ] String

  • [ w ] String Literal

  • [ w ] Tab-Separated Values

  • [ w ] Tab Stop

  • [ w ] tar

  • [ w ] Text Normalization

  • [ w ] Touch Typing

  • [ w ] Typeface

  • [ w ] Unicode

  • [ w ] Unicode Block

  • [ w ] Unicode Character Property

  • [ w ] Unicode Collation Algorithm

  • [ w ] Unicode Consortium

  • [ w ] Unicode Equivalence

  • [ w ] Universal Character Set (UCS) characters

  • [ w ] Universal Coded Character Set (UCS)

  • [ w ] Upper Case (Majuscule)

  • [ w ] URL Encoding

  • [ w ] UTF-16

  • [ w ] UTF-8

  • [ w ] Variable-Width Encoding

  • [ w ] Video File Format

  • [ w ] Whitespace

  • [ w ] Word

  • [ w ] Writing System

  • [ w ] Zip

  • [ w ] Zlib

  • [ w ] Zstd