C#
Contents#
Binary Prefixes#
print(f'2^10 B = 1024^1 B = {2**10:>35,.0f} B = 1 KiB (about one thousand B)')
print(f'2^20 B = 1024^2 B = {2**20:>35,.0f} B = 1 MiB (about one million B)')
print(f'2^30 B = 1024^3 B = {2**30:>35,.0f} B = 1 GiB (about one billion B)')
print(f'2^40 B = 1024^4 B = {2**40:>35,.0f} B = 1 TiB (about one trillion B)')
print(f'2^50 B = 1024^5 B = {2**50:>35,.0f} B = 1 PiB (about one quadrillion B)')
print(f'2^60 B = 1024^6 B = {2**60:>35,.0f} B = 1 EiB (about one quintillion B)')
print(f'2^70 B = 1024^7 B = {2**70:>35,.0f} B = 1 ZiB (about one sextillion B)')
print(f'2^80 B = 1024^8 B = {2**80:>35,.0f} B = 1 YiB (about one septillion B)')
2^10 B = 1024^1 B = 1,024 B = 1 KiB (about one thousand B)
2^20 B = 1024^2 B = 1,048,576 B = 1 MiB (about one million B)
2^30 B = 1024^3 B = 1,073,741,824 B = 1 GiB (about one billion B)
2^40 B = 1024^4 B = 1,099,511,627,776 B = 1 TiB (about one trillion B)
2^50 B = 1024^5 B = 1,125,899,906,842,624 B = 1 PiB (about one quadrillion B)
2^60 B = 1024^6 B = 1,152,921,504,606,846,976 B = 1 EiB (about one quintillion B)
2^70 B = 1024^7 B = 1,180,591,620,717,411,303,424 B = 1 ZiB (about one sextillion B)
2^80 B = 1024^8 B = 1,208,925,819,614,629,174,706,176 B = 1 YiB (about one septillion B)
Types#
Type |
Bytes (32-bit) |
Bytes (64-bit) |
32-bit Range |
printf |
---|---|---|---|---|
char |
1 |
1 |
[0, 255] |
|
short int |
2 |
2 |
[-32,768, 32,767] |
|
short unsigned int |
2 |
2 |
[0, 65,535] |
|
int |
4 |
4 |
[-214,748,648, 214,748,647] |
|
unsigned int |
4 |
4 |
|
|
long int |
4 |
8 |
|
|
long long int |
8 |
8 |
|
|
long unsigned int |
|
|||
long long unsigned int |
|
|||
float |
4 |
4 |
|
|
double |
8 |
8 |
|
|
long double |
12 |
16 |
|
|
float scientific |
|
|||
pointer (memory address) |
4 |
8 |
|
|
string |
|
|||
octal |
|
|||
hexadecimal |
|
C99 extended integer types
int8_t exactly 8 bits signed
int16_t exactly 16 bits signed
int32_t exactly 32 bits signed
int64_t exactly 64 bits signed
uint8_t exactly 8 bits unsigned
int8_t
int16_t
int32_t
int64_t
int128_t
uint8_t
uint16_t
uint32_t
uint64_t
uint128_t
defined in stdint.h
8-bit unsigned int that stores a value in the range 0-255
useful for dealing with data that can be represented by 8 bits, such as data buffers
Determining their size#
UINT_MAX (%u) : 4294967295
UINT_MAX (%d) : -1
UINT_MAX (%x) : ffffffff
ULONG_MAX (%lu) : 18446744073709551615
ULONG_MAX (%ld) : -1
ULONG_MAX (%lx) : ffffffffffffffff
void print_type_sizes () {
printf(" char : %02zu bits\n", sizeof( char) * 8);
printf("unsigned char : %02zu bits\n", sizeof(unsigned char) * 8);
printf(" short : %02zu bits\n", sizeof( short) * 8);
printf("unsigned short : %02zu bits\n", sizeof(unsigned short) * 8);
printf(" int : %02zu bits\n", sizeof( int) * 8);
printf("unsigned int : %02zu bits\n", sizeof(unsigned int) * 8);
printf(" long : %02zu bits\n", sizeof( long) * 8);
printf("unsigned long : %02zu bits\n", sizeof(unsigned long) * 8);
printf(" long long : %02zu bits\n", sizeof( long long) * 8);
printf("unsigned long long : %02zu bits\n", sizeof(unsigned long long) * 8);
printf(" float : %02zu bits\n", sizeof( float) * 8);
printf(" double : %02zu bits\n", sizeof( double) * 8);
printf(" long double : %02zu bits\n", sizeof( long double) * 8);
printf(" void* : %02zu bits\n", sizeof( void*) * 8);
}
char : 08 bits
unsigned char : 08 bits
short : 16 bits
unsigned short : 16 bits
int : 32 bits
unsigned int : 32 bits
long : 64 bits
unsigned long : 64 bits
long long : 64 bits
unsigned long long : 64 bits
float : 32 bits
double : 64 bits
long double : 64 bits
void* : 64 bits
/* sizeof.c */
#include <limits.h> /* CHAR_BIT */
#include <stdio.h> /* printf */
#include <stdint.h> /* int8_t, uint8_t, */
int main (int argc, char** argv) {
printf("CHAR_BIT : %2zu b\n", (size_t)CHAR_BIT);
printf("\n");
printf(" char : %2zu B %25d (CHAR_MIN) - %25d (CHAR_MAX)\n", sizeof( char), CHAR_MIN, CHAR_MAX);
printf(" signed char : %2zu B %25d (SCHAR_MIN) - %25d (SCHAR_MAX)\n", sizeof( signed char), SCHAR_MIN, SCHAR_MAX);
printf("unsigned char : %2zu B %25u - %25u (UCHAR_MAX)\n", sizeof(unsigned char), 0, UCHAR_MAX);
printf(" short : %2zu B %25d (SHRT_MIN) - %25d (SHRT_MAX)\n", sizeof( short), SHRT_MIN, SHRT_MAX);
printf("unsigned short : %2zu B %25u - %25u (USHRT_MAX)\n", sizeof(unsigned short), 0, USHRT_MAX);
printf(" int : %2zu B %25d (INT_MIN) - %25u (INT_MAX)\n", sizeof( int), INT_MIN, INT_MAX);
printf("unsigned : %2zu B %25d - %25u (UINT_MAX)\n", sizeof(unsigned ), 0, UINT_MAX);
printf(" long : %2zu B %25ld (LONG_MIN) - %25ld (LONG_MAX)\n", sizeof( long), LONG_MIN, LONG_MAX);
printf("unsigned long : %2zu B %25u - %25lu (ULONG_MAX)\n", sizeof(unsigned long), 0, ULONG_MAX);
printf(" long long : %2zu B %25lld (LLONG_MIN) - %25lld (LLONG_MAX)\n", sizeof( long long), LLONG_MIN, LLONG_MAX);
printf("unsigned long long : %2zu B %25u - %25llu (ULLONG_MAX)\n", sizeof(unsigned long long), 0, ULLONG_MAX);
printf(" float : %2zu B\n", sizeof(float));
printf(" double : %2zu B\n", sizeof(double));
printf(" long double : %2zu B\n", sizeof(long double));
printf(" size_t : %2zd B\n", sizeof(size_t));
printf(" int8_t : %2zd B %25d (INT8_MIN) - %25d (INT8_MAX)\n", sizeof( int8_t), INT8_MIN, INT8_MAX);
printf(" uint8_t : %2zu B %25u - %25u (UINT8_MAX)\n", sizeof( uint8_t), 0, UINT8_MAX);
printf(" int16_t : %2zd B %25d (INT16_MIN) - %25d (INT16_MAX)\n", sizeof( int16_t), INT16_MIN, INT16_MAX);
printf(" uint16_t : %2zu B %25u - %25u (UINT16_MAX)\n", sizeof(uint16_t), 0, UINT16_MAX);
printf(" int32_t : %2zd B %25d (INT32_MIN) - %25d (INT32_MAX)\n", sizeof( int32_t), INT32_MIN, INT32_MAX);
printf(" uint32_t : %2zu B %25u - %25u (UINT32_MAX)\n", sizeof(uint32_t), 0, UINT32_MAX);
printf(" int64_t : %2zd B %25ld (INT64_MIN) - %25ld (INT64_MAX)\n", sizeof( int64_t), INT64_MIN, INT64_MAX);
printf(" uint64_t : %2zu B %25u - %25lu (UINT64_MAX)\n", sizeof(uint64_t), 0, UINT64_MAX);
return 0;
}
CHAR_BIT : 8 b
char : 1 B 0 (CHAR_MIN) - 255 (CHAR_MAX)
signed char : 1 B -128 (SCHAR_MIN) - 127 (SCHAR_MAX)
unsigned char : 1 B 0 - 255 (UCHAR_MAX)
short : 2 B -32768 (SHRT_MIN) - 32767 (SHRT_MAX)
unsigned short : 2 B 0 - 65535 (USHRT_MAX)
int : 4 B -2147483648 (INT_MIN) - 2147483647 (INT_MAX)
unsigned : 4 B 0 - 4294967295 (UINT_MAX)
long : 8 B -9223372036854775808 (LONG_MIN) - 9223372036854775807 (LONG_MAX)
unsigned long : 8 B 0 - 18446744073709551615 (ULONG_MAX)
long long : 8 B -9223372036854775808 (LLONG_MIN) - 9223372036854775807 (LLONG_MAX)
unsigned long long : 8 B 0 - 18446744073709551615 (ULLONG_MAX)
float : 4 B
double : 8 B
long double : 16 B
size_t : 8 B
int8_t : 1 B -128 (INT8_MIN) - 127 (INT8_MAX)
uint8_t : 1 B 0 - 255 (UINT8_MAX)
int16_t : 2 B -32768 (INT16_MIN) - 32767 (INT16_MAX)
uint16_t : 2 B 0 - 65535 (UINT16_MAX)
int32_t : 4 B -2147483648 (INT32_MIN) - 2147483647 (INT32_MAX)
uint32_t : 4 B 0 - 4294967295 (UINT32_MAX)
int64_t : 8 B -9223372036854775808 (INT64_MIN) - 9223372036854775807 (INT64_MAX)
uint64_t : 8 B 0 - 18446744073709551615 (UINT64_MAX)
Loss of precision#
#include <limits.h>
#include <stdio.h>
int main ()
{
unsigned large_int = UINT_MAX;
float float_int = (float)large_int;
double doubl_int = (double)large_int;
printf("unsigned int: %u\n", large_int);
printf("float : %f\n", float_int);
printf("double : %f\n", doubl_int);
return 0;
}
Data Type
an abstraction over memory ranges and the data they contain, that allows them to be classified into different types
and so a variable name is just an alias for a memory range
short int si = 9;
long int li = 1234567890L;
float f = 3.14;
double d = 1234567890.1234567;
char c = 'a';
char *ptr = &c;
short int *ptr = &si;
Type System
the compiler uses type information in order to determine how to apply logical operations defined by the code, and to determine ultimately which machine instructions are generated and executed
double one = 3.14, two = 4.5, res1;
int three = 3, four = 4059; res2;
res1 = one + two // double + double
res2 = three + four // int + int
Programming language typing
distinctions between strong typing and weak typing refer to the quality of the language in dealing with ambiguity in types and usage
C IS WEAKLY TYPED, which leads to greater flexibility at the cost of being more error-prone
Careful
expressions like
one + 1
passing parameters like
f(one)
comparisons like
one == 1
Type Casting (type) variable
double one = 3.24;
if (one / 3 == 1) printf("equal"); // 3.24 / 3.0 == 1.0
else printf("not equal")
double one = 3.24;
if ((int)one / 3 == 1) printf("equal"); // 3 / 3 == 1
else printf("not equal")
// typecast.c
#include <stdio.h>
int main (void) {
short int si = 9;
long int li = 1234567890L;
float f = 3.14;
double d = 1234567890.1234567;
char c = 'a';
char *ptr = &si;
printf("short int %d %f %p\n", (int)si, (float)si, (char *)si);
printf("long int %d %f %p\n", (int)li, (float)li, (char *)li);
printf("float %d %f (ERR)\n", (int)f, (float)f);
printf("double %d %f (ERR)\n", (int)d, (float)d);
printf("char %d %f %p\n", (int)c, (float)c, (char *)c);
printf("ptr %d (ERR) %p\n", (int)ptr, (char *)ptr);
return 0;
}
./typecast
short int 9 9.000000 0x9
long int 1234567890 1234567936.000000 0x499602d2
float 3 3.140000 (ERR)
double 1234567890 1234567936.000000 (ERR)
char 97 97.000000 0x61
ptr 1875863146 (ERR) 0x16fcf666a
typedef
#
typedef old_type new_type
The compiler treats the new type just like the old type: the new type is like an alias of the old type.
EXAMPLE
typedef unsigned char bitfield
bifield my_func (bitfield x, int y) {
float a;
bitfield z;
...
return (bitfield)1;
}
struct
s#
C has no objects. The closest thing is a struct, or set of fields.
struct Struct_Name {
list
of
variable
definitions
called
fields
} variable_name(s);
struct Struct_Name another_variable_name;
EXAMPLE
struct {
char name[128];
int mileage;
} gremlin, cayman, cessna180, montauk;
strcpy(cayman.name, "My Favorite Car");
cayman.mileage = 1240;
enum
s#
enum
values are always ints. They are optional and assigned by the compiler if not explicitly assigned.
enum [ Enum_Name ] {
id1 [ = val ],
id2 [ = val ],
...
} variable;
EXAMPLE
enum {
SUNDAY = 0,
MONDAY = 1,
TUESDAY = 2,
WEDNESDAY = 3,
THURSDAY = 4,
FRIDAY = 5,
SATURDAY = 6
} days_of_week;
enum {
SUNDAY,
MONDAY,
TUESDAY,
WEDNESDAY,
THURSDAY,
FRIDAY,
SATURDAY,
} days_of_week;
union
s#
struct {
enum {
AUTOMOTIVE = 0,
AERONAUTICAL = 1,
MARINE = 2
} type; // 12 B
char name[128]; // 128 B
int mileage; // 4 B
struct {
int cylinders;
int horsepower;
int hours_smoh;
} engine; // 12 B
union {
char vin[17];
char tail_number[8];
char hull_id[12];
} vehicle_id; // 17 B
} gremlin, cayman, cessna180, montauk;
cayman.type = AUTOMOTIVE;
if (cayman.type == AUTOMOTIVE) {
printf("this is a car");
}
cayman.engine.cylinders = 6;
cayman.mileage = 1240;
strcpy(vehicle_id.vin, "123456");
printf("%s\n", vehicle_id.vin); // 123456
vehicle_id.tail_number = "F-BOZQ";
printf("%s\n", vehicle_id.vin); // F-BOZQ
printf("Size: %lu\n", sizeof(vehicle_id)); // 17
typedef enum {
AUTOMOTIVE = 0;
AERONAUTICAL = 1;
MARINE = 2;
} VEHICLE_TYPE;
typedef struct {
int cylinders;
int horsepower;
int hours_smoh;
} ENGINE_INFO;
typedef union {
char vin[17];
char tail_number[8];
char hull_id[12];
} VEHICLE_IDENT;
typedef struct {
char name[128];
int mileage;
VEHICLE_TYPE type;
ENGINE_INFO engine;
VEHICLE_IDENT vehicle_id;
} VEHICLE;
VEHICLE gremlin, cayman, cessna180, montauk;
VEHICLE *vehicle = &cayman;
strcpy(vehicle->name, "2013 Porsche Cayman S");
vehicle->engine.cylinders = 6;
printf(
"*** Vehicle Information ***\n"
"Name : %s\n"
"Mileage : %u\n"
"Vehicle Type : %s\n"
"Cylinders : %u\n"
"Horsepower : %u hp\n"
"SMOH : %u hours\n"
"VIN : %s\n",
vehicle->name,
vehicle->mileage,
(vehicle->type == AUTOMOTIVE) ? "car" :
(vehicle->type == AERONAUTICAL) ? "airplane" : boat,
vehicle->engine.cylinders,
vehicle->engine.horsepower,
vehicle->engine.hours_smoh,
(vehicle->type == AUTOMOTIVE) ? vehicle->vehicle_id.vin :
(vehicle->type == AERONAUTICAL) ? vehicle->vehicle_id.tail_number : vehicle->vehicle_id.hull_id;
);
bit fields#
struct {
unsigned int width; // 4 B
unsigned int height; // 4 B
} status;
Arrays#
int arr[100]; // declaration - the compiler allocates 100 ints worth of storage in the stack initialized with garbage data
// an array does not know its own size, so the following is better
int SIZE = 100;
int arr[SIZE];
// arrays with fewer values than size are initialized with 0
int arr2[10] = { [0 ... 2] = 1 }; // 1 1 1 0 0 0 0 0 0 0
arr2[3] = 1; // 1 1 1 1 0 0 0 0 0 0
arr2[10] = 1; // overflow!
Array names can be used as pointers to the first element of the array.
/* array_and_pointer.c
*/
#include <stdio.h>
int main (void) {
const int SIZE = 6;
int a[SIZE] = { [0 ... 5] = 42 };
for (int i = 0; i < SIZE; i++) {
printf("a[%d] = %d, addr %p", i, *(a + i), a + i); printf("\n");
}
int *p = a;
for (int i = 0; i < SIZE; i++) {
printf("p[%d] = %d, addr %p", i, *(p + i), p + i); printf("\n");
}
p++;
for (int i = 0; i < SIZE; i++) { // overflow!
printf("p[%d] = %d, addr %p", i, *(p + i), p + i); printf("\n");
}
return 0;
}
An array is a contiguous block of memory.
Arrays are zero-indexed.
Arrays are declared with notation type name[size]
or declared + initialized with notation type name[size] = {value0, value1, ..., value(n - 1)}
. If fewer values are provided than what is indicated in the size of the array, the remaining values are initialized with the value 0
.
An array of six integers requires \(6 \times 4 = 24\) bytes of memory.
Arrays have no methods and do not know their own length (and there is no bounds checking).
sizeof(array)
is not reliable and only works in some situations
int scores[100];
// allocates 100 "ints worth" of memory where each array element contains garbage data initially
int n = 100;
int scores[n]; // C99 allows the array size to be an expression
int x[6] = {34, 11, -129, 49, 708, -11};
x[7] = 45; // legal, but will cause a memory fault
Arrays are passed by reference by default#
void min_max (int array[], int len, int *min, int *max) {
// find the index of the largest value and assign it to max_i
// find the index of the smallest value and assign it to min_i
*max = array[max_i];
*min = array[min_i];
}
int main (void) {
int x[100];
int largest, smallest;
// some code that populates the array x
min_max(x, 100, &smallest, &largest);
printf("smallest = %d, largest = %d\n", smallest, largest);
}
Pointers#
#include <stdio.h>
int main (void) {
int x = 42; // variable definition
int *p; // pointer declaration
int *q = 43; // pointer definition
int *r;
p = &x; // assign address of x to pointer variable p
printf(" x is %d\n", x); // 42
printf("&x is %p\n", &x); // address of x
printf(" p is %p\n", p); // address of x
*p = 43; // dereference: *p is just x
printf(" x is %d\n", x); // 43
r = p; // pointer assignment: r points to x just like p
printf(" r is %p\n", r); // address of x
return 0;
}
Pass by reference in C via pointers
void min_max (int arr[], int len, int *min, int *max) {
// ^^^^^^^^ ^^^^^^^^ pass by reference
// min_i = the index of the smallest value of arr
// max_i = the index of the largest value of arr
*min = arr[min_i]
*max = arr[max_i]
}
int main (void) {
int x[100];
int min, max;
// populate arr
min_max(x, 100, &min, &max);
// ^^^^ ^^^^ pass by reference
printf("min = %d, max = %d", min, max); printf("\n");
}
Pointers (and dynamically-allocated memory) can be returned from a function. Local addresses cannot be returned.
int *max (int *a, int *b) {
if (*a > *b) return a;
else return b;
// ^^ ^^ comparing values of variables pointed to by a and b
// ^^^^^^^^ returning the address of the variable that contains max value
}
int main (void) {
int x = 5;
int y = 6;
printf("max of %d and %d is %d", x, y, *max(&x, &y)); printf("\n");
}
int *foo (void) {
int x = 5;
return &x; // WRONG!!!
}
/* boxarrow.c */
#include <stdio.h> /* printf */
int main (int argc, char** argv) {
int x = 1;
int arr[3] = {2, 3, 4};
int* p = &arr[1];
printf("&x: %p; x: %d\n", &x, x);
printf("&arr[0]: %p; arr[0]: %d\n", &arr[0], arr[0]);
printf("&arr[2]: %p; arr[2]: %d\n", &arr[2], arr[2]);
printf("&p: %p; p: %p; *p: %d\n", &p, p, *p);
return 0;
}
the stack grows downward
address | name | value (this col represents main's stack frame)
--------------------|--------|------
&x (0xbfff2dc) | x | 1
&arr[2] (0xbfff2d8) | arr[2] | 4
&arr[1] (0xbfff2d4) | arr[1] | 3
&arr[0] (0xbfff2d0) | arr[0] | 2
&p (0xbfff2cc) | p | &arr[1] (0xbfff2d4)
$ gcc -g boxarrow.c -o boxarrow
$ ./boxarrow
&x: 0x7fce62361c; x: 1
&arr[0]: 0x7fce623610; arr[0]: 2
&arr[2]: 0x7fce623618; arr[2]: 4
&p: 0x7fce623608; p: 0x7fce623614; *p: 3
/* boxarrow2.c */
int main (int argc, char** argv) {
int x = 1;
int arr[3] = {2, 3, 4};
int* p = &arr[1];
int** dp = &p;
**dp += 1 /* arr[1] = 4 */
p += 1 /* p = &arr[2] */
**dp += 1 /* arr[2] = 5 */
return 0;
}
address | name | value (this col represents main's stack frame)
--------------------|--------|------
&x (0xbfff2dc) | x | 1
&arr[2] (0xbfff2d8) | arr[2] | 4 --[ **dp += 1 ]--> 5
&arr[1] (0xbfff2d4) | arr[1] | 3 --[ **dp += 1 ]--> 4
&arr[0] (0xbfff2d0) | arr[0] | 2
&p (0xbfff2cc) | p | &arr[1] (0xbfff2d4) --[ p += 1 ]--> &arr[2] (0xbfff2d8)
&dp (0xbfff2c8) | dp | &p (0xbfff2cc)
/* doublepointer.c */
#include <stdio.h> /* printf */
int main (int argc, char** argv) {
char hi[6] = {'h', 'e', 'l', 'l', 'o', '\0'};
char* p;
char** dp;
p = &hi[0];
dp = &p;
printf("%c %c\n", *p, **dp); /* 'h' */
printf("%p %p %p\n", p, *dp, hi); /* &hi[0] */
p += 1; /* p = &hi[1] */
printf("%c %c\n", *p, **dp); /* 'e' */
printf("%p %p %p\n", p, *dp, hi); /* &hi[1], &hi[1], &hi[0] */
*dp += 2; /* p = &hi[3] */
printf("%c %c\n", *p, **dp); /* 'l' */
printf("%p %p %p\n", p, *dp, hi); /* &hi[3], &hi[3], &hi[0] */
return 0;
}
Why is the stack not growing down on the Raspberry Pi?
address | name | value (this col represents main's stack frame)
--------------------------------|-------|------
&dp (0x7ffffff208) | dp | &p
&hi[5] (0x7ffffff205) | hi[5] | '\0'
&hi[4] (0x7ffffff204) | hi[4] | 'o'
&hi[3] (0x7ffffff203) | hi[3] | 'l' <-- *(p + 3)
&hi[2] (0x7ffffff202) | hi[2] | 'l'
&hi[1] (0x7ffffff201) <-- p + 1 | hi[1] | 'e' <-- *(p + 1)
&hi[0] (0x7ffffff200) | hi[0] | 'h' <-- *p, **dp
&p (0x7ffffff1f8) | p | &hi[0] <-- *dp
$ gcc -g doublepointer.c -o doublepointer
$ ./doublepointer
h h
0x7fd2eab8e0 0x7fd2eab8e0 0x7fd2eab8e0
e e
0x7fd2eab8e1 0x7fd2eab8e1 0x7fd2eab8e0
l l
0x7fd2eab8e3 0x7fd2eab8e3 0x7fd2eab8e0
/* pointer_arithmetic.c */
#include <stdio.h> /* printf */
int main (int argc, char** argv) {
int arr[3] = {1, 2, 3};
int* int_ptr = &arr[0];
char* char_ptr = (char *)int_ptr; /* &arr[0] */
printf(" int_ptr: %p; *int_ptr: %d\n", int_ptr, *int_ptr); /* &arr[0], 1 */
int_ptr += 1; /* int_ptr = &arr[1] */
printf(" int_ptr: %p; *int_ptr: %d\n", int_ptr, *int_ptr); /* &arr[1], 2 */
int_ptr += 2; /* int_ptr = &arr[3] does not exist */
printf(" int_ptr: %p; *int_ptr: %d\n", int_ptr, *int_ptr); /* ??? */
printf("char_ptr: %p; *char_ptr: %d\n", char_ptr, *char_ptr); /* &arr[0], 1 */
char_ptr += 1;
printf("char_ptr: %p; *char_ptr: %d\n", char_ptr, *char_ptr); /* &arr[0], 0 */
char_ptr += 2;
printf("char_ptr: %p; *char_ptr: %d\n", char_ptr, *char_ptr); /* &arr[0], 0 */
return 0;
}
int_ptr: 0x7ff0810150; *int_ptr: 1
int_ptr: 0x7ff0810154; *int_ptr: 2
int_ptr: 0x7ff081015c; *int_ptr: 127
char_ptr: 0x7ff0810150; *char_ptr: 1
char_ptr: 0x7ff0810151; *char_ptr: 0
char_ptr: 0x7ff0810153; *char_ptr: 0
Multidimensional arrays#
C arrays are stored in row-major order.
int x[][4] =
int x[2][4] = {{1, 2, 3, 4},
{5, 6, 7, 8}}; // row-major order -> 1 2 3 4 5 6 7 8
// ^ 2 row (width)
// ^ 4 col (height)
int x[][2][4] =
int x[3][2][4] = {{{ 1, 2, 3, 4}, { 5, 6, 7, 8}},
{{ 9, 10, 11, 12}, {13, 14, 15, 16}},
{{17, 18, 19, 20}, {21, 22, 23, 24}}};
// ^ 3 row (width)
// ^ 2 col (height)
// ^ 4 (depth, first elem in front as col rep)
//
// row-major order -> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
//
// if you were just looking at the front of this 3d array, it would look like this
// x[3][2] = {{{ 1}, { 5}},
// {{ 9}, {13}},
// {{17}, {21}}}
//
// there are 3 "2d" arrays stacked on top of one another, each of which has 2 row and 4 col
Arrays of strings#
char planets[][8] = {
"Mercury", // M e r c u r y \0
"Venus", // V e n u s \0 \0 \0
"Earth", // E a r t h \0 \0 \0
"Mars", // M a r s \0 \0 \0 \0
"Jupiter", // J u p i t e r \0
"Saturn", // S a t u r n \0 \0
"Uranus", // U r a n u s \0 \0
"Neptune", // N e p t u n e \0
};
// array of pointers where each pointer points to a string
char *planets[] = {
"Mercury", // -> M e r c u r y \0
"Venus", // -> V e n u s \0
"Earth", // -> E a r t h \0
"Mars", // -> M a r s \0
"Jupiter", // -> J u p i t e r \0
"Saturn", // -> S a t u r n \0
"Uranus", // -> U r a n u s \0
"Neptune", // -> N e p t u n e \0
};
for (int i = 0; i < 8; ++i) {
if (planets[i][0] == 'M')
printf("%s\n", planets[i]);
}
for (char **p = planets; p < planets + 8; ++p) {
if (**p == 'M')
printf("%s\n", *p)
}
#include <stdio.h>
int main (void) {
int i = 5;
int *ip = &i;
printf("%d\n", i); // 5
printf("%p\n", ip); // memory address
*ip = 42; // re-assign the value of variable i via dereference
printf("%d\n", i); // 42
printf("%d\n", *ip); // 42
}
gcc -g -Wall pointer.c -o myprog
Pointers and pass by value and pass by reference#
Example#
C passes by value: values passed into a function are copied and local modifications to those copies are not reflected in the original values.
However, pointers let you pass by reference.
void add_pbv (int c) {
c += 10;
printf("pbv c: %d\n", c); // 11
}
void add_pbr (int *c) { // this parameter is a pointer
*c += 10; // which is dereferenced
printf("pbr *c: %d\n", *c); // 11
}
int main (void) {
int x = 1;
printf("x: %d\n", x); // 1
add_pbv(x); // pass a copy of the value of the variable, the value of the variable remains unchanged
printf("x: %d\n", x); // 1, not 11
add_pbr(&x); // pass the memory location of the variable
printf("x: %d\n", x); // 11
return 0;
}
Example#
PASS BY VALUE
#include <stdio.h>
void swap (int a, int b) {
int tmp = a;
a = b;
b = tmp;
}
int main (void) {
int a = 42, b = -7;
swap(a, b);
printf("a: %d, b: %d\n", a, b);
return 0;
}
./pointer_pass_by_value
a: 42, b: -7
PASS BY REFERENCE VIA POINTERS
#include <stdio.h>
void swap (int *a, int *b) {
int tmp = *a;
*a = *b;
*b = tmp;
}
int main (void) {
int a = 42, b = -7;
swap(&a, &b);
printf("a: %d, b: %d\n", a, b);
return 0;
}
./pointer_pass_by_reference
a: -7, b: 42
Back to pointers#
We use notation type *name;
to declare (or type *name = &address;
to declare and initialize) a pointer variable.
We subsequently use dereference notation *name
to refer to the value of the variable pointed to by the pointer variable.
We can use notation name
to refer to the memory address stored in the pointer variable.
EXAMPLE
int x = 42; // regular variable declaration + initialization memory address 0x1
int *p; // pointer variable declaration memory address 0x2
p = &x; // pointer variable initialization
printf("%d", x); // 42
printf("%p", &x); // 0x1
printf("%d", *p); // 42
printf("%p", p); // 0x1
Pointers as return values#
We can return a pointer that was passed (or dynamically allocated memory???).
int *max (int *a, int *b) {
if (*a > *b) return a;
return b;
}
int main (void) {
int x = 5, y = 6;
printf("max of %d and %d is %d\n", x, y, *max(&x, &y));
}
We cannot return the address of a local variable.
int *foo (void) {
int x = 5;
return &x;
}
int main (void) {
printf("%d\n", *foo());
}
Adding integer i
to pointer p
yields a pointer to the element i
places after the element that p
points to.
int a[6] = { [0 ... 5] = 42 }; // `42 42 42 42 42 42`
int *p = &a[2]; // |
int *q = p + 3; // |
++p; // |
The name of an array can be used as a pointer.
a
is the same as &a[0]
a[0]
is the same as *(&a[0] + 0)
or *(a + 0)
or *a
a[1]
is the same as *(&a[0] + 1)
or *(a + 1)
a[2] == 2[a]
a[2]
is the same as&a[0] + 2
2[a]
is the same as2 + &a[0]
int a[5] = {0, 1, 2, 3, 4};
printf("%d %d\n", a[2], 2[a]);
Pointer Arithmetic#
The pointer is incremented based on the size of a single array element.
Pointer arithmetic is only applicable to pointers that point to arrays. Otherwise, it is undefined.
Pointers can only be subtracted if they point to the same array. Otherwise, it is undefined.
Example#
# include <stdio.h>
int main (void) {
int foo[10];
int *p = foo; // point to the 0-th element
printf("%p\n", p);
++p; // point to the next element
printf("%p\n", p);
char bar[10];
char *q = bar; // point to the 0-th element
printf("%p\n", q);
++q; // point to the next element
printf("%p\n", q);
}
gcc -Wall foo.c -o foo && ./foo
0x16fdc2620
0x16fdc2624
0x16fdc2616
0x16fdc2617
Example#
#include <stdio.h>
int main (void) {
int foo[10] = { [0 ... 9] = 42 };
int sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0;
for (int i = 0; i < 10; ++i)
sum1 += foo[i];
for (int *p = &foo[0]; p < &foo[10]; ++p)
sum2 += *p;
for (int *p = foo; p < foo + 10; ++p)
sum3 += *p;
int *p = foo;
for (int i = 0; i < 10; ++i)
sum4 += p[i] // &foo[0] + i
printf("%d %d %d %d\n", sum1, sum2, sum3, sum4);
}
gcc -Wall sum.c -o sum && ./sum
420 420 420 420
Strings#
There are no strings proper, only character arrays which are terminated by the null character \0
.
C “strings” are not objects and have no methods, but string.h
has helpful utilities.
char *x = "hello\n"; // x --> h e l l o \n \0
#include <string.h>
The C string API is error-prone so study secure C coding guidelines.
https://wiki.sei.cmu.edu/confluence/display/c/SEI+CERT+C+Coding+Standard
ASCII
int a = 65;
printf("%d is ASCII \'%c\'\n", a, (char)a);
String literals are stored in read-only memory. A string literal-initialized array’s contents are copied from the read-only memory and so it writable. A string-literal initialized pointer points directly to the read-only memory and so is read-only.
// define a string h e l l o \n \0
char *str1 = "hello\n"; printf("%s", str1); // read-only
char str2[] = "hello\n"; printf("%s", str2); // initialized by the compiler
char str[7] = "hello\n"; printf("%s", str); // initialized by the compiler?
str1[4] = 'j' // segmentation fault!
str2[4] = 'j' // this is okay
Determining the size of a string
sizeof
returns the size of the declaration
strlen
returns the number of characters less the null terminator
char *str1 = "text for example";
char str2[17] = "text for example";
printf("str1 has size %lu\n", sizoef(str1)); // 8, the size in bytes of a pointer on a 64-bit machine
printf("str2 has size %lu\n", sizoef(str2)); // 17
printf("str1 has leng %lu\n", strlen(str1)); // 16
printf("str2 has leng %lu\n", strlen(str2)); // 16
char *str1 = "abc"; // read-only
char str2[] = "abc"; // compiler determines size 4
char str3[4] = "abc";
char str4[4] = "abcd"; // incorrect! should be size 5, including the null terminator
char str5[] = {'a', 'b', 'c', '\0'}; // compiler determines size 4
char str6[3] = {'a', 'b', 'c'}; // incorrect! should be size 4, including the null terminator
char str7[9] = {'a', 'b', 'c'}; // a b c \0 \0 \0 \0 \0 \0 \0
printf("str1 = %s\n", str1);
printf("str2 = %s\n", str2);
printf("str3 = %s\n", str3);
printf("str4 = %s\n", str4); // continues to read memory past 3 characters until a null terminator is encountered
printf("str5 = %s\n", str5);
printf("str6 = %s\n", str6); // continues to read memory past 3 characters until a null terminator is encountered
printf("str7 = %s\n", str7);
String copy#
strcpy(dst, src);
copies src to dst up to and including the null terminator
char *str1 = "abcde"; // 6 characters, including the null terminator
char str2[6];
char str3[3];
int i = 0xff; // 0000 0000 0000 0000 0000 0000 1111 1111
// d -> 68 -> 0100 0100
// e -> 69 -> 0100 0101
// 0000 0000
// 101 0110 0101 ???
printf("str1 = %s\n", str1); // abcde
strcpy(str2, str1);
printf("str2 = %s\n", str2); // abcde
printf( "i = %d\n", i); // 255
strcpy(str3, str1); // overflow! d e \0 overwrote other data
printf("str3 = %s\n", str3); // abcde
printf( "i = %d\n", i); // 101
strncpy(dst, src, n)
char *str1 = "abcde";
char str2[6];
char str3[3];
int i = 0xff;
printf("str1 = %s\n", str1); // abcde
strcpy(str2, str1);
printf("str2 = %s\n", str2); // abcde
printf( "i = %d\n", i); // 255
strncpy(str3, str1, 2);
str3[2] = 0x0;
printf("str3 = %s\n", str3); // ab
printf( "i = %d\n", i); // 255
String concatenation#
strcat(dst, src)
appends src to dst, strncat(dst, src, n)
appends at most n bytes of src to dst
char str1[20] = "abcde";
char *str2 = "efghi";
char str3[20] = "abcde";
strcat(str1, str2);
printf("str1 is [%s]\n", str1); // abcdeefghi
strncat(str3, str2, 20);
printf("str3 is [%s]\n", str3); // abcdeefghi
char* strncat (char* dst, const char* src, size_t n)
/* ^^^^^^^^^ a pointer to the destination buffer, which should be large enough to contain
* len(dst_string) + len(src_string) + '\0'
* ^^^^^^^^^^^^^^^ a pointer to the string to concatenate
* ^^^^^^^^ the maximum number of characters to concatenate
*
* strncat
* -------
* appends up to `n` characters of the string pointed to by `src` to the string pointed to by `dst`
*
* returns
* -------
* a pointer to the destination buffer
*/
String comparison#
Strings are compared to see whether they match, or whether one is lexicographically larger or smaller than the other
strcmp(s1, s2)
compares the strings, strncmp(s1, s2, n)
compares the first n bytes of the strings
return neg_int if s1 < s2
return 0 if s1 = s2
return pos_int if s1 > s2
String search#
strchr(str, char_to_find)
scans the string from the front to the back for a character
strrchr(str, char_to_find)
scans the string from the back to the front for a character
strstr(str, str_to_find)
scans the string from the fromnt to the back for a string (case sensitive)
strcasestr(str, str_to_find)
scans the string from the fromnt to the back for a string (case insensitive)
return a pointer to the address of the found item, or the null pointer
char *str = "xxxx0xxxFindmexxxx0xxxxFindme2xxxxx"; // xxxx0xxxFindmexxxx0xxxxFindme2xxxxx
printf("Looking for character %s, strchr : %s\n", '0', strchr(str, '0')); // 0xxxFindmexxxx0xxxxFindme2xxxxx
printf("Looking for character %s, strrchr : %s\n", '0', strrchr(str, '0')); // 0xxxxFindme2xxxxx
printf("Looking for character %5s, strstr : %s\n", "Findme", strstr(str, "Findme")); // Findmexxxx0xxxxFindme2xxxxx
printf("Looking for character %5s, strstr : %s\n", "FINDME", strstr(str, "FINDME")); // null
printf("Looking for character %5s, strcasestr : %s\n", "FINDME", strcasestr(str, "FINDME")); // Findmexxxx0xxxxFindme2xxxxx
String parse#
sscanf(str, "format", &args)
(arguments must be passed by reference using &
)
char *str = "1 3.14 a bob";
char c;
char s[20];
float f;
int ret, i;
ret = sscanf(str, "", &i, &f, &c, s);
printf("Scanned %d fields int [%d], float [%f], char [%c], string [%s]\n", ret, i, f, c, s);
#include <stdio.h>
#include <string.h>
#include <stdint.h>
// print an array of chars
void print_char_array (const char *arr, size_t size) {
// ^^^^^^^^^^^^^^^ pointer to an array of constant chars
for (int i = 0; i < size; ++i)
printf("%c ", arr[i]);
printf("\n");
}
// copy elems from array `src` to array `dst` via function `memcpy`
void copy_whole_array (char *src, char *dst, int copy_length) {
memcpy(dst, src, copy_length);
}
// copy the string from string `src_str` to string `dst_str` via function `memcpy` and include the null terminator
void copy_whole_string (char *src_str, char *dst_str, int copy_length) {
memcpy(dst_str, src_str, copy_length);
}
// copy length elems from idx 0 in array `src_partial` to array `dst_partial` via function `memcpy`
void copy_partial_array (char *src_partial, char *dst_partial, int copy_length) {
memcpy(dst_partial, src_partial, copy_length);
}
// copy length elems from idx 3 in array `src_partial` to array `dst_partial` via function `memcpy`
void copy_partial_array_1 (char *src_partial, char *dst_partial, int copy_length) {
memcpy(dst_partial, src_partial + 3, copy_length);
}
// copy elems from array `src1` to array `dst` following the pattern of copying 2 elems and skipping 1 elem and repeating until the required size is achieved via function `memcpy`
void copy_multiple_3_indices (char *src1, char *dst, int src_length, int required_size) {
int copied_elements = 0;
int start_idx = 0;
int dst_idx = 0;
while (copied_elements < required_size) {
for (int i = 0; i < src_length; ) {
int elements_to_copy = 2;
memcpy(dst + dst_idx, src1 + start_idx, 2);
dst_idx += elements_to_copy;
start_idx += 3;
copied_elements += elements_to_copy;
i += 3;
}
}
}
int main () {
// 1 - array of chars
char src[] = {'1', '2', '3', '4', '5'};
char dst[5];
copy_whole_array(src, dst, 5);
printf("EXERCISE 1 - array of chars\n");
printf("src: ");
print_char_array(src, 5);
printf("dst: ");
print_char_array(dst, 5);
printf("\n");
// 2 - string
char src_str[] = "Hello, world!";
char dst_str[20];
copy_whole_string(src_str, dst_str, 14);
printf("EXERCISE 2 - string\n");
printf("src: %s\n", src_str);
printf("dst: %s\n", dst_str);
printf("\n");
// 3 - portion of an array
char src_partial[] = {'a', 'b', 'c', 'd', 'e', 'i', 'o', 'u'};
char dst_partial[4];
copy_partial_array(src_partial, dst_partial, 3);
printf("EXERCISE 3 - portion of an array\n");
printf("src: ");
print_char_array(src_partial, 8);
printf("dst: ");
print_char_array(dst_partial, 3);
printf("\n");
// 4 - portion of an array
char dst_partial2[4];
copy_partial_array_1(src_partial, dst_partial2, 4);
printf("EXERCISE 4 - portion of an array\n");
printf("src: ");
print_char_array(src_partial, 8);
printf("dst: ");
print_char_array(dst_partial2, 4);
printf("\n");
// 5 - portion of an array
char src1[15] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o'};
char dst[6];
int required_size = 6;
copy_multiple_3_indices(src1, dst, 15, required_size);
printf("EXERCISE 5 - portion of an array\n");
printf("src: ");
print_char_array(src1, 15);
printf("dst: ");
print_char_array(dst, 6);
printf("\n");
return 0;
}
Bit Operations#
A logic left shift or logic right shift shifts bits and fills vacant positions with zeros.
0xFFFF1234
0xFFF12340 4-bit logic left shift
0x0FFFF123 4-bit logic right shift
An artihmetic shift preserves the sign bit. C99 requires that a signed integer be represented in two’s complement or one’s complement and so the MSB (most significant bit) of the integer is the sign bit.
0xFFFF1234
0xFFF12340 4-bit arithmetic left shift (fill in with 0s)
0xFFFFF123 4-bit arithmetic right shift (fill in with 1s to preserve the sign bit)
0x12345678
0x23456780 4-bit arithmetic left shift (fill in with 0s)
0x01234567 4-bit arithmetic right shift (fill in with 0s to preserve the sign bit)
Bitwise left shifts are zero-padded whether they are arithmetic or logical, signed or unsigned.
/* convert a byte to a binary string
*
* for each bit, if the bit is set then assign '1' to the appropriate position in the string; otherwise, '0'
*/
void byte_to_binary (uint8_t byte, char *buffer) {
for (int i = 7; i >= 0; i--)
buffer[7 - i] = (byte & (1 << i)) ? '1' : '0';
buffer[8] = '\0';
}
/* extract n adjacent bits
*/
uint32_t mask (uint32_t b, uint32_t n) {
/* ^^^^^^^^^^ start bit
* ^^^^^^^^^^ number of bits
*
* construct a bit mask with number of bits equal to the return type
* mask(4, 4) creates the following 32-bit bit mask: 0000 0000 0000 0000 0000 0000 1111 0000
*/
return ((1 << n) - 1) << b;
}
print(int('11111111111111111', 2))
print(int('01111100000000000', 2))
131071
63488
Bit \(\in \{ 0, 1\}\)
Nibble = 4 bits
Byte = 8 bits
Word = 4 bytes = 32 bits
Number Systems#
Base b (base b to decimal)
\(\texttt{decimal\_value} = \sum_{i = 0}^{k} b^i \times p_i \quad \text{st} \quad p_i \in \{ 0, \dotsc, b - 1 \}\)
\( \underset{p_2}{}\underset{p_1}{}\underset{p_0}{} = \underbrace{b^2 \times p_2}_{} + \underbrace{b^1 \times p_1}_{} + \underbrace{b^0 \times p_0}_{} = \)
Base 10
\(\texttt{decimal\_value} = \sum_{i = 0}^{k} 10^i \times p_i \quad \text{st} \quad p_i \in \{ 0, \dotsc, 9 \}\)
\( \underset{p_2}{1}\underset{p_1}{2}\underset{p_0}{3} = \underbrace{10^2 \times 1}_{100} + \underbrace{10^1 \times 2}_{20} + \underbrace{10^0 \times 3}_{3} = 123 \)
Base 2 (binary to decimal)
\(\texttt{decimal\_value} = \sum_{i = 0}^{k} 2^i \times p_i \quad \text{st} \quad p_i \in \{ 0, 1 \}\)
\( \underset{p_3}{1}\underset{p_2}{0}\underset{p_1}{1}\underset{p_0}{1} = \underbrace{2^3 \times 1}_{8} + \underbrace{2^2 \times 0}_{0} + \underbrace{2^1 \times 1}_{2} + \underbrace{2^0 \times 1}_{1} = 11 \)
Base 16 (hexadecimal to decimal)
\(\texttt{decimal\_value} = \sum_{i = 0}^{k} 16^i \times p_i \quad \text{st} \quad p_i \in \{ 0, \dotsc, 9, \underset{10}{\texttt{a}}, \underset{11}{\texttt{b}}, \underset{12}{\texttt{c}}, \underset{13}{\texttt{d}}, \underset{14}{\texttt{e}}, \underset{15}{\texttt{f}} \}\)
\( \underset{p_2}{\texttt{a}}\underset{p_1}{\texttt{f}}\underset{p_0}{\texttt{c}} = \underbrace{16^2 \times \texttt{a}}_{2560} + \underbrace{16^1 \times \texttt{f}}_{240} + \underbrace{16^0 \times \texttt{c}}_{12} = 2812 \)
Decimal to base b
p = [] // contains hexadecimal digit values from least-sig to most-sig
i = 0
while dec > 0
p[i] = dec % b // get the next hexadecimal digit value, from least-sig to most-sig
dec /= b
i++
// print the number by iterating through the digit array backwards since most-sig digits come first
Decimal to binary
p = [] // contains hexadecimal digit values from least-sig to most-sig
i = 0
while dec > 0
p[i] = dec % 2 // get the next hexadecimal digit value, from least-sig to most-sig
dec /= 2
i++
// print the number by iterating through the digit array backwards since most-sig digits come first
Decimal to hexadecimal
p = [] // contains hexadecimal digit values from least-sig to most-sig
i = 0
while dec > 0
p[i] = dec % 16 // get the next hexadecimal digit value, from least-sig to most-sig
dec /= 16
i++
// print the number by iterating through the digit array backwards since most-sig digits come first
Binary and hexadecimal
\( \texttt{deadbeef} = \underbrace{1101}_{\texttt{d}} \, \underbrace{1110}_{\texttt{e}} \, \underbrace{1010}_{\texttt{a}} \, \underbrace{1101}_{\texttt{d}} \, \underbrace{1011}_{\texttt{b}} \, \underbrace{1110}_{\texttt{e}} \, \underbrace{1110}_{\texttt{e}} \, \underbrace{1111}_{\texttt{f}} \)
// dec_to_bin.c
#include <stdio.h>
void dec_to_bin (int dec) {
int bin[32];
int idx = 0;
while (dec != 0) { // `dec > 0` could also work here
bin[idx++] = dec % 2; // from rightmost (least-sig) binary digit to leftmost (most-sig) binary digit
dec /= 2; // when `dec` equals 1, 1/2 evaluates to 0 and the loop exits
}
if (idx == 0) printf("0"); // when the while loop never iterated because the condition was false on its first pass (meaning `dec == 0`)
for (int i = idx - 1; i >= 0; i--) { // most-sig binary digits were computed last, and are printed first here
printf("%d", bin[i]);
}
printf("\n");
}
int main () {
int dec;
printf("Enter a decimal integer: ");
scanf("%d", &dec);
dec_to_bin(dec);
return 0;
}
Endianness#
How are bytes within a multi-byte word ordered in memory?
Big endianness means the least-significant byte has the highest memory address.
Little endianness means that the least significant byte has the lowest memory address.
EXAMPLE
int x = 305419896;
printf("%p\n", &x); // 0x0
\( 305,419,896 = 12345678_{16}\)
Big endian representation in memory: memory addresses are read from the lower address \(\texttt{0x0}\) to higher ones
\( \overset{\texttt{0x0}}{12} \quad \overset{\texttt{0x1}}{34} \quad \overset{\texttt{0x2}}{56} \quad \overset{\texttt{0x3}}{78} \)
Little endian representation in memory: memory addresses are read from the lower address \(\texttt{0x0}\) to higher ones
\( \overset{\texttt{0x0}}{78} \quad \overset{\texttt{0x1}}{56} \quad \overset{\texttt{0x2}}{34} \quad \overset{\texttt{0x3}}{12} \)
#include <stdio.h>
#include <stdint.h> // for type `uint8_t`
void show_bytes (uint8_t *start, int len) {
for (int i = 0; i < len; ++i)
printf("%p\t0x%.2x\n", start + i, start[i]);
printf("\n");
}
int main (void) {
int a = 305419896;
printf("a lives at address %p\n\n", &a);
show_bytes((uint8_t *)&a, sizeof(int));
}
Bitwise Operations#
& // and
| // or
~ // not
^ // exclusive or
~0x00
\( \begin{aligned} 00_{16} &= 0000 \quad 0000 \\ \sim 00_{16} &= 1111 \quad 1111 = \text{FF} \\ \end {aligned} \)
~0x41
\( \begin{aligned} 41_{16} &= 0100 \quad 0001 \\ \sim 41_{16} &= 1011 \quad 1110 = \text{BE} \\ \end {aligned} \)
0x69 & 0x55
\( \begin{aligned} 69_{16} &= 0110 \quad 1001 \\ 55_{16} &= 0101 \quad 0101 \\ 69_{16} \,\&\, 55_{16} &= 0100 \quad 0001 = 41_{16} \\ \end {aligned} \)
0x69 | 0x55
\( \begin{aligned} 69_{16} &= 0110 \quad 1001 \\ 55_{16} &= 0101 \quad 0101 \\ 69_{16} \,|\, 55_{16} &= 0111 \quad 1101 = \text{7D} \\ \end {aligned} \)
Bit shift#
uint8_t x = 172 // 10101100
x = x << 3 // 01100 000
printf("%d\n", x); // 96
Representing mathematical sets#
A width-\(w\) bitstring represents subsets of the set \(A = \{ 0, \dotsc, w - 1 \}\); namely, \(a_j = 1 \implies j \in A\).
For example, the bitstring
\( \underset{7}{0} \underset{6}{1} \underset{5}{0} \underset{4}{1} \underset{3}{0} \underset{2}{1} \underset{1}{0} \underset{0}{1} \)
represents the set
\(\{ 6, 4, 2, 0 \}\)
and the bitstring
\( \underset{7}{0} \underset{6}{1} \underset{5}{1} \underset{4}{0} \underset{3}{1} \underset{2}{0} \underset{1}{0} \underset{0}{1} \)
represents the set
\(\{ 6, 5, 3, 0 \}\)
Then the bitwise operators can be interpreted as the set operations.
& // intersection
| // union
~ // complement
^ // symmetric difference
\( \begin{aligned} \underset{7}{0} \underset{6}{1} \underset{5}{0} \underset{4}{1} \underset{3}{0} \underset{2}{1} \underset{1}{0} \underset{0}{1} \,\&\, \underset{7}{0} \underset{6}{1} \underset{5}{1} \underset{4}{0} \underset{3}{1} \underset{2}{0} \underset{1}{0} \underset{0}{1} &= \underset{7}{0} \underset{6}{1} \underset{5}{0} \underset{4}{0} \underset{3}{0} \underset{2}{0} \underset{1}{0} \underset{0}{1} \end {aligned} \)
\(\{ 6, 4, 2, 0 \} \cap \{ 6, 5, 3, 0 \} = \{ 6, 0 \}\)
\( \begin{aligned} \underset{7}{0} \underset{6}{1} \underset{5}{0} \underset{4}{1} \underset{3}{0} \underset{2}{1} \underset{1}{0} \underset{0}{1} \,|\, \underset{7}{0} \underset{6}{1} \underset{5}{1} \underset{4}{0} \underset{3}{1} \underset{2}{0} \underset{1}{0} \underset{0}{1} &= \underset{7}{0} \underset{6}{1} \underset{5}{1} \underset{4}{1} \underset{3}{1} \underset{2}{1} \underset{1}{0} \underset{0}{1} \end {aligned} \)
\(\{ 6, 4, 2, 0 \} \cup \{ 6, 5, 3, 0 \} = \{ 6, 5, 4, 3, 2, 0 \}\)
\( \begin{aligned} \sim \underset{7}{0} \underset{6}{1} \underset{5}{0} \underset{4}{1} \underset{3}{0} \underset{2}{1} \underset{1}{0} \underset{0}{1} &= \underset{7}{1} \underset{6}{0} \underset{5}{1} \underset{4}{0} \underset{3}{1} \underset{2}{0} \underset{1}{1} \underset{0}{0} \end {aligned} \)
\(\{ 6, 4, 2, 0 \}^\text{C} = \{ 7, 5, 3, 1 \}\)
\( \begin{aligned} \underset{7}{0} \underset{6}{1} \underset{5}{0} \underset{4}{1} \underset{3}{0} \underset{2}{1} \underset{1}{0} \underset{0}{1} \,\wedge\, \underset{7}{0} \underset{6}{1} \underset{5}{1} \underset{4}{0} \underset{3}{1} \underset{2}{0} \underset{1}{0} \underset{0}{1} &= \underset{7}{0} \underset{6}{0} \underset{5}{1} \underset{4}{1} \underset{3}{1} \underset{2}{1} \underset{1}{0} \underset{0}{0} \end {aligned} \)
\(\{ 6, 4, 2, 0 \} \Delta \{ 6, 5, 3, 0 \} = \{ 5, 4, 3, 2 \}\)
for (uint8_t n = 10; n >= 0; --n) {
printf("do I ever terminate?\n");
}
Since a uint8_t
has a value in the range 0-255 it will never become negative but rather when \(n\) reaches 0 it will be decremented to 255.
// an int contains a value ranging from -2,147,483,648 to 2,147,483,647
// the value of 0xffffffff is the equal to the magnitude of this range
// so the following wraps around to the value -1
int x = 0xffffffff;
printf("%d\n", x); // -1
\( \underbrace{16^7 \times \texttt{f}}_{4,026,531,840} + \underbrace{16^6 \times \texttt{f}}_{251,658,240} + \underbrace{16^5 \times \texttt{f}}_{15,728,640} + \underbrace{16^4 \times \texttt{f}}_{983,040} + \underbrace{16^3 \times \texttt{f}}_{61,440} + \underbrace{16^2 \times \texttt{f}}_{3,840} + \underbrace{16^1 \times \texttt{f}}_{240} + \underbrace{16^0 \times \texttt{f}}_{15} = 4,294,967,295 = 2 \times 2,147,483,647 + 1 \)
Say you want to place multiple bytes/values into a four-byte word (32-bit int).
value \(a\) into the least-significant byte (bits 0-7)
value \(b\) into the second byte (bits 8-15)
value \(c\) into the third byte (bits 16-23)
value \(d\) into the fourth byte (bits 24-31)
uint32_t pack_bytes (uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
uint32_t ret_val = 0x0, tempa, tempb, tempc, tempd;
tempa = a & 0xff; // make sure to get only the least-significant 8 bits
tempb = (b & 0xff) << 8; // shift the value to the second byte
tempc = (c & 0xff) << 16; // shift the value to the third byte
tempd = (d & 0xff) << 24; // shift the value to the fourth, most-significant byte
rev_val = tempa | tempb | tempc | tempd;
printf("a: 0x%08x\n", tempa);
printf("b: 0x%08x\n", tempb);
printf("c: 0x%08x\n", tempc);
printf("d: 0x%08x\n", tempd);
}
printf("Packed bytes: 0x%08x\n", pack_bytes(0x111, 0x222, 0x333, 0x444));
Storage Classes#
Global
accessible from anywhere within the same file
accessible in other
.c
/.h
files via keywordextern
Static
static global
are variables that are global to the local file onlystatic local
are variables that are local to the scope of a functionunlike automatic variables, preserves changes across invocations
keyword
static
is used to identity variables as local only
Global and static variables are initialized before program execution begins with supplied value or are given a default value, often 0
, and changes are preserved until the end of program execution
pointer type gets null pointer
arithmetic type gets positive/unsigned zero
aggregate type, every member is initialized recursively according to these rules
union type, the first named member is initialized recursively according to these rules
Auto
are automatically allocated/deallocated variables (local function variables declared on the stack)
given indeterminate default values, which means that the compiler does what it wants (most compilers use whatever value is already in memory)
EXAMPLE
// main.c
#include <stdio.h>
int x = 42;
int triple (void);
int main (void) {
printf("%d\n", x);
printf("%d\n", triple());
}
// triple.c
extern int x;
int triple (void) {
return x * 3;
}
gcc -g -Wall main.c triple.c -o myprog && ./myprog
42
126
EXAMPLE
// main.c
#include <stdio.h>
static int x = 42; // <--
int triple (void);
int main (void) {
printf("%d\n", x);
printf("%d\n", triple());
}
// triple.c
extern int x;
int triple (void) {
return x * 3;
}
gcc -g -Wall main.c triple.c -o myprog && ./myprog
COMPILATION FAILS
EXAMPLE - static local vs automatic
// foo.c
#include <stdio.h>
void foo (void) {
int x = 0;
static int y = 0;
x += 1;
y += 1;
printf("x=%d y=%d\n", x, y);
}
int main (void) {
foo();
foo();
}
gcc -g -Wall foo.c -o foo && ./foo
x=1 y=1
x=1 y=2
I/O#
Console#
scanf()
printf()
library libc
has portable routines for reading/writing
Unix standard streams provided to all programs.
printf("this is printed to standard output\n");
fprintf(stdout, "this is printed to standard output as well\n");
fprintf(stderr, "this is printed to standard error\n");
File#
C library has portable routines for reading/writing
fopen()
fread()
fwrite()
fclose()
does buffering by default
is blocking by default
OS provides system calls (more control over buffering and blocking)
low-level binary read/write
read()
write()
open()
close()
Signals#
C has no try-catch mechanism to handle exceptions.
Errors are returned from functions as integer error codes.
“If you do something bad you’ll end up spraying bytes around memory. Hopefully, this causes a segmentation fault and crash.”
There are mechanisms for exception handling (or exception-al control flow) at different levels of a computer system.
low-level
exceptions: aborts, faults, interrupts, traps (implemented using a combination of hardware and OS software)
high-level
process context switch (implemented by OS software, and hardware timer)
signals (implemented by OS software)
non-local jumps: longjmp(), setjmp() (implemented by the C runtime library)
Signal
A signal is a special message sent through the OS to tell a process (or thread) about some command or event. The process execution (or processor’s control flow) stops, and signal handler code starts. After the signal handler control flow finishes, the processor control flow can resume where it left off.
SIGHUP 1 // handup (POSIX)
SIGINT 2 // interrupt (ANSI)
SIGQUIT 3 // quit (POSIX)
SIGILL 4 // illegal instruction (ANSI) error
SIGTRAP 5 // trace trap (POSIX) error
SIGABRT 6 // abort (ANSI)
SIGIOT 6 // IOT trap (BSD 4.2) error
SIGBUS 7 // bus error (BSD 4.2) error
SIGFPE 8 // floating-point exception (ANSI) error
SIGKILL 9 // unblockable kill (POSIX) control process execution
SIGUSR1 10 // user-defined signal 1 (POSIX) sent by another application
SIGSEGV 11 // segmentation violation (ANSI) error
SIGUSR2 12 // user-defined signal 2 (POSIX) sent by another application
SIGTERM 15 // termination (ANSI)
SIGSTKFLT 16 // stack fault
SIGCHLD 17 // child status has changed (POSIX)
SIGCONT 18 // continue (POSIX) control process execution
SIGSTOP 19 // unblockable stop (POSIX) control process execution
SIGSYS 31 // bad system call
ps -<signal> <PID> # if no signal is provided, default SIGTERM
ps -U <username>
kill
# send a signal to all instances of a particular program (default SIGTERM)
killall -<signal> <program_name>
SIGTERM
interrupts the program and “asks” it to shut down, which it should
sometimes, this does not work (e.g., when the program is in a locked state)
it is often desirable to add a signal handler to handle the SIGTERM so that it can gracefully shutdown the process, cleanup memory, close files, etc.
SIGKILL
kills the process
can lead to inconsistent state because there is no opportunity to gracefully shutdown the process
Graceful shutdown refers to the proper and complete sync with secondary storage, disposal of resources, and normal termination.
int raise (int sig)
/*
*
* allow a process to send signals to itself
* suspend itself - SIGSTOP
* kill itself - SIGKILL
* reset its configuration - SIGHUP
* user-defined - SIGUSR1
*/
void suicide_signal (void) {
raise(SIGKILL);
return; // this will never be reached
}
Creating a signal handler
Create a signal handler by creating a function of the form
void signal_hander_function_name (int signal_number)
and passing a function pointer of the form
sighandler_t signal (int signum, sighandler_t signal_handler_function_name)
to the function. Thereafter, whenever a signal of the type signal_number
is raised the program is called instead of the default handler.
void signal_handler (int sig_no) {
printf("the signal handler got a [%d]\n", sig_no);
return;
}
signal(SIGHUP, signal_handler);
signal(SIGINT, signal_handler);
Function pointer
A function pointer is a pointer to a function that can be assigned, passed as a parameter, and called.
<return> (*<var>)(<params>);
/* ^^^^^^^^ the return type of the function
* ^^^^^ the variable names
* ^^^^^^^^ comma-separated list of parameter names
*/
int my_func (int i) {
printf("Got into function `my_func` with %d\n", i);
return 0;
}
int main (void) {
int (*func)(int);
func = my_func;
func(7);
return 0;
}
sigaction
int sigaction (int signum, const struct sigaction *act, struct sigaction *oldact)
/* ^^^^^^^^^^ the signal to be handled
* ^^^^^^^^^^^^^^^^^^^^^^^^^^^ a structure that contains information about the new signal handler, NULL means ignore the signal
* ^^^^^^^^^^^^^^^^^^^^^^^^ a pointer to the previously assigned handler as it is assigned in the function call
*/
The sigaction()
system call changes the action taken by a process on receipt of a specific signal.
struct sigaction new_action, old_action;
new_action.sa_handler = signal_handler;
new_action.sa_flags = SA_NODEFER | SA_ONSTACK;
/* ^^^^^^^^^^ don't suspend signals while in the handler
* ^^^^^^^^^^ provide an alternate stack for the signal handler
* SA_RESETHAND - restore the signal action to the default upon entry to the signal handler
*/
sigaction(SIGINT, &new_action, &old_action);
It is argued that sigaction
is better than signal
.
signal
does not block other signals from arriving while the current handler is executing whereassigaction
can block other signals until the current action returnssignal
resets the signal action back to the defaultSIG_DFL
for almost all signalsprovides better tuning of signals/controls of process through flags
Cache#
https://www.baeldung.com/cs/cache-write-policy
Cache Hierarchy#
The idea of a memory hierarchy is that the faster, smaller device at level \(k\) serves as a cache for the larger, slower device at level \(k + 1\), for each \(k\). Programs tend to access the data at level \(k\) more often then they access the data at level \(k + 1\) due to locality, and so the storage at level \(k + 1\) can be slower, larger, and cheaper per bit. The memory hierarchy creates a large pool of storage that costs as much as the cheap storage near the bottom but that serves data to programs at the rate of the fast storage near the top. Instruction caches are different from data caches.
The memory hierarchy
L0 - CPU registers
hold the words retrieved from L1
L1 cache (SRAM)
holds cache lines retrieved from L2
very fast (usually 100x faster than RAM), small, processor-adjacent
L2 cache (SRAM)
holds cache lines retrieved from L3
a bit slower (usually 25x faster than RAM) but often much larger
L3 main memory (DRAM)
holds disk blocks retrieved from local disks
slower still, larger still, may be off-chip (may be shared amongst processors in a multi-core system)
L4 local secondary storage (local disks)
hold files retrieved from disks on the remote network
slowest, least expensive
L5 remote secondary storage (tapes, distributed file systems, web servers)
EXAMPLE - AMD’s Ryzen 5 5600X
384 KB L1
3 MB L2
32 MB L3
EXAMPLE - Intel Core i5 3570K CPU
L1 data cache = 32 KB x 4 (8-way set associative, 64 B line size)
L1 instruction cache = 32 KB x 4 (8-way set associative, 64 B line size)
L2 cache = 256 KB x 4 (8-way set associative, 64 B line size)
L3 cache = 6 MB (12-way set associative, 64 B line size)
A cache is a smaller, faster storage device that acts as a staging area for a subset of the data in a larger, slower device.
A cache block (or cache line) is the basic unit for cache storage which contains multiple bytes or words of data.
A cache set (or cache row) is a row in the cache.
Locality
Caches exploit two types of locality to improve their performance.
Spatial locality, data that needs to be accessed tends to be close to data that has already been accessed
Temporal locality, data that has already been accessed is likely to be accessed again soon
These ideas lead to two cache design strategies:
spatial - cache items in blocks larger than that accessed
temporal - keep stuff that was used recently around longer
Cache Placement Policy#
The placement policy determines where in the cache a copy of an entry in main memory will go. If the placement policy is free to choose any entry in the cache to hold the copy then the cache is called fully associative. On the other hand, if each entry in main memory can go in just one place in the cache then the cache is direct-mapped. Choosing the right value of associativity involves a tradeoff between the efficiency of searching the cache and the hit rate.
In a cache with \(n\) cache sets the incoming memory address \(y\) is mapped to cache set \(x = y \mod n\).
Let \(\ell\) be the minimum number of bits required to uniquely represent a memory address.
If the cache block size is \(B \,\,\text{bytes}\) then the \(\log_2 B = b\) least significant bits of the memory address are the cache block offset bits.
If there are \(S\) cache sets in the cache then the \(\log_2 S = s\) next least significant bits of the memory address are the cache set index bits.
The remaining \(\ell - b - s = t\) most-significant bits of the memory address are the tag bits.
\( \ell\text{-bit memory address} = \underbrace{\boxed{\vphantom{Tg}\quad\quad\text{tag}\quad\quad}}_{t}\underbrace{\boxed{\vphantom{Tg}\quad\quad\text{cache set index}\quad\quad}}_{s}\underbrace{\boxed{\vphantom{Tg}\quad\text{offset}\quad}}_{b} \)
In a direct-mapped (or \(1\)-way set associative) cache each memory address is stored in a single cache block.
Locating data in the cache
use the index bits of the address of the memory block to determine the cache set
compare the tag bits of the address of the memory block with the tag bits of the cache block: if they match then there is a cache hit and the cache block is returned; otherwise there is a cache miss and the memory block is fetched from main memory
Loading data into the cache
use the index bits of the address of the memory block to determine the cache set
place the memory block into the cache set and store use the tag bits of the address of the memory block to store the tag in the associated tag field
if the cache block is occupied, replace the current memory block with the new memory block
In a \(2\)-way set associative cache each memory address can be stored in one of two cache blocks.
Locating data in the cache
Given a memory address it can be determined whether the data at that memory address is in the cache via the following procedure:
Use the index of the cache block to determine which cache set the address should reside in
For each cache block in the cache set, compare the tag associated with that cache block to the tag of the memory address. If they match, proceed to the next step. Otherwise, the data is not in the cache.
For the cache block where the data was found, inspect the valid bit. If it is \(1\) then the data is in the cache. Otherwise, it is not.
If the data at that memory address is in the cache then we use the block offset from that address to find the data within the cache block
Loading data into the cache
If the requested address is not found in the cache then it will be brought in from memory and placed there along with other addresses that are near it to take advantage of spatial locality. To determine which addresses will be brought in, we find the starting address and ending address of the range that will be brought in.
In a fully associative cache each memory address can be stored in any cache block.
Locating data in the cache
Loading data in the cache
A cache block is sought based on the value of its valid bit. If a cache block’s valid bit is \(1\) then a different cache block is sought. If a cache block’s valid bit is \(0\) then the memory block is placed into that cache block.
If the cache is completely full then a cache block is evicted according to the cache replacement policy and the memory block is placed into that cache block.
EXAMPLE
Consider a main memory of \(\overset{2^4}{16} \,\,\overset{2^{10}}{\text{KB}}\) which is organized into \(\overset{2^2}{4}\text{-B}\) memory blocks for a total of \(\overset{2^{12}}{4,096}\) memory blocks.
\( \begin{aligned} \frac{16 \,\,\text{KB}}{\text{memory}} &\times \frac{1 \,\,\text{block}}{4 \,\,\text{B}} &&= 4,096 \,\,\text{memory block} \\ \end {aligned} \)
And consider a cache of \(\overset{2^8}{256} \,\,\text{B}\) which is organized into \(\overset{2^2}{4}\text{-B}\) cache blocks for a total of \(\overset{2^6}{64}\) cache blocks.
\( \begin{aligned} \frac{256 \,\,\text{B}}{\text{cache}} &\times \frac{1 \,\,\text{block}}{4 \,\,\text{B}} &&= 64 \,\,\text{cache block} \\ \end {aligned} \)
A minimum of \(\ell = 14\) bits is needed to uniquely represent a memory address in a \(16\text{-KB}\) memory. Regardless of the cache placement policy, the incoming \(14\text{-bit}\) memory address will contain \(2\) offset bits because there are \(2^2 \,\,\text{B per cache block}\).
\(b = \log_2 4\)
If the cache is direct-mapped (or \(\overset{m}{1}\)-way set associative) then each of the \(64\) cache blocks is mapped to a set. There are \(\overset{n}{64}\) cache sets and the cache is represented as a \(64 \times 1\) column matrix. The incoming \(14\text{-bit}\) memory address will contain \(6\) index bits in order to uniquely represent the \(2^6\) cache sets, and \(14 - 6 - 2 = 6\) tag bits.
\(\underbrace{00 \, 0000}_{\text{tag}} \, \underbrace{0000 \, 00}_{\text{index}}\underbrace{00}_{\text{offset}}\)
In a direct-mapped cache, each memory address can only go in one cache block. This means that there is no replacement policy: if two memory addresses are mapped to the same cache block then they may continually knock each other out. Although a direct-mapped cache is simpler than an associative cache, it needs to be much larger to give comparable performance.
If the cache is \(\overset{m}{2}\)-way set associative then there are \(2\) cache blocks per cache set. There are \(\overset{n}{32}\) cache sets and the cache is represented as a \(32 \times 2\) matrix. The incoming \(14\text{-bit}\) memory address will contain \(5\) index bits in order to uniquely represent the \(2^5\) cache sets, and \(14 - 5 - 2 = 7\) tag bits.
\(\underbrace{00 \, 0000 \, 0}_{\text{tag}}\underbrace{000 \, 00}_{\text{index}}\underbrace{00}_{\text{offset}}\)
If the cache is fully associative (or \(\overset{m}{64}\)-way set associative) then there are \(64\) cache blocks per cache set. In other words, there is just \(\overset{n}{1}\) cache set and the cache is represented as a \(1 \times 64\) row matrix. The incoming \(14\text{-bit}\) memory address will contain \(0\) index bits in order to uniquely represent the \(2^0\) (i.e., single) cache set, and \(14 - 2 = 12\) tag bits.
\(\underbrace{00 \, 0000 \, 0000 \, 00}_{\text{tag}}\underbrace{00}_{\text{offset}}\)
Some memory addresses and their corresponding cache placements
\( \begin{aligned} \text{address, hex} && \text{address, bin} && \text{mem blck} && 64 \times 1 \,\,\text{tag, idx} && 32 \times 2 \,\,\text{tag, idx} && 1 \times 64 \,\,\text{tag} \\ 0x0000 && 00 \, 0000 \, 0000 \, 00\underbrace{00}_{2^0 - 1} && 0 && 0, 0 && 0, 0 && 0 \\ 0x0004 && 00 \, 0000 \, 0000 \, 01\underbrace{00}_{2^0 - 1} && 1 && 0, 1 && 0, 1 && 1 \\ 0x00\text{FF} && 00 \, 0000 \, 1111 \, 11\underbrace{11}_{2^2 - 1} && 63 && 0, 63 && 1, 31 && 63 \\ 0x0100 && 00 \, 0001 \, 0000 \, 00\underbrace{00}_{2^0 - 1} && 64 && 1, 0 && 3, 0 && 64 \\ \end {aligned} \)
Cache Eviction/Replacement#
When the cache is full a previously stored block must be evicted/replaced. The performance of the cache is determined by the cache eviction/replacement policy.
Least Recently Used (LRU)
In the LRU replacement policy the block that has been in the cache the longest without being used is evicted.
Least Frequently Used (LFU)
In the LFU replacement policy the block that has been used the least is evicted.
First In First Out (FIFO)
Queue.
Cache Performance#
A cache hit occurs when the block that is requested is found in and served from the cache.
A cache miss occurs when the block that is requested is not found in and served from the cache.
Types of cache misses
1. A cold miss (or compulsory miss) occurs because the cache is empty.
2. A capacity miss occurs when the set of active cache blocks (called the working set) is larger than the cache.
3. Most caches limit the blocks at level \(k + 1\) to a small subset–sometimes a singleton–of the block positions at level \(k\). For example, block \(i\) at level \(k + 1\) must be placed in block \(i \mod 4\) at level \(k\). A conflict miss occurs when the level \(k\) cache is large enough but multiple data objects all map to the same level \(k\) block. For example, requesting blocks \(0, 8, 0, 8, 0, 8, 0, 8\) would miss every time.
The efficiency of a cache is almost entirely determined by the hit ratio.
\(\text{hit ratio} = \frac{\text{num cache hits}}{\text{num requests}}\) where \(\text{num requests} = \text{num cache hits} + \text{num cache misses}\)
Average memory access time (or average memory latency)
\(\text{AMAT} = \text{H} + \text{MR} \times \text{AMP}\)
hit time \(\text{H}\) is the time it takes to read data from the cache
miss ratio \(\text{MR}\) is the probability that a cache access results in a miss: \(\text{miss ratio} = 1 - \text{hit ratio}\)
average miss penalty \(\text{AMP}\) is the cost to serve the block out of level \(k + 1\)
\(\text{AMAT} = \text{H} + \text{MR} \times \text{AMAT}_{k + 1}\)
EXAMPLE
\( \begin{aligned} \text{hit time} &= 25 \text{us} \\ \text{miss penalty} &= 250 \text{us} \\ \text{hit ratio} &= 0.8 \\ \text{miss ratio} &= 1 - 0.8 = 0.2 \\ \text{average} &= 25 \text{us} + 0.2 \times 250 \text{us} = 75 \text{us} \\ \end {aligned} \)
EXAMPLE
\(4\)-block LRU cache
https://www.makeuseof.com/tag/what-is-cpu-cache/
https://cseweb.ucsd.edu/classes/su07/cse141/cache-handout.pdf
[ w ] Average Memory Access Time (AMAT)
[ w ] Cache
[ w ] Cache Coherence
[ w ] Cache Control Instruction
[ w ] Cache Hierarchy
[ w ] Cache Hit
[ w ] Cache Miss
[ w ] Cache Performance
[ w ] Cache Placement Policy
[ w ] Cache Pollution
[ w ] Cache Replacement Policy
[ w ] Content-Addressable Memory (CAM)
[ w ] Data Remanence
[ w ] Dirty Bit
[ w ] Disk
[ w ] Dynamic RAM (DRAM)
[ w ] Gather/Scatter
[ w ] Locality of Reference
[ w ] Magnetic-Core Memory
[ w ] Memory
[ w ] Memory Access Pattern
[ w ] Memory Address
[ w ] Memory Management Unit (MMU)
[ w ] Non Volatile RAM (NVRAM)
[ w ] Non Volatile SRAM (nvSRAM)
[ w ] Page Replacement Algorithm
[ w ] Pseudo LRU (PLRU)
[ w ] Random Access
[ w ] Random Access Memory (RAM)
[ w ] Read-Only Memory (ROM)
[ w ] Scratchpad Memory
[ w ] Sequential Access
[ w ] Sequential Access Memory (SAM)
[ w ] Shared Memory
[ w ] Static RAM (SRAM)
[ w ] Translation Lookaside Buffer (TLB)
[ w ] Volatile Memory
[ w ] Application-Specific Integrated Circuit (ASIC)
[ w ] Complementary Metal-Oxide Semiconductor (CMOS)
[ w ] Complex Programmable Logic Device (CPLD)
[ w ] Configurable Logic Block (CLB)
[ w ] Field-Programmable Gate Array (FPGA)
[ w ] Programmable Logic Device (PLD)
[ w ] Reconfigurable Computing
[ w ] Load-Store Architecture
[ w ] Non Uniform Memory Access (NUMA)
[ w ] Processor Register
[ w ] Register File
Memory Ops#
Pointers are typed, and pointer arithemtic obeys types: when you add 1 to a pointer you add sizeof(type)
.
Buffer
A buffer is just a memory region that has some use which is typically referenced/maintained by a pointer. A buffer is a temporary holding place.
char *buffer;
void *buffer;
A void buffer is a general-purpose pointer, a pointer to a raw address.
Memory Management#
C has no garbage collector.
Any memory that is allocated must subsequently be un-allocated (freed). MEMORY LEAKS
local variables are allocated off the stack
freed when you return from the function
global and static variables are allocated in a data segment
are freed when the program exits
Memory can be allocated in the heap segment using malloc()
. This memory must be freed via free()
.
Failing to free allocated memory is a memory leak. Double-freeing allocated memory is an error (hopefully, a crash).
memcpy
copies one memory region to another, or from a src buffer to a dst buffer. The size must be explicit because there is no terminator.
char buf1[] = {0, 1, 2, 3};
char buf2[4] = {0, 0, 0, 0};
printf("Before\n");
for (int i = 0; i < 4; i++) {
printf("buf1[i] = %ld, buf2[i] = %ld\n", (int)buf1[i], (int)buf2[i]);
}
memcpy(buf2, buf1, 4);
printf("After\n");
for (int i = 0; i < 4; i++) {
printf("buf1[i] = %ld, buf2[i] = %ld\n", (int)buf1[i], (int)buf2[i]);
}
memset
fills memory with a given constant byte
void *memset (void *buf, int c, size_t n)
char a[4] = {0, 1, 2, 3};
memset(a, 0, 4);
int b[4] = {0, 1, 2, 3};
memset(b, 0, 4);
memcmp(buf1, buf2, n)
compares the first n bytes of the buffers
returns
neg int if
buf1
<buf2
0 if
buf1
=buf2
pos int if
buf1
>buf2
int x;
char cmps[4][2] = {
{0x0, 0x1}, {0x1, 0x0}, {0x0, 0x1}, {0x9, 0x0}
};
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
x = memcmp(&cmps[i][0], &cmps[j][0], 2);
printf("compare %ld%ld with %ld%ld = %d\n", cmps[i][0], cmps[i][1], cmps[j][0], cmps[j][1], x);
}
}
malloc
allocates a block of memory of the given size and returns a void pointer to the first byte of that memory, which does not need to be case because it is automatically promoted, or NULL if the memory could not be allocated. Initialized with garbage values.
void *malloc (size_t size)
#include <stdlib.h>
// allocate a 10-float array
float *arr = malloc(10 * sizeof(float));
if (arr = NULL)
return errcode;
arr[0] = 5.1;
calloc
returns an array of nmemb
members, each of size bytes, and zeroes out allocated memory: all bytes have the value 0x0. calloc
is slightly slower than malloc
and is preferred for non performance-critical code.
void *calloc (size_t nmemb, size_t bytes);
#include <stdlib.h>
// allocate a 10-long int array
long *arr = calloc(10, sizeof(long));
if (arr = NULL)
return errcode;
arr[0] = 5L;
void free (void *ptr)
releases the memory pointed to by the pointer
the ptr must point to the first byte of the heap-allocated memory (i.e., something returned by either
malloc
orcalloc
)set ptr to NULL after freeing a block of memory to avoid a dangling pointer
#include <stdlib.h>
long *arr = calloc(10 * sizeof(long));
if (arr = NULL)
return errcode;
free(arr);
arr = NULL;
typedef struct {
double real;
double imag;
} Complex, *ComplexPtr;
ComplexPtr AllocComplex (double real, double imag) {
Complex *retval = malloc(sizeof(Complex));
if (retval != NULL) {
retval->real = real;
retval->imag = imag;
}
return retval;
}
void *realloc (void *ptr, size_t size)
modifies a previous allocation by resizing it in-place if possible; if not, it creates a new allocation and copies as much data as it can. It returns NULL if the memory could not be allocated.
char *buf, *rbuf;
buf = malloc(2); // allocate a 2-byte array
memset(buf, 0xa, 2);
printf("buf = %x %x\n", buf[0], buf[1]);
rbuf = realloc(buf, 4); // resize to 4 bytes
printf("rbuf = %x %x %x %x\n", rbuf[0], rbuf[1], rbuf[2], rbuf[3]);
Heap#
The heap is a pool of unused memory that is used for dynamically-allocated data: malloc
allocates chunks of heap and free
deallocates them, and malloc
maintains bookkeeping data in the heap to keep track of allocated chunks.
Memory#
Units
\( \begin{aligned} \text{bit b} && \\ \text{byte B} && 1 \text{B} &= 8 \text{b} \\ \text{halfword (short) hw} && 1 \text{hw} &= 2 \text{B} \\ \text{word w} && 1 \text{w} &= 4 \text{B (sometimes 2B, then double word dw = 4B)} \\ \text{giant g} && 1 \text{g} &= 2 \text{w} = 8 \text{B} \\ \end {aligned} \)
The number of possible memory addresses on 32-bit architecture and 64-bit architecture respectively:
\( \begin{aligned} 2^{32} &= 4.29496730 \times 10^9 &&= 4,294,967,296 \\ 2^{64} &= 1.84467441 \times 10^{19} &&= 18,446,744,073,709,551,616 \\ \end {aligned} \)
64-bit processors can run in 32-bit compatibility mode which allows them to run 32-bit code quickly.
print('Possible memory addresses (theoretical limit)')
print(f'2^ 8 = {2** 8:>55,.0f} B')
print(f'2^ 16 = {2** 16:>55,.0f} B')
print(f'2^ 32 = {2** 32:>55,.0f} B (about 4.3 million)')
print(f'2^ 64 = {2** 64:>55,.0f} B (about 18.4 quintillion, or 16 EiB)')
print(f'2^128 = {2**128:>55,.0f} B (about 340 undecillion)')
print()
print('Some common physical memory limits')
print(f'2^ 48 = {2** 48:>55,.0f} B (about 0.3 quadrillion, or 256 TiB)')
print(f'2^ 52 = {2** 52:>55,.0f} B (about 4.5 quadrillion, or 4 PiB)')
print()
print('Some common user space and kernel space limits')
print(f'2^ 47 = {2** 47:>55,.0f} B (Linux user space)')
print(f'2^ 17 = {2** 17:>55,.0f} B (Linux kernel space)')
Possible memory addresses (theoretical limit)
2^ 8 = 256 B
2^ 16 = 65,536 B
2^ 32 = 4,294,967,296 B (about 4.3 million)
2^ 64 = 18,446,744,073,709,551,616 B (about 18.4 quintillion, or 16 EiB)
2^128 = 340,282,366,920,938,463,463,374,607,431,768,211,456 B (about 340 undecillion)
Some common physical memory limits
2^ 48 = 281,474,976,710,656 B (about 0.3 quadrillion, or 256 TiB)
2^ 52 = 4,503,599,627,370,496 B (about 4.5 quadrillion, or 4 PiB)
Some common user space and kernel space limits
2^ 47 = 140,737,488,355,328 B (Linux user space)
2^ 17 = 131,072 B (Linux kernel space)
MEMORY
0xFFFFFFFF +--------------------------------------------------+
| Kernel
+--------------------------------------------------+
| STACK (8 MB limit)
|
bp --> | stack frame
| function params/args
| automatic (local-to-function) vars
| saved frame pointer (SFP) - used to restore the frame pointer (bp) to its previous value
| return address pointer - used to restore the instruction pointer (ip) to the next instruction after the function call
sp --> +--------------------------------------------------+
| |
| v
+--------------------------------------------------+
| Shared Libraries (here, or above the stack???)
+--------------------------------------------------+
| ^
| |
+--------------------------------------------------+ <-- program break, moves up and down as memory is allocated to, deallocated from the heap
| HEAP
| dynamic memory (de)allocation - C malloc/free, C++ new/delete
+--------------------------------------------------+
| BSS (.bss)
| uninitialized static data, read from the binary executable and loaded into memory at this address
| writable, but has a fixed size
+--------------------------------------------------+
| DATA (.data)
| initialized static data such as globals, static locals, and strings, read from the binary executable and loaded into memory at this address
| writable, but has a fixed size
+--------------------------------------------------+
| CODE/TEXT (.rodata)
| static const
+--------------------------------------------------+
| CODE/TEXT (.text)
| assembled machine instructions, read from the binary executable and load it into memory at this address
| read-only, and has a fixed size
0x00000000 +--------------------------------------------------+
PROGRAM EXECUTION
The instruction pointer is set to point to the first instruction of the text segement (function `main`), and the processor proceeds as follows:
1. read the instruction that the instruction pointer is pointing to
2. add the byte length of the instruction to the instruction pointer
3. execute the instruction that was read in (1)
4. go back to (1)
Signed representation#
// in sign-and-magnitude, -1 is encoded as 100...001
if (-1 & 3) == 1
printf("The encoding is sign-and-magnitude");
// in one's complement, -1 is encoded as 111...110
else if (-1 & 3) == 2
printf("The encoding is one's complement");
// in two's complement, -1 is encoded as 111...111
else if (-1 & 3) == 3
printf("The encoding is two's complement");
else
printf("Not possible in the C standard");
From lower memory addresses to higher addresses:
1. Text segment
stores the executable code of the program
usually read-only
2. Data Segment .data
initialized static/global variables
static int a = 3;
read/write
the size of the data segment is determined by the size of the values in the program’s source code and does not change at runtime
Read-Only Data Segment .rodata
initialized static/global constants
read-only
3. BSS segment
uninitialized static/global constants/variables
static int b;
this segment is filled with zeros by the operating system and so uninitialized variables are initialized with zeros
4. Heap
grows from lower addresses to higher addresses
used to provide space for dynamic memory allocation (
malloc
,calloc
,realloc
,free
, etc.)
5. Stack
grows from higher addresses to lower addresses
used to store local variables defined inside functions as well as data related to function calls such as return addresses, arguments, etc.
C’s memory layout from higher addresses to lower addresses
command line arguments
dynamic memory
stack - a new stack frame for each function call, not for each function
heap
static memory
uninitialized data segment
initialized data segment
text/code segment - read-only permission prevents accidental modification; shareable, so that only a single copy is required amng multiple programs like text editors, shells, etc.
Post compilation, the instructions in the binary executable are loaded into the text segment.
print('32 b arch can address 2^32 B of RAM')
print(f'{(2**32) :>15,.0f} B')
print(f'{(2**32)*8:>15,.0f} b')
32 b arch can address 2^32 B of RAM
4,294,967,296 B
34,359,738,368 b
EXAMPLE
int x = 100; // data segment
int main () {
int a = 2; // stack
float b = 2.5; // stack
static int y; // bss segment
int *ptr = (int *)malloc(2 * sizeof(int)); // local variable `ptr` is stored in the stack, points to a block of memory in the heap
ptr[0] = 5; // heap
ptr[1] = 6; // heap
free(ptr);
return 1;
}
x86 Assembly#
https://www.quora.com/What-hardware-do-you-need-to-start-learning-x86-assembly
Syntax Flavor
AT&T
puts destination registers after source registers
Intel
puts destination registers before source registers
Registers
general-purpose - act as temp variables for the CPU when it's executing machine instructions
===============
ax accumulator
bx base
cx counter
dx data
sp stack pointer - stores an address
bp base pointer - stores an address - references local function variables in the current stack frame
- frame pointer (FP) or local base (LB) pointer
si src idx - stores an address, when data needs to be read
di dst idx - stores an address, when data needs to be written
ip instruction pointer - always holds the memory address of the next instruction to execute in the program's code segment
the processor increments %rip automatically after each instruction
control flow instructions, like branches, set the value of %rip to change the next instruction
flags - consists of several bit flags that are used for comparisons and memory segmentations
Registers
eax accumulator (general-purpose register)
ebx base (general-purpose register)
ecx counter (general-purpose register)
edx data (general-purpose register)
esp stack pointer (general-purpose register), pointer that stores a memory address
ebp base pointer (general-purpose register), pointer that stores a memory address
esi src index (general-purpose register), pointer used by load (read data) and store (write data) instructions
edi dst index (general-purpose register), pointer used by load (read data) and store (write data) instructions
eip instruction pointer, points to the instruction that the processor is reading
eflags several bit flags that are used for comparisons and memory segmentations
cs
ds
es
fs
gs
ss
Register Formats
63 31 15 7 0
+-------------------------------+-------------------------------+
| | | | |
+---------------------------------------------------------------+
|---------------------%rax (64 bits/8 bytes)--------------------|
|-----%eax (32 bits/4 bytes)----|
|-%ax (16b/2B)--|
|--%ah--|--%al--| <-- 8 bits/1 byte each
Instructions
/**************************************************
*
* computation
*
**************************************************/
src src/dst
add %rax,%rbx %rbx := %rbx + %rax
sub %rax,%rbx %rbx := %rbx - %rax
inc
and
or?
/**************************************************
*
* data movement
*
**************************************************/
src dst
mov %rax,%rbx %rbx := %rax
/**************************************************
* control flow transfers - deviations from sequential instruction execution (function calls loops, conditionals)
*
* unconditional branch - the instruction pointer is set to a new value
* conditional branch - the instruction pointer is set to a new value if a condition is true
* function call
* function return
**************************************************/
cmp "compare"
j "jump"
lea "load effective address"
INTEL AT&T
and
call
cmp cmpl compare
inc incl increment
jle jump if less than or equal to
jmp jump
lea
leave
mov
movl
nop
push
ret
sub
GDB#
~/.gdbinit
set dis intel
set disassemble-next-line on
disass (disassemble)
~/.gdbinit
b main break main
dis main disassemble main
i r eip info register eip
p print
q quit
list
nexti next instruction
run
set dis intel set disassembly intel
x/<f> <addr> examine memory address `addr` with format `f`
x/i $eip instruction
x/o $eip octal
x/t $eip binary
x/u $eip decimal
x/x $eip hexadecimal
x/2x $eip 2 words
x/12x $eip 12 words
x/2xb $eip 2 B
x/2xh $eip 2 halfword ( 4 B = 2 halfword x 2 B/halfword)
x/2xw $eip 2 word ( 8 B = 2 word x 4 B/ word)
x/2xg $eip 2 giant (16 B = 4 word x 4 B/ word)
x/xdw double word
gcc -S # compile C to 32-bit assembly on a 32-bit system
gcc -S -m32 # compile C to 32-bit assembly on a 64-bit system
gcc -Wall -o prog prog.c # compile C to binary executable
gcc -g -Wall -o prog prog.c && gdb -q prog
# ^^ provide debugging information to gdb
# ^^ quiet mode
objdump -M intel -D prog | grep -A20 main.:
objdump
#
readelf
#
https://en.wikipedia.org/wiki/Readelf
file a.out # check whether it is an ELF file
readelf -h a.out # view the ELF header
readelf -S a.out # view sections in the ELF file
readelf -l a.out # view program headers (describes the segments used at runtime)
readelf -s a.out # view the symbol table
readelf -r a.out # view the relocation sections (shows how the binary modifies itself at runtime)
readelf -d a.out # view the dynamic section (contains information on dynamic linking)
Building#
Build
edit ->
foo.h
,foo.c
(source file)compile ->
foo.o
(object file)link ->
foo
+libZ.a
statically-linked library (executable)load ->
bar
+libc.so
shared library (process)execute, debug, profile
# gcc is used to build (compile + link) a program
# ld can be used to link the program
# compilation
# gcc <src files> [options]
# -c generate object file
# -g generate debug information
# -o output specify the output file
# -Wall show all warnings
gcc -Wall -g -c -o hello.o hello.c
# ^^^^^^^^ generate debug information and display errors
# ^^^^^^^^^^^^^ compile, but do not link
# linking
# gcc <obj files> [options]
# -g generate debug information
# -l<lib> link with a library
# -o output specify the output file
gcc -g -lmyexample -o hello hello.o goodbye.o
# ^^^^^^^^^^^ link with a library
# ^^^^^^^^ output file
Static Library
Static linking is done at link time. A library is a collection of related code and functions that are linked against a C program. The library exports symbols. Program object code has unresolved symbols. The linker pulls chunks of the library containing those symbols and places them into the program. The program is done when all the pieces are resolved.
A statically-linked library produces object code that is inserted into the program at link time.
Build an archive of the library which the linker uses to search for and transfer code into the program.
The name of the library is linked against, not the name of the file in which the library exists
# ar rcs lib<libname>.a <object files>
# ^ replace existing code with the objects passed in
# ^ create the library, if needed
# ^ create an index for relocatable code
#
ar rcs libmyexample.a a.o b.o c.o d.o
Dynamic Library
A dynamic library is a collection of related code and functions that are resolved at run time. The program’s object code contains unresolved symbols, and the library exports symbols. The loader pulls chunks of the library containing those symbols and places them into the process. The symbols are resolved when the process is started or later during execution. The library is called dynamic because this process can be done at any time.
A dynamically-linked library produces object code that is inserted into the program at execution time.
A loadable version of the library is being built, which the loader uses to launch the application. All object files to be placed in the library must have been compiled to position-independent code (PIC). PIC is not dependent on being located at any predefined location in memory; it uses relative jumps, so it does not matter where it is loaded at exection time.
gcc -Wall -g -c -fpic -o a.o a.c
// ^^^^^^^^ compile to PIC
gcc -shared -o libmyexample.so a.o b.o c.o d.o
Pros - static linking
resolve all implementation issues at link time
avoids problems of API versioning
linker can optimize library code at link time
Pros - dynamic linking
reduced executable
system updates
Preprocessor
The preprocessor processes input source code files for commands that setup the compile environment specific to that execution. #
directives are used to indicate what the programmer wants done.
#include // include data from some other data file
#include "foo.h" // look local, used in application programming
#include <foo.h> // look at the default directories and any directories provided in the command line
#define // create a definition symbol that gets searched-replaced throughout
// commonly used to define a constant that might be changed in the future (e.g., the size of arrays)
// can also be used to create functions called macros; macros are not called, but are replaced during preprocessing (and so there are no function call overheads, stack operations)
#undef // undoes the binding done by #define
// conditionally compile parts of a program
#if
#ifdef
#ifndef
gcc -I<path> # look in a specific directory for include files
#define NUM_ENTRIES 15
#define DEFINED
#if 0
/* not compiled */
#else
/* compiled */
#endif
#ifdef UNKNOWNVALUE
/* not compiled */
#else
/* compiled */
#endif
#ifndef DEFINED
/* not compiled */
#else
/* compiled */
#endif
int main (void) {
float myFloats[NUM_ENTRIES];
#if 0
for (int i = 0; i < NUM_ENTRIES; i++)
scanf("%f", &myFloats[i]);
printCharline('*', 69);
printf("Received and computed\n");
#endif
}
Make
The make utility is a utility for building complex programs/systems.
determine which parts of a program/system are out of date
determine the dependencies between objects
issue commands to create intermediate and final project files
Each project/system has one or more files defining the build called the makefile: what to build, how to build them, how they are related. The target is the thing to build. The prerequisites are the things that the target depends on. a.o
depends on a.c
. Variables are data that defines elements of interest to the build. Rules are statements of targets, prerequisites, and commands.
Productions or rules are statements consisting of targets, prerequisites, and commands which define how a particular item is built.
builtin variables
$@
the current rule target$^
the prerequisite list$<
the first prerequisite
Pattern rule
%
matches any nonempty string
Run the commands to build the target when any of the prerequisites are out of date:
target: prereq1, prereq2, prereq3
command1
command2
CC=gcc
LINK=gcc
CFLAGS=-c -Wall -I.
OBJ_FILES= sample.o support.o
sample : $(OBJ_FILES)
$(CC) $^ -o $@
sample.o : sample.c support.h
$(CC) $(CFLAGS) sample.c -o $@
support.o : support.c support.h
$(CC) $(CFLAGS) support.c -o $@
make
CC=gcc
CFLAGS=-c -Wall -I. -fpic -g -fbounds-check
LIBS=-lcrypto
OBJS=tester.o util.o mdadm.o
%.o : %.c %.h
$(CC) $(CFLAGS) $< -o $@
tester : $(OBJS) jbod.o
$(CC) -o $@ $^ $(LIBS)
clean :
rm -f $(OBJS) tester
# source code
vim foo.c foo.h bar.c
# assembling
gcc -a foo.c -o foo.s
gcc -a bar.c -o bar.s
as
# compilation
#
# generates object files
gcc -c foo.c -o foo.o
gcc -c bar.c -o bar.o
# linking
#
# produces executable `bar`
gcc foo.o bar.o -llibZ.a -o foo
ld
# loading
#
# produces process `bar`
libc.so # shared library, linked with process `bar`
# execute, debug, profile, etc.
####################
#
# Example of assemble + compile + link
#
####################
gcc hello.c -o hello -g -Wall -O0 -fsanitize=address -I.
# ^^ the compiler generates debug information
# ^^^^^ checks
# ^^^ disable the compiler optimizations, for debugging
# ^^^^^^^^^^^^^^^^^^ AddressSanitizer memory debugger
# ^^^ include data from data files in the working directory
Static linking is the link-time process in which the linker places chunks of static libraries into the source code in order to resolve unresolved symbols. A statically-linked library produces object code which is inserted into the program at link-time.
# build an archive of the library, which the linker uses to search for and transfer code into the program
ar rcs libMyStaticLibrary.a a.o b.o c.o d.o
# ^ replace existing code with the objects
# ^ create the library if it doesn't exist
# ^ create an index for relocatable code
# ^^^^^^^^^^^^^^^^^^^^ the name of the static library
# ^^^^^^^^^^^^^^^ object files
Dynamic linking is the run-time process in which the loader places chunks of dynamically-linked libraries into the process in order to resolve unresolved symbols. A dynamically-linked library produces object code which is inserted into the program at run-time.
# all object files to be placed in the dynamically-linked library must be compiled to position-independent code
gcc -c a.c -o a.o -fpic -g -Wall
gcc -c b.c -o b.o -fpic -g -Wall
gcc -c c.c -o c.o -fpic -g -Wall
gcc -c d.c -o d.o -fpic -g -Wall
# build a loadable version of the library, which the loader uses to launch the program
gcc -shared -o libMyDynamicLibrary.so a.o b.o c.o d.o
C Preprocessor#
The C preprocessor processes source code files for #
directives that setup the compile environment.
#include <stdio.h> /* include data from data file `stdio.h` either in the default directories or on the command line */
#include "foo.h" /* include data from data file `foo.h` in the local directory */
#define MY_CONSTANT 10 /* define a symbol to be replaced throughout the program, usually for a constant that might be changed */
#undef /* */
/* macros
*
* the `#define` directive can also be used to create simple functions called macros
* macros are not called like functions are, but rather substituted during the preprocessing phase of compilation
* thus, there is no function call overhead on the stack (is this true???)
*/
#define swap (x, y) { int temp = x; x = y; y = temp; }
#if
#else
#endif
#ifdef
#ifndef
Makefile#
the target is the thing to build
the prerequisites are the things that the target depends on
the dependencies are relationships between files like
a.c
anda.o
productions/rules are statements that include targets, prerequisites, and commands
// run the commands to build the target when any of the prerequisites are out-of-date
target : prereq1, prereq2, prereq3, ...
<TAB>command1
<TAB>command2
builtins
$@
the current rule target$^
the prerequisites$<
the first prerequisite
1
sample : sample.o support.o
gcc sample.o support.o -o sample
sample.o : sample.c support.h
gcc -c sample.c -o sample.o -Wall -I.
support.o : support.c support.h
gcc -c support.c -o support.o -Wall -I.
2
OBJECT_FILES = sample.o support.o
sample : $(OBJECT_FILES)
gcc $(OBJECT_FILES) -o sample
sample.o : sample.c support.h
gcc -c sample.c -o sample.o -Wall -I.
support.o : support.c support.h
gcc -c support.c -o support.o -Wall -I.
3
CC = gcc
LINK = gcc
CFLAGS = -c -Wall -I.
OBJECT_FILES = sample.o support.o
sample : $(OBJECT_FILES)
$(CC) $^ -o $@
sample.o : sample.c support.h
$(CC) $(CFLAGS) sample.c -o $@
support.o : support.c support.h
$(CC) $(CFLAGS) support.c -o $@
# assemble + compile
gcc -S source.c # produces `source.s`
as source.s # produces `source.o`
# compile
gcc -c source.c # produces `source.o`
# link
gcc source.c -o source # produces executable file
gcc -g -Wall source.c -o source # view warnings
Source File .c
Header File .h
Assembly File .s
Object File .o
contains re-locatable machine code produced by the compiler
generally, cannot be executed without some manipulation (e.g., via a linker)
may contain references to external symbols in other object files: linking will resolve external references
Library
a collection of object files
Object files and libraries are linked to produce an executable file.
Profiling#
gprof
#
gprof
measures a program’s performance and behavior
gcc -pg -o profiling profiling.c # 1. compile with flag `-pg`
./profiling # 2. execute (generates file `gmon.out`)
gprof profiling # 3. run `gprof`
# 4. review, optimize, re-profile
Amdahl’s Law
\(k\) is the percentage of total execution time spent in the optimized module(s)
\(s = \frac{\text{original execution time}}{\text{improved execution time}}\) is the execution time expressed in terms of n-factor speedup (2x, 3x)
The overall speedup of the program is expressed as
\( \begin{aligned} T = \frac{1}{(1 - k) + \frac{k}{s}} \end {aligned} \)
where \(1 - k\) is the part of the program that is unchanged and \(k/s\) is the ratio of altered program size to speedup
EXAMPLE
Consider an optimized program module \(A\) which represents \(45\%\) of the program run time. The optimization reduces the runtime of the module from \(750\text{ ms}\) to \(50\text{ ms}\). What is the speedup?
\( \begin{aligned} k &= 0.45 \\ s &= \frac{750}{50} = 15 \\ T &= \frac{1}{(1 - 0.45) + \frac{0.45}{15}} = \frac{1}{0.55 + 0.03} \approx 1.72 \end {aligned} \)
The program speedup is \(1.72\) times.
EXAMPLE
Memory operations take \(30\%\) of the total execution time. A new widget called a cache speeds up \(80\%\) of memory operations by a factor of \(4\). A second new widget called an L2 cache speeds up \(1/2\) the remaining \(20\%\) by a factor of \(2\). What is the total speedup?
\( \begin{aligned} k &= 0.3 \\ k_1 &= 0.3 \times 0.8 = 0.24 \\ s_1 &= 4 \\ k_2 &= 0.3 \times \frac{(1 - 0.8)}{2} = 0.03 \\ s_2 &= 2 \\ T &= \frac{1}{(1 - k_1 - k_2) + \frac{k_1}{s_1} + \frac{k_2}{s_2}} \\ &= \frac{1}{(1 - 0.24 - 0.03) + \frac{0.24}{4} + \frac{0.03}{2}} \\ &= \frac{1}{0.73 + 0.06 + 0.015} \\ &\approx 1.24 \\ \end {aligned} \)
EXAMPLE
Consider four modules that are measured prior to optimization and after optimization and suppose that the run time of the original execution is 2000 usec. What is the speedup?
module |
before (usec) |
after (usec) |
---|---|---|
A |
200 |
60 |
B |
450 |
11 |
C |
1000 |
600 |
D |
125 |
1 |
\( \begin{aligned} k_A &= \frac{200}{2000} = 0.1 \\ s_A &= \frac{200}{60} \approx 3.33 \\ T_A &= \frac{1}{(1 - k_A) + \frac{k_A}{s_A}} \\ &= \frac{1}{(1 - 0.1) + \frac{0.1}{3.33}} \\ &= \frac{1}{0.9 + 0.03} \\ &\approx 1.075 \\ \end {aligned} \)
\( \begin{aligned} k_B &= \frac{450}{2000} = 0.225 \\ s_B &= \frac{450}{11} \approx 40.91 \\ T_B &= \frac{1}{(1 - k_B) + \frac{k_B}{s_B}} \\ &= \frac{1}{(1 - 0.225) + \frac{0.225}{40.91}} \\ &= \frac{1}{0.775 + 0.0055} \\ &\approx 1.28 \\ \end {aligned} \)
\( \begin{aligned} k_C &= \frac{1000}{2000} = 0.5 \\ s_C &= \frac{1000}{600} \approx 1.67 \\ T_C &= \frac{1}{(1 - k_C) + \frac{k_C}{s_C}} \\ &= \frac{1}{(1 - 0.5) + \frac{0.5}{1.67}} \\ &= \frac{1}{0.5 + 0.299} \\ &\approx 1.25 \\ \end {aligned} \)
\( \begin{aligned} k_D &= \frac{125}{2000} = 0.0625 \\ s_D &= \frac{125}{1} = 125 \\ T_D &= \frac{1}{(1 - k_D) + \frac{k_D}{s_D}} \\ &= \frac{1}{(1 - 0.0625) + \frac{0.0625}{125}} \\ &= \frac{1}{0.9375 + 0.0005} \\ &\approx 1.066 \\ \end {aligned} \)
\(1.075 \times 1.28 \times 1.25 \times 1.066 \approx 1.8335\)
\( \begin{aligned} k_A &= \frac{ 200}{2000} = 0.1 \\ k_B &= \frac{ 450}{2000} = 0.225 \\ k_C &= \frac{1000}{2000} = 0.5 \\ k_D &= \frac{ 125}{2000} = 0.0625 \\ s_A &= \frac{ 200}{ 60} \approx 3.33 \\ s_B &= \frac{ 450}{ 11} \approx 40.91 \\ s_C &= \frac{1000}{ 600} \approx 1.67 \\ s_D &= \frac{ 125}{ 1} = 125 \\ T_D &= \frac{1}{(1 - k_A - k_B - k_C - k_D) + \frac{k_A}{s_A} + \frac{k_B}{s_B} + \frac{k_C}{s_C} + \frac{k_D}{s_D}} \\ &= \frac{1}{(1 - 0.1 - 0.225 - 0.5 - 0.0625) + \frac{0.1}{3.33} + \frac{0.225}{40.91} + \frac{0.5}{1.67} + \frac{0.0625}{125}} \\ &= \frac{1}{0.1125 + 0.03 + 0.0055 + 0.299 + 0.0005} \\ &= \frac{1}{0.4475} \\ &\approx 2.235 \\ \end {aligned} \)
Concurrency#
Sequential processing: processing a network connection as it arrives and fulfilling the exchange completely; connections are processed in sequence of their arrival.
listen_fd = Listen(port);
while (1) {
client_fd = accept(listen_fd);
buf = read(client_fd);
write(client_fd, buf);
close(client_fd);
}
the benefits of sequential processing
it is simple to implement
there is very little persistent state to maintain
there are few complex error conditions
the disadvantes of sequential processing
sometimes there is poor performance: one slow client causes all others to block; this is poor utilization of the network and the CPU
Concurrent processing: process multiple requests concurrently
Approaches to concurrent server design
asynchronous servers
select()
multiprocess servers
fork()
multithreaded servers
pthreads
Concurrency with processes
The server process blocks on accept()
and waits for a new client to connect. When a new connection arrives the parent calls fork()
to create another process. The child process handles the new connection and exit()
s when the connection terminates. Children become zombies after death: use wait()
to reap the children.
fork
creates a new process by duplicating the calling process: the new child process is an exact duplicate of the calling parent process except that it has its own PID and pending signal queue. fork
returns 0 for the child process or the child’s PID in the parent code.
pid_t fork (void);
/* creates a new child by duplicating the calling parent process
* the new child process is an exact duplicate, except that it has its own PID and pending signal queue
*
* returns
* 0 for the child process
* child's PID in the parent code
*/
void exit (int status);
/* causes normal process termination with a return value
* status is sent to the calling parent process
* `return` is a keyword which returns (a value) from a function
* `exit` is a function which eventually calls the system call `_exit`: it terminates the process immediately and returns the status to the parent
*/
pid_t wait (int *status);
/* ^^^^^^^^^^^ the return value that is set by the child process
*
* used to wait for state changes in a child of the calling process
*
* returns
* child PID
*/
Concurrency with threads
A single process handles all the connections, but a parent thread forks (dispatches) a new thread to handle each connection. The child thread handles the new connection and exits when the connection terminates. As many threads can be created as one wants, up to the limits of the system.
A thread is an independent stream of instructions that can be scheduled to run as such by the OS. To the software developer, a thread is a procedure that runs independently of its main program. Imagine a main program that contains a number of procedures, and imagine that all the procedures can be scheduled to run simultaneously with (or independently of) the OS. This is multi-threaded
accept
pthread_create
The indpendent flow of control is accomplished because a thread maintains its own
stack pointer
registers
scheduling properties (such as policy or priority)
set of pending and blocked signals
thread specific data
A thread exists within a process and uses the process’s resources. It has its own independent flow of control as long as its parent process exists and the OS supports it. A thread duplicates only the essential resources it needs to be independently schedulable. It may share process resources with other threads that act equally as independently. A thread dies if the parent process dies. A thread is lightweight because most of the overhead has already been accomplished through the creation of its process.
Since threads within the same process share resources changes made by one thread (such as closing a file) will be seen by all other threads; two pointers having the same value point to the same data; reading and writing to the same memory locations is possible and so requires explicit synchronization by the programmer. Shared data between threads can cause conflicts, deadlocks, etc.
Thread control
main
pthread_create()
create a threadpthread_join()
wait for the thread to finish
thread
begins as a function pointer
runs until
return
orpthread_exit
library support
#include <pthread.h>
compile with option
-lpthread
to link with the pthread library
pthread_t pthread_self (void);
/* returns the ID of the calling thread
*/
int pthread_create (pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
/* ^^^^^^^^^^^^^^^^^ a pthread library structure which contains thread information
* ^^^^^^^^^^^^^^^^^^^^^^^^^^ a set of attributes to apply to the thread
* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the thread function pointer
* ^^^^^^^^^ an opaque data pointer to pass to the thread
*
* starts a new thread in the calling process
* the thread becomes alive, it may even run before pthread_create returns
*
* returns
* 0 on success
* non-zero error code on failure
*/
int pthread_join (pthread_t thread, void **retval);
/* ^^^^^^^^^^^^^^^^ a pthread library structure which contains thread info
* ^^^^^^^^^^^^^ a pointer to a pointer to a return value
*
* `pthread_join` waits for the thread to terminate
*
* returns
* 0 on success
* non-zero error code on failure
*/
void pthread_exit (void *retval);
/* ^^^^^^^^^^^^ a pointer to a return value
*
* `pthread_exit` terminates the calling thread and returns a value
*/
Race Condition
A race condition occurs when the outcome of a program depends on the interleaving of the execution of multiple threads accessing a critical section.
A critical section is a piece of code that accesses a shared variable and must not be concurrently executed by more than one thread.
Each instruction is executed atomically. Multiple threads executing these instructions can result in different interleavings (and outcomes) due to uncontrollable OS scheduling.
mov 0x2e50(%rip),%eax # 4014 <counter>
add $0x1,%eax
mov %eax,0x2e47(%rip) # 4014 <counter>
Avoiding race conditions
To avoid a race condition it must be ensured that only a single thread can execute a critical section at any given time.
Atomics: modifying a variable results in a single CPU instruction
#include <stdatomic.h>
In general, a critical section may contain complex logic. Primitives for mutual exclusion are needed–a guarantee that only one thread is executing the critical section while others are prevented from doing so. One way to achieve mutual exclusion is via locks
lock_t mutex
lock(&mutex)
critical_section
unlock(&mutex)
Advantages of threads
much of the code is identical
parallel execution: good CPU and network utilization
lower overhead than processes
shared-memory communication is possible
Disadvantages
synchronization is complicated
shared fate within a process: one rogue thread can hurt you
relatively insecure, no isolation
Network connections are processed in the sequence of their arrival. Sequential processing is relatively simple to implement, has relatively little persistent state to maintain, and has relatively few complex error conditions; but may have poor performance if a slow client blocks all the others, and poorly utilizes the network or CPU.
listen_fd = Listen(port);
while (1) {
client_fd = accept(listen_fd);
buf = read(client_fd);
write(client_fd, buf);
close(client_fd);
}
Multi Processing#
Multi Threading#
Multithreading: a single process handles all connections
a parent thread dispatches/forks a new, child thread to handle each connection
a child thread handles a connection and exits when the connection terminates
one can create as many threads as desired, up to system limits
“forking multiple threads of execution in one process”
multiple threads share the same code and the same heap (but have different registers and different stacks) (shared data structures?)
the independency of threads is maintained via its own stack pointer, registers, scheduling properties (policy, priority, etc.), set of pending and blocked signals, and thread-specific data
A thread is an independent stream of instructions that can be scheduled to run as such by the OS. From the perspective of the developer, a thread is a procedure that runs independently of its main program. A multi-threaded program can be viewed as a main program that consists of a collection of procedures which are scheduled to run independently, or simultaneously.
a thread exists within a process, and uses the process’s resources, which it may share with other threads; if a thread modifies shared process resources then the modifications are seen be all other threads
closing a file
two pointers with the same value point to the same data
it is possible to read/write to the same memory location
as long as its process exists, and the OS supports it, a thread has its own, independent control flow
a thread replicates only those resources which it requires to be indepentantly schedulable
a thread dies when its process dies
a thread is lightweight in the sense that most of its overhead has already been handled via the creation of its process
/* server */
accept(); // client 1 connects to parent thread
pthread_create(); // client 1 connection transfers to child thread 1
accept(); // client 2 connects to parent thread
pthread_create(); // client 2 connection transfers to child thread 2
/* thread.c */
#include <pthread.h>
int main (int argc, char** argv) {
pthread_create (pthread_t* thread, const pthread_attr_t* attr, void* (*start_routine)(void *), void* arg)
/* ^^^^^^^^^^^^^^^^^ a pthread library struct, holding thread info
* ^^^^^^^^^^^^^^^^^^^^^^^^^^ a set of attributes to apply to the thread
* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the function pointer
* ^^^^^^^^^ an opaque data pointer to pass to the thread
*
* pthread_create - start a new thread in the calling process
* the thread starts at the function pointer, and may be executed before `pthread_create` returns or `pthread_exit`
*/
pthread_self ()
pthread_join (pthread_t* thread, void** retval)
/* ^^^^^^^^^^^^^^^^^ a pthread library struct, holding thread info
* ^^^^^^^^^^^^^ a pointer to a pointer to a return value
*
* wait for a thread to terminate (maybe)
*/
pthread_exit (void* retval)
/* ^^^^^^^^^^^^ a pointer to a return value
*
* pthread_exit - terminates the calling thread, and returns a value
*/
return 0;
}
gcc -lpthread -g -Wall -O0 thread.c -o thread
/* thread_no_args.c */
#include <pthread.h> /* pthread_create, pthread_join, pthread_self, pthread_t */
#include <stdio.h> /* printf */
void* func (void* arg) {
printf("Hello from thread %lx\n", pthread_self());
return NULL;
}
int main (int argc, char** argv) {
pthread_t t1, t2, t3;
printf("main thread %lx starting a new thread\n", pthread_self());
pthread_create(&t1, NULL, func, NULL);
pthread_create(&t2, NULL, func, NULL);
pthread_create(&t3, NULL, func, NULL);
pthread_join(t1, NULL);
pthread_join(t2, NULL);
pthread_join(t3, NULL);
return 0;
}
/* thread_1_arg.c */
#include <pthread.h> /* pthread_create, pthread_join, pthread_self, pthread_t */
#include <stdio.h> /* printf */
void* func (void* arg) {
char* s = (char *) arg;
printf("Hello from thread %s\n", s);
return NULL;
}
int main (int argc, char** argv) {
pthread_t t1, t2, t3;
pthread_create(&t1, NULL, func, "a");
pthread_create(&t2, NULL, func, "b");
pthread_create(&t3, NULL, func, "c");
pthread_join(t1, NULL);
pthread_join(t2, NULL);
pthread_join(t3, NULL);
return 0;
}
/* thread_multiple_args.c
*
* this is, effectively, a procedure call - why?
*/
#include <pthread.h> /* pthread_create, pthread_join, pthread_self, pthread_t */
#include <stdio.h> /* printf */
typedef struct {
int num;
const char* str;
} foo_t;
void* func (void* arg) {
foo_t* val = arg;
printf("thread %lx was passed values %d, %s\n", pthread_self(), val->num, val->str);
return NULL;
}
int main (int argc, char** argv) {
foo_t v = {5678, "bar"};
pthread_t t;
printf("main thread %lx starting a new thread\n", pthread_self());
pthread_create(&t, NULL, func, &v);
pthread_join(t, NULL);
return 0;
}
/* thread_returning_values.c */
#include <ctype.h> /* toupper */
#include <pthread.h> /* pthread_create, pthread_join, pthread_self, pthread_t */
#include <stdio.h> /* printf */
#include <stdlib.h> /* malloc */
#include <string.h> /* strcpy, strlen */
typedef struct {
int num;
char* str;
} foo_t;
void* func (void* arg) {
foo_t* a = arg;
foo_t* b = malloc(sizeof(foo_t));
b->num = a->num * 2;
b->str = malloc(strlen(a->str) + 1);
strcpy(b->str, a->str);
for (char* p = b->str; *p; ++p) {
*p = toupper(*p);
}
return b;
}
int main (int argc, char** argv) {
foo_t v = {5678, "bar"};
foo_t* p;
pthread_t t;
printf("main thread %lx starting a new thread\n", pthread_self());
pthread_create(&t, NULL, func, &v);
pthread_join(t, (void**) &p);
printf("thread returned num = %d, str = %s\n", p->num, p->str);
return 0;
}
/* thread_returning_values_bad.c */
#include <pthread.h> /* pthread_create, pthread_join, pthread_self, pthread_t */
#include <stdio.h> /* printf */
typedef struct {
int num;
const char* str;
} foo_t;
void* func (void* arg) {
foo_t p;
// fill p
return &p; // <-- segfaults, bc returns a pointer to a stack-allocated local variable
}
int main (int argc, char** argv) {
foo_t v = {5678, "bar"};
foo_t* p;
pthread_t t;
printf("main thread %lx starting a new thread\n", pthread_self());
pthread_create(&t, NULL, func, &v);
pthread_join(t, (void**) &p);
printf("thread returned num = %d, str = %s\n", p->num, p->str);
return 0;
}
$ ./thread_returning_values_bad
main thread 7fab984ec0 starting a new thread
Segmentation fault
Race Conditions and Mutual Exclusion#
A race condition occurs when the result of a program depends on the interleaving of the execution of multiple threads that are accessing a critical section.
A critical section is a piece of code that accesses a shared piece of data, and which must not be executed by more than one thread concurrently.
To avoid race conditions, it must be ensured that only a single thread can execute a critical section at a given time. Mutual exclusion primitives would guaranteee that only one thread is executing the critical section while others are prevented from doing so.
atomics
locks
/* threads_no_mutex.c */
#include <pthread.h> /* pthread_create, pthread_join, pthread_self, pthread_t */
#include <stdio.h> /* printf */
static int counter = 0;
void* func (void* arg) {
for (int i = 0; i < 5000; ++i) {
++counter;
}
return NULL;
}
int main (int argc, char** argv) {
pthread_t t1, t2;
printf("counter = %d\n", counter);
pthread_create(&t1, NULL, func, NULL);
pthread_create(&t2, NULL, func, NULL);
pthread_join(t1, NULL);
pthread_join(t2, NULL);
printf("both threads completed, counter = %d\n", counter);
return 0;
}
If more than one thread executes the following instructions, the program produces nondeterministic output (due to uncontrollable OS scheduling)
mov 0x2e50(%rip),%eax # 4014 <counter>
add 0x1,%eax
mov %eax,0x2e47(%rip) # 4015 <counter>
one possible interleaving:
(values after the instruction)
OS T1 T2 program_counter eax counter
(before critical section) 100 0 50
mov 8049a1c,%eax 105 50 50 <-- copy the value in variable `counter` (50) to register `eax`
add $0x1,%eax 108 51 50 <-- add 1 to the value in register `eax` (51 = 1 + 50)
interrupt
save T1
restore T2 100 0 50
mov 8049a1c,%eax 105 50 50 <-- copy the value in variable `counter` (50) to register `eax`
add $0x1,%eax 108 51 50 <-- add 1 to the value in register `eax` (51 = 1 + 50)
mov %eax,8049a1c 113 51 51 <-- copy the value in register `eax` (51) to variable `counter` (which is currently 50)
interrupt
save T2
restore T1 108 51 51
mov %eax,8049a1c 113 51 51 <-- copy the value in register `eax` (51) to variable `counter` (which is already 51)
atomics#
/* threads_mutex_via_atomics.c */
#include <pthread.h> /* pthread_create, pthread_join, pthread_self, pthread_t */
#include <stdatomic.h> /* atomic_int - modifying a variable results in a single CPU instruction */
#include <stdio.h> /* printf */
static atomic_int counter = 0;
void* func (void* arg) {
for (int i = 0; i < 5000; ++i) {
++counter;
}
return NULL;
}
int main (int argc, char** argv) {
pthread_t t1, t2;
printf("counter = %d\n", counter);
pthread_create(&t1, NULL, func, NULL);
pthread_create(&t2, NULL, func, NULL);
pthread_join(t1, NULL);
pthread_join(t2, NULL);
printf("both threads completed, counter = %d\n", counter);
return 0;
}
Locks#
/* threads_mutex_via_locks.c */
#include <pthread.h> /* pthread_create, pthread_join, PTHREAD_MUTEX_INITIALIZER, pthread_mutex_lock, pthread_mutex_t, pthread_mutex_unlock, pthread_self, pthread_t */
#include <stdio.h> /* printf */
static int counter = 0;
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
void* func (void* arg) {
for (int i = 0; i < 5000; ++i) {
pthread_mutex_lock(&lock); //
++counter; // <-- critical section
pthread_mutex_unlock(&lock); //
}
return NULL;
}
int main (int argc, char** argv) {
pthread_t t1, t2;
printf("counter = %d\n", counter);
pthread_create(&t1, NULL, func, NULL);
pthread_create(&t2, NULL, func, NULL);
pthread_join(t1, NULL);
pthread_join(t2, NULL);
printf("both threads completed, counter = %d\n", counter);
return 0;
}
Resources#
Tools and Technologies
Courses
Brown’s Fundamentals of Computer Systems
[ h ] C Programming for Everybody
Online
YouTube#
Chris Kanich
[ y ]
12-02-2020
“what’s the difference between processes, threads, and io multiplexing?”[ y ]
11-17-2020
“The Linux socket API explained”.[ y ]
11-03-2020
“understanding mmap, the workhorse behind keeping memory access efficient in linux”.[ y ]
10-08-2020
“What’s behind a file descriptor in Linux? Also, i/o redirection with dup2.”.[ y ]
10-08-2020
“Interacting with files in Linux”.[ y ]
09-30-2020
“Signal handling in Linux”.[ y ]
09-23-2020
“Moving beyond fork() for process creation in Linux”.[ y ]
09-16-2020
“Introduction to Processes in Linux”.[ y ]
09-02-2020
“Linux Executable Symbol Relocation Explained”.[ y ]
09-01-2020
“How do linkers resolve symbols? Systems Programming CS Lecture”.
freeCodeCamp
[ y ]
05-29-2024
“Learn C Programming and OOP with Dr. Chuck [feat. classic book by Kernighan and Ritchie]”.[ y ]
08-18-2022
“Dr. Chuck reads C Programming (the classic book by Kernigan and Ritchie)”.[ y ]
01-27-2021
“Data Structures - Full Course Using C and C++”.[ y ]
08-15-2018
“C Programming Tutorial for Beginners”.
Jacob Sorber
[ y ]
06-03-2023
. “How to get an IP address from a host name? (Example in C)”.[ y ]
02-28-2023
. “Sockets and Pipes Look Like Files (Unix/fdopen)”.[ y ]
05-31-2022
. “Detect Corruption with a Checksum”.[ y ]
07-27-2021
. “Highlighting a Student Programming Project (Code Review)”.[ y ]
06-01-2021
. “Why are Progress Bars Always Wrong? Progress Bar Example in C.”.[ y ]
11-04-2020
. “Easy Networking in C (libcurl)”.[ y ]
02-18-2020
. “How one thread listens to many sockets with select in C.”.[ y ]
10-24-2019
. “How to write a multithreaded webserver using condition variables (Part 3)”.[ y ]
10-08-2019
. “Multithreaded Server Part 2: Thread Pools”.[ y ]
09-20-2019
. “How to write a multithreaded server in C (threads, sockets)”.[ y ]
01-30-2019
. “Dealing with Endianness Issues in your Programs”.[ y ]
12-04-2018
. “Socket servers can get client addresses. (accept, inet_ntop)”.[ y ]
11-20-2018
. “Program your own web server in C. (sockets)”.[ y ]
10-12-2018
. “How to build a web client? (sockets)”.
More
[ y ]
05-29-2024
Cococry. “Make a GUI Task Management App in pure C (no bloat required)”.[ y ]
04-11-2024
kimylamp. “Signals. I spent 2 years to understand this part.”.[ y ]
10-17-2024
Core Dumped. “How a Single Bit Inside Your Processor Shields Your Operating System’s Integrity”.
[ h ][ w ] Berkeley Packet Filter (BPF)
[ y ]
02-23-2023
freeCodeCamp. “Introduction to Linux – Full Course for Beginners”. YouTube.
[ y ]
11-25-2023
Nir Lichtman. “Making Minimalist Web Server in C on Linux”.[ y ]
03-15-2023
Tsoding Daily. “C is Just Better than Bash”.[ y ]
05-28-2023
NeuralNine. “5 Awesome Linux Terminal Tools You Must Know”.[ y ]
07-05-2024
typecraft. “This may be my favorite CLI tool ever”.[ y ]
09-26-2024
Dave’s Garage. “Kernel Mode vs User Mode: Why it Matters, What You Need to Know”.[ y ]
01-24-2017
_Drunk Engineer_. “OSI and TCP IP Models - Best Explanation”.
https://cs.brown.edu/courses/csci0300/2022/notes/l09.html
https://stackoverflow.com/questions/3952123/representation-of-negative-numbers-in-c
https://guyonbits.com/from-rodata-to-rwdata-introduction-to-memory-mapping-and-ld-scripts/
https://stackoverflow.com/questions/12102332/when-should-i-use-perror-and-fprintfstderr
https://www.cs.yale.edu/homes/aspnes/pinewiki/C(2f)Pointers.html
https://stackoverflow.com/questions/7672560/reading-in-a-variable-length-string-user-input-in-c
https://www.quora.com/Why-does-char-0-successfully-convert-a-char-to-int-in-C
Texts#
C
2019
Amini, Kamran. Extreme C: Taking you to the limit in Concurrency, OOP, and the most advanced capabilities of C. Packt.[ h ]
2015
Bryant, Randal E. & David R. O’Hallaron. Computer Systems: A Programmer’s Perspective. 3e.[ h ][ g ]
2022
Deitel, Paul & Harvey Deitel. C How to Program. 9e.2019
Gustedt, Jens. Modern C. Manning.1996
Hanson, David R. C Interfaces and Implementations: Techniques for Creating Reusable Software. Addison-Wesley Professional.2020
Harwani, B. M. Practical C Programming: Solutions for modern C developers to create efficient and well-structured programs. Packt.[ w ]
1988
Kernighan, Brian W. & Dennis M. Ritchie. C Programming Language. 2e. Pearson.2014
Klemens, Ben. 21st Century C: C Tips from the New School. 2e. O’Reilly.1999
Loudon, Kyle. Mastering Algorithms with C: Useful Techniques from Sorting to Encryption. O’Reilly.2021
Loy, Marc. Smaller C: Learn Code for Small Machines. O’Reilly.2022
Oualline, Stephen. Bare Metal C: Embedded Programming for the Real World. No Starch Press.2022
Preschern, Christopher. Fluent C: Principles, Practices, and Patters. O’Reilly.2013
Reese, Richard M. Understanding and Using C Pointers: Core Techniques for Memory Management. O’Reilly.2023
Sandler, Nora. Writing a C Compiler: Build a Real Programming Language from Scratch. No Starch Press.2020
Seacord, Robert C. Effective C: An Introduction to Professional C Programming. No Starch Press.1997
Sedgewick, Robert. “Algorithms in C, Parts 1-4: Fundamentals, Data Structures, Sorting, Searching”. 3e. Addison-Wesley Professional.2001
Sedgewick, Robert. “Algorithms in C, Part 5: Graph Algorithms”. 3e. Addison-Wesley Professional.[ h ][ w ]
2013
Stevens, W. Richard & Stephen A. Rago. Advanced Programming in the UNIX Environment. 3e. Addison-Wesley Professional.2020
Szuhay, Jeff. Learn C Programming: A beginner’s guide to learning C programming the easy and disciplined way. Packt.1989
Tenenbaum, Aaron M.; Yedidyah Langsam; & Moshe J. Augenstein. “Data Structures Using C”. Prentice Hall.1994
Van Der Linden, Peter. Expert C Programming: Deep C Secrets. Pearson.2019
Van Winkle, Lewis. Hands-On Network Programming with C: Learn socket programming in C and write secure and optimized network code. Packt.
Systems
[ h ] Arpaci-Dusseau, Remzi h. & Andrea C. Arpaci-Dusseau. Operating Systems: Three Easy Pieces.
Gregg, Brendan. (2020). Systems Performance: Enterprise and the Cloud. 2nd Ed. Pearson.
Matthews, Suzanne J.; Tia Newhall, & Kevin C. Webb. (2022). Dive Into Systems: A Gentle Introduction to Computer Systems. No Starch Press.
Linux
[ h ][ g ] Albing, Carl & JP Vossen. (2022). bash Idioms: Write Powerful, FLexible, Readable Shell Scripts. O’Reilly.
[ h ][ g ] Albing, Carl & JP Vossen. (2017). bash Cookbook: Solutions and Example for bash Users. O’Reilly.
[ h ] Arpaci-Dusseau, Remzi H. & Andrea C. Arpaci-Dusseau. Operating Systems: Three Easy Pieces.
Barrett, Daniel J. (2024). Linux Pocket Guide: Essential Commands. 4e. O’Reilly.
[ h ][ g ] Barrett, Daniel J. (2022). Efficient Linux at the Command Line: Boost Your Command-Line Skills. O’Reilly.
[ h ] Bryant, Randal E. & David R. O’Hallaron. (2015). Computer Systems: A Programmer’s Perspective. 3e. Pearson.
Calavera, David & Lorenzo Fontana. (2019). Linux Observability with BPF: Advanced Programming for Performance Analysis and Networking. O’Reilly.
[ h ] Cooper, Mendel. (2014). Advanced Bash-Scripting Guide: An in-depth exploration of the art of shell scripting. The Linux Documentation Project.
[ h ] Garrels, Machtelt. (2008). Bash Guide for Beginners. The Linux Documentation Project.
Gregg, Brendan. (2019). BPF Performance Tools: Linux System and Application Observability. Addison-Wesley Professional.
[ h ][ g ] Hausenblas, Michael. (2022). Learning Modern Linux: A Handbook for the Cloud Native Practitioner. O’Reilly.
[ h ][ g ] Hess, Kenneth. (2023). Practical Linux System Administration: A Guide to Installation, Configuration, and Management. O’Reilly.
[ w ] Kernighan, Brian W. & Rob Pike. (1984). The Unix Programming Environment.
[ h ] Kerrisk, Michael. (2010). The Linux Programming Interface: A Linux and UNIX System Programming Handbook. No Starch Press.
[ h ] Matthews, Suzanne J.; Tia Newhall; & Kevin C. Webb. (2022). Dive Into Systems: A Gentle Introduction to Computer Systems. No Starch Press.
Messier, Ric. (2024). Learning Kali Linux: Security Testing, Penetration Testing, & Ethical Hacking. 2e. O’Reilly.
Nemeth, Evi et al. (2021). Unix and Linux System Administration Handbook. 5e. Pearson.
Nikkel, Bruce. (2021). Practical Linux Forensics: A Guide for Digital Investigators. No Starch Press.
[ h ] OccupyTheWeb. (2018). Linux Basics for Hackers: Getting Started with Networking, Scripting, and Security in Kali. No Starch Press.
[ w ] Raymond, Eric S. (2003). The Art of Unix Programming. Addison-Wesley Professional.
Robbins, Arnold. (2016). Bash Pocket Reference: Help for Power Users & Sys Admins. O’Reilly.
Schroder, Carla. (2021). Linux Cookbook: Essential Skills for Linux Users and System & Network Administrators. 2e. O’Reilly.
Shotts, William E. (2019). The Linux Command Line: A Complete Introduction. 2e. No Starch Press.
[ h ][ w ] Stevens, W. Richard & Stephen A. Rago. (2013). Advanced Programming in the UNIX Environment. 3e. Addison-Wesley Professional.
[ w ] Tanenbaum, Andrew S. Modern Operating Systems. 5e.
[ w ] Tanenbaum, Andrew S. Operating Systems: Design and Implementation. Pearson.
[ h ][ g ] Troncone, Paul & Carl Albing. (2019). Cybersecurity Ops with bash: Attack, Defend, and Analyze from the Command Line. O’Reilly.
Ward, Brian. (2021). How Linux Works: What Every Superuser Should Know. 3e. No Starch Press.
Weiss, Steward. (2025). Introduction to System Programming in Linux. No Starch Press.
2003
[ g ] Stevens, W. Richard; Bill Fenner; & Andrew M. Rudoff. The Sockets Networking API: UNIX Network Programming Vol. I. 3e. Addison-Wesley Professional.
2011
Fall, Kevin R. & W. Richard Stevens. TCP/IP Illustrated, Volume 1: The Protocols. 2e. Addison-Wesley Professional.
1995
Wright, Gary R. & W. Richard Stevens. TCP/IP Illustrated, Volume 2: The Implementation. Addison-Wesley Professional.
2014
Comer, Douglas. Internetworking with TCP/IP, Volume 1: Principles, Protocol, and Architecture. 6e. Pearson.
Networking
Chou, Eric. (2020). Mastering Python Networking. 3rd Ed. Packt.
[ g ][ g ] Collins, Michael. (2017). Network Security Through Data Analysis: From Data to Action. 2nd Ed. O’Reilly.
Fall, Kevin R. & W. Richard Stevens. (2011). TCP/IP Illustrated Vol I: The Protocols. 2nd Ed. Addison-Wesley Professional.
Fettig, Abe & Jessica McKellar. (2013). Twisted Network Programming Essentials: Event-Driven Network Programming with Python. 2nd Ed. O’Reilly.
Gilman, Evan & Doug Barth. (2017). Zero Trust Networks: Building Secure Systems in Untrusted Networks. O’Reilly.
Keshav, Srinivasan. (2012). Mathematical Foundations of Computer Networking. Addison-Wesley Professional.
Kozierok, Charles M. (2005). TCP/IP Guide: A Comprehensive, Illustrated Internet Protocols Reference. No Starch Press.
[ h ] Kurose, James & Keith Ross. (2016). Computer Networking: A Top-Down Approach. 8th Ed. Pearson.
Liu, Cricket & Paul Albitz. DNS and Bind: Help for System Administrators. 5th Ed. O’Reilly.
Nadeau, Thomas D. & Ken Gray. (2013). SDN Software Defined Networks: An Authoritative Review of Network Programmability Technologies. O’Reilly.
[ g ] Oswalt et al. (2023). Network Programmability and Automation: Skills for the Next-Generation Network Engineer. 2nd Ed. O’Reilly.
[ h ][ d ][ g ] Peterson, Larry & Bruce Davie. (2021). Computer Networks: A Systems Approach. The Morgan Kaufmann Series in Networking.
[ h ][ d ][ g ] Peterson, Larry et al. Software-Defined Networks: A Systems Approach.
Sanders, Chris. (2017). Practical Packet Analysis: Using Wireshark to Solve Real-World Network Problems. 3rd Ed. No Starch Press.
Tanenbaum, Andrew S.; Nick Feamster; & David Wehterall. (2021). Computer Networks. 6th Ed. Pearson.
White, Russ & Ethan Banks. (2017). Computer Networking Problems and Solutions: An Innovative Approach to Building Resilient, Modern Networks. Pearson.
Profiling and Performance
2020
Gregg, Brendan. Systems Performance: Enterprise and the Cloud. Addison-Wesley Professional.
[ h ][ pdf ] Arndt, Jörg. Matters Computational: Ideas, Algorithms, and Source Code.
Figures#
[ w ]
1937-2019
Cohen, Danny
Terms#
[ w ] Address Space
[ w ] ANSI C
[ w ] Bounds Checking
[ w ] BSS (Block Starting Symbol) Segment
[ w ] C11
[ w ] C17
[ w ] C23
[ w ] C99
[ w ] C Data Types
[ w ] C File I/O
[ w ] C POSIX Library
[ w ] C Preprocessor
[ w ] C
[ w ] C Standard Library
[ w ] C Syntax
[ w ] Call Stack
[ w ] Checksum
[ w ] Code/Text Segment
[ w ] Column-Major Order
[ w ] Data Segment
[ w ] Device Driver
[ w ] Directive
[ w ] Include Directive
[ w ] Instruction Scheduling
[ w ] Interrupt
[ w ] Interrupt Handler
[ w ] Linker
[ w ] Loader
[ w ] Make
[ w ] Memory Address
[ w ] Memory Management
[ w ] Object File
[ w ] Pointer
[ w ] Pragma
[ w ]
printf
[ w ] Reference
[ w ] Relocation
[ w ] Row-Major Order
[ w ]
scanf
[ w ] Shared Library
[ w ]
sizeof
[ w ] Stack
[ w ] Stack-Based Memory Allocation
[ w ]
struct
[ w ] Systems Programming
[ w ]
typedef
[ w ] Union
[ w ] 64-bit computing
[ w ] Addressing Mode
[ w ] Berkeley Sockets
[ w ] Berkeley Software Distribution, history
[ w ] C Data Types
[ w ] C Standard Library
[ w ] Instruction Set Architecture
[ w ] Loader
[ w ] Network Socket
[ w ] 32-bit Computing
[ w ] Berkeley Sockets
[ w ] Berkeley Software Distribution (BSD)
[ w ] Bit Numbering
[ w ] Bitwise Operation
[ w ] Byte
[ w ] Byte Addressing
[ w ] Endianness
[ w ] File Descriptor
[ w ] Handle
[ w ] Inter Process Communication (IPC)
[ w ] Network Socket
[ w ] POSIX
[ w ] Standard Stream
[ w ] Unix Domain Socket
[ w ] Unix Philosophy
[ w ] Word
[ w ] Word Addressing
[ w ] Word Mark
Memory
[ w ] Address Space
[ w ] Bank Switching
[ w ] Cache
[ w ] CPU Cache
[ w ] Flat Memory Model
[ w ] Memory Address
[ w ] Memory Paging
Memory Segmentation
C Standard Library
[ w ]
errno.h
Programming
[ w ] Automatic Variable
[ w ] Continuation
[ w ] Continuation-Passing Style
[ w ] Control Flow
[ w ] Global Variable
[ w ] Local Variable
[ w ] Return Statement
[ w ] Re-entrancy
[ w ] Scope
[ w ] Static Variable
[ w ] Variable
Architecture
[ w ] 1-bit Computing
[ w ] 8-bit Computing
[ w ] 16-bit Computing
[ w ] 8008 (8-bit microprocessor)
[ w ] 8080 (8-bit microprocessor)
[ w ] 8086 (16-bit microprocessor)
[ w ] 8088
[ w ] 80186
[ w ] 80286 (16-bit microprocessor)
[ w ] 80386 (i386) (32-bit microprocessor)
[ w ] 80486 (i486) (32-bit microprocessor)
[ w ] Asynchronous Circuit
[ w ] Bus Interface Unit (BIU)
[ w ] Cache Prefetching
[ w ] Clock Generator
[ w ] Clock Signal
[ w ] Combinatorial Logic
[ w ] Complex Instruction Set Computer (CISC)
[ w ] Duty Cycle
[ w ] Execution Unit (EU)
[ w ] Flip-Flop
[ w ] Instruction Set Architecture (ISA)
[ w ] Instructions Per Second (IPS)
[ w ] Logic Gate
[ w ] Opcode
[ w ] Prefetch Input Queue (PIQ)
[ w ] Race Condition
[ w ] Reduced Instruction Set Computer (RISC)
[ w ] Sequential Logic
[ w ] Synchronous Circuit
[ w ] x86
[ w ] x86-64 (x64)
[ w ] x86 Memory Segmentation
[ w ] 32-bit x86
64-bit x64