Data Processing

Creating a SparkSession Object

1
2
3
4
from pyspark.sql import SparkSession
spark=SparkSession.builder.appName('data_processing').getOrCreate()
import pyspark.sql.functions as F
from pyspark.sql.types import *

Creating Dataframes

1
2
3
4
5
6
7
8
9
10
11
12
13
# Explicit schema: four string columns followed by an integer age.
schema = (
    StructType()
    .add("user_id", "string")
    .add("country", "string")
    .add("browser", "string")
    .add("OS", 'string')
    .add("age", "integer")
)

# One tuple per user, values in schema column order.
sample_rows = [
    ("A203", 'India', "Chrome", "WIN", 33),
    ("A201", 'China', "Safari", "MacOS", 35),
    ("A205", 'UK', "Mozilla", "Linux", 25),
]
df = spark.createDataFrame(sample_rows, schema=schema)

df.printSchema()  # column names and types
df.show()         # the rows themselves

Null Values

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Reuses `schema`; the first row has no country and the second no browser.
rows_with_nulls = [
    ("A203", None, "Chrome", "WIN", 33),
    ("A201", 'China', None, "MacOS", 35),
    ("A205", 'UK', "Mozilla", "Linux", 25),
]
df_na = spark.createDataFrame(rows_with_nulls, schema=schema)
df_na.show()

# Fill nulls: a single replacement value, or one value per column.
df_na.fillna('0').show()
per_column_defaults = {'country': 'USA', 'browser': 'Safari'}
df_na.fillna(per_column_defaults).show()

# Drop rows: any row containing a null, or only rows where `country` is null.
df_na.na.drop().show()
df_na.na.drop(subset='country').show()

# Replace a value, and drop a whole column.
df_na.replace("Chrome", "Google Chrome").show()
df_na.drop('user_id').show()

# Load the customer dataset; the first line is the header and types are inferred.
df = spark.read.csv("customer_data.csv", header=True, inferSchema=True)
df.count()          # number of rows
len(df.columns)     # number of columns
df.show(3)          # first three rows
df.summary().show() # descriptive statistics (describe)

Subset of a Dataframe

1
2
3
4
5
6
7
# Column projection.
df.select(['Customer_subtype', 'Avg_Salary']).show()

# Single-condition row filter: count the matches, then display them.
salary_above_1m = df['Avg_Salary'] > 1000000
df.filter(salary_above_1m).count()
df.filter(salary_above_1m).show()

# Two conditions expressed as chained filter() calls ...
salary_above_500k = df['Avg_Salary'] > 500000
more_than_two_houses = df['Number_of_houses'] > 2
df.filter(salary_above_500k).filter(more_than_two_houses).show()

# ... or as a single where() with the conditions combined by &.
df.where(salary_above_500k & more_than_two_houses).show()

Aggregations

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Row count per customer subtype.
df.groupBy('Customer_subtype').count().show()

# Value counts for every column except the salary, most frequent value first.
# (The loop and if bodies must be indented; without that the snippet does not parse.)
for column_name in df.columns:
    if column_name != 'Avg_Salary':
        print(f" Aggregation for {column_name}")
        (
            df.groupBy(column_name)
            .count()
            .orderBy('count', ascending=False)
            .show(truncate=False)
        )

# Mean, max, min and total salary per main customer type, one table each.
for aggregate in (F.mean, F.max, F.min, F.sum):
    df.groupBy('Customer_main_type').agg(aggregate('Avg_Salary')).show()

# Whole DataFrame ordered by salary, highest first.
df.sort("Avg_Salary", ascending=False).show()

# Subtypes ordered by mean salary; show up to 50 rows without truncation.
(
    df.groupBy('Customer_subtype')
    .agg(F.avg('Avg_Salary').alias('mean_salary'))
    .orderBy('mean_salary', ascending=False)
    .show(50, False)
)

# Subtypes ordered by their highest salary.
(
    df.groupBy('Customer_subtype')
    .agg(F.max('Avg_Salary').alias('max_salary'))
    .orderBy('max_salary', ascending=False)
    .show()
)

Collect

1
2
3
4
5
6
7
8
9
# Per subtype: the distinct house counts (set), then every house count (list).
houses_by_subtype = df.groupby("Customer_subtype")
houses_by_subtype.agg(F.collect_set("Number_of_houses")).show()
houses_by_subtype.agg(F.collect_list("Number_of_houses")).show()

# Add a column holding the same literal value in every row.
df = df.withColumn('constant', F.lit('finance'))
df.select('Customer_subtype', 'constant').show()

User-Defined Functions (UDFs)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from pyspark.sql.functions import udf
df.groupby("Avg_age").count().show()

def age_category(age):
    """Map an age-bucket label to a coarse age category.

    Args:
        age: Bucket label such as ``"20-30 years"``.

    Returns:
        ``"Young"`` for 20-30, ``"Mid Aged"`` for 30-40, ``"Old"`` for
        40-50 and 50-60, and ``"Very Old"`` for every other value
        (including ``None``).
    """
    if age == "20-30 years":
        return "Young"
    if age == "30-40 years":
        return "Mid Aged"
    if age in ("40-50 years", "50-60 years"):
        return "Old"
    return "Very Old"
age_udf=udf(age_category,StringType())
df=df.withColumn('age_category',age_udf(df['Avg_age']))
df.select('Avg_age','age_category').show()
df.groupby("age_category").count().show()

df.select('Avg_Salary').summary().show()
min_sal=1361
max_sal=48919896
from pyspark.sql.functions import pandas_udf, PandasUDFType
def scaled_salary(salary):
    """Min-max scale ``salary`` with the module-level ``min_sal`` and ``max_sal``.

    Args:
        salary: Value(s) to scale; anything supporting ``-`` and ``/``
            with the bounds.

    Returns:
        ``(salary - min_sal) / (max_sal - min_sal)``.
    """
    return (salary - min_sal) / (max_sal - min_sal)
scaling_udf = pandas_udf(scaled_salary, DoubleType())
df.withColumn("scaled_salary",scaling_udf(df['Avg_Salary'])) \
.show(10,False)

Joins

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Lookup table: main customer type -> region code.
region_schema = (
    StructType()
    .add("Customer_main_type", "string")
    .add("Region Code", "string")
)
region_rows = [
    ('Family with grown ups', 'PN'),
    ('Driven Growers', 'GJ'),
    ('Conservative families', 'DD'),
    ('Cruising Seniors', 'DL'),
    ('Average Family ', 'MN'),
    ('Living well', 'KA'),
    ('Successful hedonists', 'JH'),
    ('Retired and Religious', 'AX'),
    ('Career Loners', 'HY'),
    ('Farmers', 'JH'),
]
region_data = spark.createDataFrame(region_rows, schema=region_schema)

# Join on the shared key column, then count customers per region.
new_df = df.join(region_data, on='Customer_main_type')
new_df.groupby("Region Code").count().show()

Pivoting

1
2
3
4
5
6
7
8
9
10
# One row per main customer type; the pivot column's values become columns
# holding the summed salary, with missing combinations shown as 0.
by_main_type = df.groupBy('Customer_main_type')
by_main_type.pivot('Avg_age').sum('Avg_Salary').fillna(0).show()

# Same layout pivoted on `label`.
# NOTE(review): assumes customer_data.csv provides a `label` column - confirm.
by_main_type.pivot('label').sum('Avg_Salary').fillna(0).show()

Window Functions or Windowed Aggregates

1
2
3
4
5
6
7
8
9
from pyspark.sql.window import Window
from pyspark.sql.functions import col, row_number

# Global ranking: number all rows by salary, highest first.
win = Window.orderBy(df['Avg_Salary'].desc())
# show() returns None, so it must not be chained onto the assignment;
# otherwise `df` is lost and the next statement fails.
df = df.withColumn('rank', row_number().over(win))
df.show()

# Ranking within each customer subtype; this overwrites the global `rank`.
win_1 = Window.partitionBy("Customer_subtype").orderBy(df['Avg_Salary'].desc())
df = df.withColumn('rank', row_number().over(win_1))
df.groupBy('rank').count().orderBy('rank').show()

# Top three salaries of every subtype.
df.filter(col('rank') < 4).show()

Cmake

Cmake Cookbook

Chapter 1: From a Simple Executable to Libraries

1. Compiling a single source file into an executable

CMakeLists.txt

1
2
3
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
project(recipe-01 LANGUAGES CXX) # default language is c++
add_executable(hello-world hello-world.cpp)

Normal steps:

1
2
3
4
$ mkdir -p build
$ cd build
$ cmake .. # ../CMakeLists.txt exists
$ cmake --build .

Single command build:

1
$ cmake -H. -Bbuild

Build outside:

1
2
3
4
$ mkdir -p /tmp/someplace
$ cd /tmp/someplace
$ cmake /path/to/source
$ cmake --build .

available targets:

1
2
3
4
5
6
7
8
9
10
11
12
$ cmake --build . -t help # --target

The following are some of the valid targets for this Makefile:
... all (the default if no target is provided)
... clean
... depend
... edit_cache
... rebuild_cache
... hello-world
... hello-world.o
... hello-world.i
... hello-world.s

2. Switching generators

Choose Ninja:

1
2
3
4
$ mkdir -p build
$ cd build
$ cmake -G Ninja .. # must install Ninja first
$ cmake --build .

Single command:

1
cmake -H. -Bbuild -GNinja

3. Building and linking static and shared libraries

CmakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

project(recipe-03 LANGUAGES CXX)

# generate a library from sources
add_library(message
STATIC
Message.hpp
Message.cpp
)

add_executable(hello-world hello-world.cpp)

target_link_libraries(hello-world message)
  • add_library(message STATIC Message.hpp Message.cpp): add_library’s first parameter is the target name, which can be referred to anywhere in the CMakeLists.txt. The actual file name is prefixed with lib and suffixed with the proper extension by CMake, determined by the library type (STATIC or SHARED) and the OS.
  • target_link_libraries(hello-world message): links the library into the executable and makes sure hello-world depends correctly on the library. Must come after add_library.

After build, we get libmessage.a together with executable hello-world.

second parameter of add_library:

  • STATIC:用于创建静态库,即编译文件的打包存档,以便在链接其他目标时使用,例如:可执行文件。
  • SHARED:用于创建动态库,即可以动态链接,并在运行时加载的库。可以在 CMakeLists.txt 中使用 add_library(message SHARED Message.hpp Message.cpp) 从静态库切换到动态共享对象(DSO)。
  • OBJECT:可将给定 add_library 的列表中的源码编译到目标文件,不将它们归档到静态库中,也不能将它们链接到共享对象中。如果需要一次性创建静态库和动态库,那么使用对象库尤其有用。我们将在本示例中演示。
  • MODULE:又为 DSO 组。与 SHARED 库不同,它们不链接到项目中的任何目标,不过可以进行动态加载。该参数可以用于构建运行时插件。

other special type of library:

Make dynamic linking, optional to set name to message-shared and use set_target_properties so #include "message.h" is available in source code:

CmakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

project(recipe-03 LANGUAGES CXX)

add_library(message-shared
SHARED
Message.hpp
Message.cpp
)
set_target_properties(message-shared
PROPERTIES
OUTPUT_NAME "message"
)

add_executable(hello-world hello-world.cpp)

target_link_libraries(hello-world message-shared)

4. Controlling compilation with conditionals

CmakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Require CMake 3.5 or newer.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# Declare the project; only C++ is enabled.
project(recipe-04 LANGUAGES CXX)

# Hard-coded toggle: ON builds Message.* as a library,
# OFF compiles it straight into the executable.
set(USE_LIBRARY OFF)

message(STATUS "Compile sources into a library? ${USE_LIBRARY}")

# BUILD_SHARED_LIBS is CMake's global switch for add_library() calls
# that do not name a library type.
set(BUILD_SHARED_LIBS OFF)

# Sources shared by both build variants.
list(APPEND _sources Message.hpp Message.cpp)

if(USE_LIBRARY)
  # BUILD_SHARED_LIBS is OFF, so this creates a static library.
  add_library(message ${_sources})

  add_executable(hello-world hello-world.cpp)
  target_link_libraries(hello-world message)
else()
  add_executable(hello-world hello-world.cpp ${_sources})
endif()

5. Presenting options to the user

CmakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Require CMake 3.5 or newer.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# Declare the project; only C++ is enabled.
project(recipe-05 LANGUAGES CXX)

# User-facing switch, OFF unless overridden with -D USE_LIBRARY=ON.
option(USE_LIBRARY "Compile sources into a library" OFF)

message(STATUS "Compile sources into a library? ${USE_LIBRARY}")

include(CMakeDependentOption)

# Second option depends on the value of the first.
cmake_dependent_option(
  MAKE_STATIC_LIBRARY "Compile sources into a static library" OFF
  "USE_LIBRARY" ON
  )

# Third option depends on the value of the first.
cmake_dependent_option(
  MAKE_SHARED_LIBRARY "Compile sources into a shared library" ON
  "USE_LIBRARY" ON
  )

set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

# Sources shared by every build variant.
list(APPEND _sources Message.hpp Message.cpp)

if(USE_LIBRARY)
  message(STATUS "Compile sources into a STATIC library? ${MAKE_STATIC_LIBRARY}")
  message(STATUS "Compile sources into a SHARED library? ${MAKE_SHARED_LIBRARY}")

  # Both branches below define the targets `message` and `hello-world`.
  # Enabling both options would make CMake fail on a duplicate target,
  # so report the conflict explicitly instead.
  if(MAKE_SHARED_LIBRARY AND MAKE_STATIC_LIBRARY)
    message(FATAL_ERROR
      "MAKE_SHARED_LIBRARY and MAKE_STATIC_LIBRARY are both ON; enable only one of them")
  endif()

  if(MAKE_SHARED_LIBRARY)
    add_library(message SHARED ${_sources})

    add_executable(hello-world hello-world.cpp)

    target_link_libraries(hello-world message)
  endif()

  if(MAKE_STATIC_LIBRARY)
    add_library(message STATIC ${_sources})

    add_executable(hello-world hello-world.cpp)

    target_link_libraries(hello-world message)
  endif()
else()
  add_executable(hello-world hello-world.cpp ${_sources})
endif()

to toggle options:

1
$ cmake -D USE_LIBRARY=OFF -D MAKE_SHARED_LIBRARY=ON ..

6. Specifying the compiler

preferred:

1
$ cmake -D CMAKE_CXX_COMPILER=clang++ ..

not preferred:

1
$ env CXX=clang++ cmake ..

CMakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Require CMake 3.5 or newer.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# Declare the project with both C and C++ enabled.
project(recipe-06 LANGUAGES C CXX)

# Report what CMake detected for the C++ compiler.
message(STATUS "Is the C++ compiler loaded? ${CMAKE_CXX_COMPILER_LOADED}")
if(CMAKE_CXX_COMPILER_LOADED)
  message(STATUS "The C++ compiler ID is: ${CMAKE_CXX_COMPILER_ID}")
  message(STATUS "Is the C++ from GNU? ${CMAKE_COMPILER_IS_GNUCXX}")
  message(STATUS "The C++ compiler version is: ${CMAKE_CXX_COMPILER_VERSION}")
endif()

# Same report for the C compiler.
message(STATUS "Is the C compiler loaded? ${CMAKE_C_COMPILER_LOADED}")
if(CMAKE_C_COMPILER_LOADED)
  message(STATUS "The C compiler ID is: ${CMAKE_C_COMPILER_ID}")
  message(STATUS "Is the C from GNU? ${CMAKE_COMPILER_IS_GNUCC}")
  message(STATUS "The C compiler version is: ${CMAKE_C_COMPILER_VERSION}")
endif()

7. Switching the build type

  • Debug:用于在没有优化的情况下,使用带有调试符号构建库或可执行文件。
  • Release:用于构建的优化的库或可执行文件,不包含调试符号。
  • RelWithDebInfo:用于构建较少的优化库或可执行文件,包含调试符号。
  • MinSizeRel:用于不增加目标代码大小的优化方式,来构建库或可执行文件。

CMakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# set minimum cmake version
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# project name and language
project(recipe-07 LANGUAGES C CXX)

# we default to Release build type
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
endif()

message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")

message(STATUS "C flags, Debug configuration: ${CMAKE_C_FLAGS_DEBUG}")
message(STATUS "C flags, Release configuration: ${CMAKE_C_FLAGS_RELEASE}")
message(STATUS "C flags, Release configuration with Debug info: ${CMAKE_C_FLAGS_RELWITHDEBINFO}")
message(STATUS "C flags, minimal Release configuration: ${CMAKE_C_FLAGS_MINSIZEREL}")

message(STATUS "C++ flags, Debug configuration: ${CMAKE_CXX_FLAGS_DEBUG}")
message(STATUS "C++ flags, Release configuration: ${CMAKE_CXX_FLAGS_RELEASE}")
message(STATUS "C++ flags, Release configuration with Debug info: ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
message(STATUS "C++ flags, minimal Release configuration: ${CMAKE_CXX_FLAGS_MINSIZEREL}")

to switch build type:

1
$ cmake -D CMAKE_BUILD_TYPE=Debug ..

8. Controlling compiler flags

CMakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# set minimum cmake version
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# project name and language
project(recipe-08 LANGUAGES CXX)

message("C++ compiler flags: ${CMAKE_CXX_FLAGS}")

list(APPEND flags "-fPIC" "-Wall")
if(NOT WIN32)
list(APPEND flags "-Wextra" "-Wpedantic")
endif()

add_library(geometry
STATIC
geometry_circle.cpp
geometry_circle.hpp
geometry_polygon.cpp
geometry_polygon.hpp
geometry_rhombus.cpp
geometry_rhombus.hpp
geometry_square.cpp
geometry_square.hpp
)

target_compile_options(geometry
PRIVATE
${flags}
)

add_executable(compute-areas compute-areas.cpp)

target_compile_options(compute-areas
PRIVATE
"-fPIC"
)

target_link_libraries(compute-areas geometry)

9. Setting the standard for the language

CMakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# set minimum cmake version
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# project name and language
project(recipe-09 LANGUAGES CXX)

set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

add_library(animals
SHARED
Animal.cpp
Animal.hpp
Cat.cpp
Cat.hpp
Dog.cpp
Dog.hpp
Factory.hpp
)

set_target_properties(animals
PROPERTIES
CXX_STANDARD 14
CXX_EXTENSIONS OFF
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE 1
)

add_executable(animal-farm animal-farm.cpp)

set_target_properties(animal-farm
PROPERTIES
CXX_STANDARD 14
CXX_EXTENSIONS OFF
CXX_STANDARD_REQUIRED ON
)

target_link_libraries(animal-farm animals)

10. Using control flow constructs

CMakeLists.txt

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Require CMake 3.5 or newer.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# Declare the project; only C++ is enabled.
project(recipe-10 LANGUAGES CXX)

add_library(geometry
  STATIC
    geometry_circle.cpp
    geometry_circle.hpp
    geometry_polygon.cpp
    geometry_polygon.hpp
    geometry_rhombus.cpp
    geometry_rhombus.hpp
    geometry_square.cpp
    geometry_square.hpp
  )

# The library as a whole is compiled with -O3.
target_compile_options(geometry
  PRIVATE
    -O3
  )

# These two sources additionally get -O2 as a per-file flag.
list(
  APPEND sources_with_lower_optimization
    geometry_circle.cpp
    geometry_rhombus.cpp
  )

# foreach(... IN LISTS <name>) takes the list variable by name.
message(STATUS "Setting source properties using IN LISTS syntax:")
foreach(_source IN LISTS sources_with_lower_optimization)
  set_source_files_properties(${_source} PROPERTIES COMPILE_FLAGS -O2)
  message(STATUS "Appending -O2 flag for ${_source}")
endforeach()

# Plain foreach() iterates over its arguments, so the list variable
# has to be expanded with ${...}.
message(STATUS "Querying sources properties using plain syntax:")
foreach(_source ${sources_with_lower_optimization})
  get_source_file_property(_flags ${_source} COMPILE_FLAGS)
  message(STATUS "Source ${_source} has the following extra COMPILE_FLAGS: ${_flags}")
endforeach()

add_executable(compute-areas compute-areas.cpp)

target_link_libraries(compute-areas geometry)

Variable

Stylistic Conventions

  • Names of variables are in lower case.

  • Word separation can be indicated by underscores (‘_’), but use of underscores is discouraged unless the name would be hard to read otherwise.

  • Names of Types and Modules begin with a capital letter and word separation is shown with upper camel case instead of underscores.

  • Names of functions and macros are in lower case, without underscores.

  • Functions that write to their arguments have names that end in !. These are sometimes called “mutating” or “in-place” functions because they are intended to produce changes in their arguments after the function is called, not just return a value.

Package

Add package

1
# The standard-library package is named "Dates" (plural), matching `using Dates`.
import Pkg; Pkg.add("Dates")

Use package

1
2
3
4
5
using Dates
isleapyear(4)

import Dates
Dates.isleapyear(4)

Template literals (String literals)

add variables at runtime

1
2
let msg = `some ${variable1}, 
${"$" + variable2}`;

use tag function to add bold style to variables in template literals

1
2
3
4
5
6
7
8
9
10
11
/**
 * Tag function that wraps every interpolated value in <b>…</b>.
 *
 * @param {TemplateStringsArray} strings - literal parts of the template; the raw text is used.
 * @param {...*} values - interpolated values, in order of appearance.
 * @returns {string} the literal parts joined with each value wrapped in <b> tags.
 */
function highlighText(strings, ...values) {
  let str = "";
  // `length`, not `lengths`: the misspelled property is undefined, so the loop never ran.
  for (let i = 0; i < strings.raw.length; i++) {
    if (i > 0) {
      // values[i - 1] sits between literal parts i - 1 and i.
      str += `<b>${values[i - 1]}</b>`;
    }
    str += strings.raw[i];
  }
  // Without this return the tagged template evaluated to undefined.
  return str;
}

let msg = highlighText`some ${variable1}, ${"$" + variable2}`;

var, let and const

var:

  • no block scope
  • can be redeclared anywhere
  • can be used and reassigned anywhere

let:

  • block scope
  • can’t be redeclared within scope
  • can be reassigned within scope

const:

  • block scope
  • can’t be reassigned or redeclared
  • the contents of the value (e.g. array elements or object properties) can still be changed

const is used mostly for readability

1
2
3
4
// const prevents rebinding the name, not mutating the array it refers to
const arr = [3, 4, 5];
arr = 3; // error: a const binding cannot be reassigned
arr[0] = 22; // okay! the array contents are still mutable
var arr = Object.freeze([3, 4, 5]); // instead: an alternative declaration that freezes the array so its elements cannot be changed

Destructuring an array or object

array:

  • don’t have to catch all values in the array
  • variable is undefined if arr is not enough to unpack
  • can set default value
1
var [a, b = true, c, ...moreArgs] = arr;

object:

1
var { Id, ApplicantName = "Barry" } = obj;

String

1
2
3
4
5
6
str.trim(); // trim white space
str.toLowerCase();
str.startsWith("dr"); // the method is startsWith, not startWith
str.endsWith("md", 4); // the method is endsWith; checks only the first 4 characters
str.search("house"); // index of the first match, or -1
str.includes("house");

Number

1
2
3
Number.isInteger(num); // 25.0 is integer
Number.MAX_SAFE_INTEGER; // Number.MIN_SAFE_INTEGER
Number.isSafeInteger(num);

Symbol

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
var id = Symbol("My Id");
var id2 = Symbol("My Id");
console.log(id === id2); // false

var id = Symbol.for("My Id");
var id2 = Symbol.for("My Id");
console.log(id === id2); // true

// create secret property
// Symbol.for() uses the global registry, so the same key can be looked up again;
// a plain Symbol("income") would be unique and impossible to recreate.
var loan = {
  name: "Barry",
  [Symbol.for("income")]: 15000
};
console.log(loan[Symbol.for("income")]); // 15000
console.log(Object.getOwnPropertyNames(loan)); // ["name"]
console.log(Object.getOwnPropertySymbols(loan)); // [Symbol(income)]

OSI Model

OSI_model

Protocols

Data Transfer

http

File Transfer

ftp

Email

email

Authentication

auth

Network Service

dhcp

Domain Name System(DNS)

dns

1
2
3
$ nslookup google.com # check google's ip
$ nslookup
> facebook.com
1
2
$ nslookup
> server 8.8.8.8 # config dns server

Network Time Protocol(NTP)

ntp

Network Management

ssh_telnet

ssh: encrypted; telnet: clear text

ssh can also be used to encrypt file transfers (e.g. SFTP)

snmp

Walk the tree: server collect information(statistics, log) from client

Trap: client send SNMP trap to server

Remote Desktop Protocol(RDP)

rdp

Audio/Visual Protocol

h323

sip

session initiation protocol: voice over ip communication

TCP and UDP

TCP: transmission control protocol

UDP: user datagram protocol

TCP

reliable, verifiable(sequence numbers / acknowledge numbers), notion of session

The 3-way handshake

3way

  1. SYN: the client sends a SYN message and waits for the server's reply (the server changes state to SYN-RECEIVED)
  2. SYN-ACK: send msg to client
  3. ACK: client respond to server

then session establish between client and server by layer 4 protocol

client or server can ask for missing / additional information from each other

then use layer 7 protocol

The 4-way Disconnect

4way

  1. FIN: server to client
  2. FIN-ACK: client to server
  3. FIN: client to server
  4. FIN-ACK

shutdown the session

RST: tcp reset, server to client, to shutdown quickly

UDP

no 3-way handshake, no reliable communication, no sequence numbers / acknowledge numbers

very efficient for small data transfers (e.g. DNS)

udp

Port numbers(Transport layer addressing)

port

Source port and Destination port

src_dstnt

Application layer protocol dependency

protocol_dependency_1

protocol_dependency_2

IP Addressing

  • unicast: class A, B, C(public internet), one device to one device

  • multicast: class D (e.g. an enterprise organization’s live video streaming), one device to many devices

  • experimental: class E

class A

class_a

class B

class_b

class C

class_c

class D

class_d

Address types

address_types

Private ip address

127.0.0.1: loopback address, localhost

private_ip

Kubernetes Architecture

k8s_architecture

Master Node

master_node

control plane responsible for managing the state of a Kubernetes cluster(brain)

users send requests to the master node via a Command Line Interface (CLI) tool, a Web User-Interface (Web UI) Dashboard, or Application Programming Interface (API)

master node replicas are added to the cluster, configured in High-Availability (HA) mode to ensure the control plane’s fault tolerance. While only one of the master node replicas actively manages the cluster, the control plane components stay in sync across the master node replicas

4 components:

  1. API server
  2. Scheduler
  3. Controller managers
  4. etcd

API Server

a central control plane component, coordinating all the administrative tasks

reads the Kubernetes cluster’s current state from etcd and, after a call’s execution, saves the resulting state of the Kubernetes cluster back to etcd (the API server is the only control plane component that talks to etcd)

designed to scale horizontally: it scales by deploying more instances to balance traffic between those instances

highly configurable and customizable, supports the addition of custom API servers

Scheduler

assign new objects, such as pods, to nodes, based on current Kubernetes cluster state and new object’s requirements

takes into account: individual and collective resource requirements, hardware/software/policy constraints, affinity and anti-affinity specifications, data locality, inter-workload interference, and deadlines.

highly configurable and customizable, supports additional custom schedulers (specify the name of the custom scheduler in the object’s configuration data)

Controller Managers

run controllers to regulate the state of the Kubernetes cluster

controllers are watch-loops continuously running and comparing the cluster’s desired state (provided by objects’ configuration data) with its current state (obtained from etcd data store via the API server), includes:

  • Node controller: Responsible for noticing and responding when nodes go down.
  • Replication controller: Responsible for maintaining the correct number of pods for every replication controller object in the system.
  • Endpoints controller: Populates the Endpoints object (that is, joins Services & Pods).
  • Service Account & Token controllers: Create default accounts and API access tokens for new namespaces.

corrective action is taken in the cluster until its current state matches the desired state.

  • kube-controller-manager: runs controllers responsible to act when nodes become unavailable, to ensure pod counts are as expected, to create endpoints, service accounts, and API access tokens.

  • cloud-controller-manager: runs controllers responsible to interact with the underlying infrastructure of a cloud provider when nodes become unavailable, to manage storage volumes when provided by a cloud service, and to manage load balancing and routing.

    • Node controller: For checking the cloud provider to determine if a node has been deleted in the cloud after it stops responding
    • Route controller: For setting up routes in the underlying cloud infrastructure
    • Service controller: For creating, updating and deleting cloud provider load balancers

etcd

a distributed key-value store which holds cluster state related data, to persist the Kubernetes cluster’s state

either configured on the master node (stacked) or on its dedicated host (external)

  • when stacked: HA master node replicas ensure etcd resiliency.
  • when external: etcd hosts have to be separately replicated for HA mode configuration.

based on Raft Consensus Algorithm, written in Golang, besides storing the cluster state, also used to store configuration details such as subnets, ConfigMaps, Secrets, etc.

Worker Node

worker_node

provides a running environment for client applications, which are encapsulated in Pods, controlled by the cluster control plane agents running on the master node

Pods are scheduled on worker nodes, where they find required compute, memory and storage resources to run, and networking to talk to each other and the outside world.

A Pod is the smallest scheduling unit in Kubernetes, a logical collection of one or more containers scheduled together.

to access the applications from the external world, we connect to worker nodes and not to the master node.

4 parts:

  1. Container runtime
  2. kubelet
  3. kube-proxy
  4. Addons for DNS, Dashboard, cluster-level monitoring and logging.

Container Runtime

responsible for running containers, e.g. Docker, containerd, CRI-O

Kubelet

an agent running on each node, communicates with the control plane components from the master node, makes sure containers are running in a Pod.

receives Pod definitions, primarily from the API server, and interacts with the container runtime on the node to run containers associated with the Pod.

monitors the health of the Pod’s running container

connects to the container runtime using the Container Runtime Interface (CRI), which consists of protocol buffers, gRPC API, and libraries.

CRI

CRI implements two services:

  • ImageService: responsible for all the image-related operations
  • RuntimeService: responsible for all the Pod and container-related operations.

some examples of CRI shims:

  • dockershim: with dockershim, containers are created using Docker installed on the worker nodes. Internally, Docker uses containerd to create and manage containers.

dockershim

  • cri-containerd: with cri-containerd, we can directly use Docker’s smaller offspring containerd to create and manage containers.

cri-containerd

  • cri-o: cri-o enables using any Open Container Initiative (OCI) compatible runtimes with Kubernetes. At the time this course was created, CRI-O supported runC and Clear Containers as container runtimes. However, in principle, any OCI-compliant runtime can be plugged-in.

cri-o

Kube-proxy

network agent which runs on each node, responsible for dynamic updates and maintenance of all networking rules on the node, abstracts the details of Pods networking and forwards connection requests to Pods.

implements part of the Kubernetes Service concept.

maintains network rules on nodes. These network rules allow network communication to your Pods from network sessions inside or outside of your cluster.

uses the operating system packet filtering layer if there is one and it’s available. Otherwise, forwards the traffic itself.

Addons

use Kubernetes resources (DaemonSet, Deployment, etc) to implement cluster features and functionality not yet available in Kubernetes, therefore implemented through 3rd-party pods and services.

  • DNS: cluster DNS is a DNS server for Kubernetes services, required to assign DNS records to Kubernetes objects and resources
  • Web UI (Dashboard): a general-purpose web-based user interface for cluster management
  • Container Resource Monitoring: records generic time-series metrics about containers in a central database, and provides a UI for browsing that data.
  • Cluster-level Logging: responsible for saving container logs to a central log store with search/browsing interface.

Networking Challenges

Container-to-Container Communication Inside Pods

When a Pod is started, a network namespace is created inside the Pod, and all containers running inside the Pod will share that network namespace so that they can talk to each other via localhost.

Pod-to-Pod Communication Across Nodes

Kubernetes network model “IP-per-Pod”

containers are integrated with the overall Kubernetes networking model through the use of the Container Network Interface (CNI) supported by CNI plugins.

Pod-to-External World Communication

by services, complex constructs which encapsulate networking rules definitions on cluster nodes. By exposing services to the external world with kube-proxy, applications become accessible from outside the cluster over a virtual IP.

Installing Kubernetes

Kubernetes Configuration

four major installation types:

  • All-in-One Single-Node Installation:

In this setup, all the master and worker components are installed and running on a single-node. While it is useful for learning, development, and testing, it should not be used in production. Minikube is one such example, and we are going to explore it in future chapters.

  • Single-Node etcd, Single-Master and Multi-Worker Installation:

In this setup, we have a single-master node, which also runs a single-node etcd instance. Multiple worker nodes are connected to the master node.

  • Single-Node etcd, Multi-Master and Multi-Worker Installation:

In this setup, we have multiple-master nodes configured in HA mode, but we have a single-node etcd instance. Multiple worker nodes are connected to the master nodes.

  • Multi-Node etcd, Multi-Master and Multi-Worker Installation:

In this mode, etcd is configured in clustered HA mode, the master nodes are all configured in HA mode, connecting to multiple worker nodes. This is the most advanced and recommended production setup.

Localhost Installation

localhost installation options available to deploy single- or multi-node Kubernetes clusters on our workstation/laptop:

  • Minikube: single-node local Kubernetes cluster
  • Docker Desktop: single-node local Kubernetes cluster for Windows and Mac
  • CDK on LXD: multi-node local cluster with LXD containers.

On-Premise Installation

Kubernetes can be installed on-premise on VMs and bare metal.

  • On-Premise VMs:

Kubernetes can be installed on VMs created via Vagrant, VMware vSphere, KVM, or another Configuration Management (CM) tool in conjunction with a hypervisor software. There are different tools available to automate the installation, such as Ansible or kubeadm.

  • On-Premise Bare Metal:

Kubernetes can be installed on on-premise bare metal, on top of different operating systems, like RHEL, CoreOS, CentOS, Fedora, Ubuntu, etc. Most of the tools used to install Kubernetes on VMs can be used with bare metal installations as well.

Cloud Installation

Kubernetes can be installed and managed on almost any cloud environment:

  • Hosted Solutions:

With Hosted Solutions, any given software is completely managed by the provider. The user pays hosting and management charges. Some of the vendors providing hosted solutions for Kubernetes are:

Google Kubernetes Engine (GKE)
Azure Kubernetes Service (AKS)
Amazon Elastic Container Service for Kubernetes (EKS)
DigitalOcean Kubernetes
OpenShift Dedicated
Platform9
IBM Cloud Kubernetes Service.

  • Turnkey Cloud Solutions:

Below are only a few of the Turnkey Cloud Solutions, to install Kubernetes with just a few commands on an underlying IaaS platform, such as:

Google Compute Engine (GCE)
Amazon AWS (AWS EC2)
Microsoft Azure (AKS).

  • Turnkey On-Premise Solutions:

The On-Premise Solutions install Kubernetes on secure internal private clouds with just a few commands:

GKE On-Prem by Google Cloud
IBM Cloud Private
OpenShift Container Platform by Red Hat.

Kubernetes Installation Tools/Resources

It is worth checking out the Kubernetes The Hard Way GitHub project

some useful tools/resources available:

  • kubeadm:

kubeadm is a first-class citizen on the Kubernetes ecosystem. It is a secure and recommended way to bootstrap a single- or multi-node Kubernetes cluster. It has a set of building blocks to setup the cluster, but it is easily extendable to add more features. Please note that kubeadm does not support the provisioning of hosts.

  • kubespray

With kubespray (formerly known as kargo), we can install Highly Available Kubernetes clusters on AWS, GCE, Azure, OpenStack, or bare metal. Kubespray is based on Ansible, and is available on most Linux distributions. It is a Kubernetes Incubator project.

  • kops

With kops, we can create, destroy, upgrade, and maintain production-grade, highly-available Kubernetes clusters from the command line. It can provision the machines as well. Currently, AWS is officially supported. Support for GCE is in beta, and VMware vSphere in alpha stage, and other platforms are planned for the future. Explore the kops project for more details.

  • kube-aws

With kube-aws we can create, upgrade and destroy Kubernetes clusters on AWS from the command line. Kube-aws is also a Kubernetes Incubator project.

Minikube

A Local Single-Node Kubernetes Cluster

Docker Certified Associate Exam Preparation Guide (v1.3)

Domain 1: Orchestration (25% of exam)

Complete the setup of a swarm mode cluster, with managers and worker nodes

1
2
3
$ docker swarm init
$ docker swarm join-token manager / $ docker swarm join-token worker
$ docker swarm join --token <token> <ip>:<port>

Describe and demonstrate how to extend the instructions to run individual containers into running services under swarm

1
2
$ docker service create --replicas 1 --name <service> <image> <cmd>
$ docker service ls

Describe the importance of quorum in a swarm cluster.

  • Raft Consensus Algorithm to make sure all the manager nodes in charge of managing and scheduling tasks in the cluster, are storing the same consistent state.
  • Having the same consistent state across the cluster means that in case of a failure, any Manager node can pick up the tasks and restore the services to a stable state.
  • Raft tolerates up to (N-1)/2 failures and requires a majority or quorum of (N/2)+1 members to agree on values proposed to the cluster.

Describe the difference between running a container and running a service.

Service -> tasks - containers

Interpret the output of “docker inspect” commands

1
$ docker service|container|network|... inspect --pretty <name>

Convert an application deployment into a stack file using a YAML compose file with “docker stack deploy”

stack file is a YAML defining services, networks and volumes.

Manipulate a running stack of services

docker service scale; docker service update; update config file, re-deploy by: docker stack deploy -c <stackfile> <stack>

Describe and demonstrate orchestration activities

Tools to manage, scale, and maintain containerized applications are called orchestrators (e.g. Kubernetes and Docker Swarm)

Increase number of replicas

1
$ docker service scale <service>=20

Add networks, publish ports

1
2
$ docker network create -d <driver> <network>
$ docker container create --network <network> -p 8080:8080 <image>

Mount volumes

to attach volume to a container, either by --mount or -v:

1
2
$ docker run -d --name <container> --mount source=<volume>,target=/vol <image>
$ docker run -d --name <container> -v <volume>:/vol <image>

Describe and demonstrate how to run replicated and global services

1
2
$ docker service create --replicas 5 --name <service> <image> # default replicated mode
$ docker service create --mode global --name <service> <image> # global mode

Apply node labels to demonstrate placement of tasks

Describe and demonstrate how to use templates with “docker service create”

only --hostname, --mount, and --env support templates, e.g.:

1
2
3
4
$ docker service create \
--name hosttempl \
--hostname="{{.Node.Hostname}}-{{.Node.ID}}-{{.Service.Name}}" \
busybox top # without --hostname, the hostname defaults to the container ID

Identify the steps needed to troubleshoot a service not deploying

1
2
3
4
5
6
$ docker service ls
$ docker service ps <service>
$ docker service inspect <service>
$ docker inspect <task>
$ docker inspect <container>
$ docker logs <container>

Describe how a Dockerized application communicates with legacy systems

use a bridge, an overlay, a macvlan network, or a custom network plugin.

Describe how to deploy containerized workloads as Kubernetes pods and deployments

1
2
3
4
5
6
$ kubectl apply -f bb.yaml
$ kubectl get pods
$ kubectl get deploy
$ kubectl edit deploy/mysite # edit existing objects, automatic update
$ kubectl scale --replicas=2 deploy/mysite # rescale
$ kubectl delete -f bb.yaml / kubectl delete deploy mysite

Describe how to provide configuration to Kubernetes pods using configMaps and secrets

  1. Secret:

create the YAML file for the Secret, save as mysql-secret.yaml:

1
2
3
4
5
6
7
apiVersion: v1
kind: Secret
metadata:
  name: mariadb-root-password
type: Opaque
data:
  password: S3ViZXJuZXRlc1JvY2tzIQ==

create the Secret in Kubernetes:

1
$ kubectl apply -f mysql-secret.yaml

view the newly created Secret:

1
$ kubectl describe secret mariadb-root-password

view and edit the Secret:

1
$ kubectl edit secret <secretname>

could also create the secret by:

1
2
3
$ kubectl create secret generic mariadb-user-creds \
--from-literal=MYSQL_USER=kubeuser \
--from-literal=MYSQL_PASSWORD=kube-still-rocks

validate secrets were created and stored correctly:

1
2
3
4
5
6
7
# Get the username
$ kubectl get secret mariadb-user-creds -o jsonpath='{.data.MYSQL_USER}' | base64 --decode -
kubeuser

# Get the password
$ kubectl get secret mariadb-user-creds -o jsonpath='{.data.MYSQL_PASSWORD}' | base64 --decode -
kube-still-rocks
  1. ConfigMap

create a file named max_allowed_packet.cnf:

1
2
[mysqld]
max_allowed_packet = 64M

create configmap by:

1
2
3
$ kubectl create configmap mariadb-config --from-file=max_allowed_packet.cnf # could add multiple --from-file=<filename>
$ kubectl create configmap mariadb-config --from-file=max-packet=max_allowed_packet.cnf # set max-packet as key rather than the file name
configmap/mariadb-config created

First validate that the ConfigMap was created:

1
$ kubectl get configmap mariadb-config

viewed with the kubectl describe command:

1
$ kubectl describe cm mariadb-config

edit configmap mariadb-config’s value:(in development)

1
$ kubectl edit configmap mariadb-config
  1. Using Secrets and ConfigMaps

Create a file named mariadb-deployment.yaml:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: mariadb
  name: mariadb-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: mariadb
  template:
    metadata:
      labels:
        app: mariadb
    spec:
      containers:
      - name: mariadb
        image: docker.io/mariadb:10.4
        # env:
        # - name: MYSQL_ROOT_PASSWORD
        #   valueFrom:
        #     secretKeyRef:
        #       name: mariadb-root-password
        #       key: password
        # envFrom:
        # - secretRef:
        #     name: mariadb-user-creds
        ports:
        - containerPort: 3306
          protocol: TCP
        volumeMounts:
        - mountPath: /var/lib/mysql
          name: mariadb-volume-1
        # - mountPath: /etc/mysql/conf.d
        #   name: mariadb-config-volume
      volumes:
      - emptyDir: {}
        name: mariadb-volume-1
      # - configMap:
      #     name: mariadb-config
      #     items:
      #     - key: max_allowed_packet.cnf
      #       path: max_allowed_packet.cnf
      #   name: mariadb-config-volume

add the Secrets to the Deployment as environment variables; the same works for ConfigMaps by using configMapKeyRef instead of secretKeyRef:

1
2
3
4
5
6
env:
- name: MYSQL_ROOT_PASSWORD
  valueFrom:
    secretKeyRef:
      name: mariadb-root-password
      key: password

or using envFrom:

1
2
3
envFrom:
- secretRef:
    name: mariadb-user-creds

create a new MariaDB instance from the YAML file:

1
2
$ kubectl create -f mariadb-deployment.yaml
deployment.apps/mariadb-deployment created

view the running MariaDB pod:

1
$ kubectl get pods

kubectl exec to the Pod, validate Secrets and ConfigMaps are in use:

1
2
3
4
$ kubectl exec -it mariadb-deployment-5465c6655c-7jfqm env |grep MYSQL
MYSQL_PASSWORD=kube-still-rocks
MYSQL_USER=kubeuser
MYSQL_ROOT_PASSWORD=KubernetesRocks!

check max_allowed_packet.cnf file was created in /etc/mysql/conf.d:

1
2
3
4
5
6
$ kubectl exec -it mariadb-deployment-5465c6655c-7jfqm ls /etc/mysql/conf.d
max_allowed_packet.cnf

$ kubectl exec -it mariadb-deployment-5465c6655c-7jfqm cat /etc/mysql/conf.d/max_allowed_packet.cnf
[mysqld]
max_allowed_packet = 32M

Domain 2: Image Creation, Management, and Registry (20% of exam)

Describe the use of Dockerfile

Describe options, such as add, copy, volumes, expose, entry point

Identify and display the main parts of a Dockerfile

Describe and demonstrate how to create an efficient image via a Dockerfile

Describe and demonstrate how to use CLI commands to manage images, such as list, delete, prune, rmi

Describe and demonstrate how to inspect images and report specific attributes using filter and format

filter:

1
2
3
4
5
6
7
$ docker images --filter "dangling=true"
$ docker images --filter "label=com.example.version"
$ docker images --filter "label=com.example.version=1.0"
$ docker images --filter "before=image1"
$ docker images --filter "since=image3"
$ docker images --filter=reference='busy*:*libc'
$ docker images --filter=reference='busy*:uclibc' --filter=reference='busy*:glibc'

format:

1
2
$ docker images --format "{{.ID}}: {{.Repository}}"
$ docker images --format "table {{.ID}}\t{{.Repository}}\t{{.Tag}}"

Describe and demonstrate how to tag an image.

1
$ docker tag <image-name/image-id> fedora/httpd:version1.0 

Describe and demonstrate how to apply a file to create a Docker image

Describe and demonstrate how to display layers of a Docker image

Describe and demonstrate how to modify an image to a single layer

Describe and demonstrate registry functions

Deploy a registry

deploy by:

1
2
3
4
5
6
7
$ docker run -d \
-e REGISTRY_HTTP_ADDR=0.0.0.0:5001 \
-p 5001:5001 \
--restart=always \
--name registry \
-v /mnt/registry:/var/lib/registry \
registry:2

stop and remove by:

1
$ docker container stop registry && docker container rm -v registry

Log into a registry

1
$ docker login localhost:8080
1
$ cat ~/my_password.txt | docker login --username foo --password-stdin

Utilize search in a registry

1
2
3
4
$ docker search busybox
$ docker search --filter is-official=true --filter stars=3 busybox
$ docker search --format "{{.Name}}: {{.StarCount}}" nginx
$ docker search --format "table {{.Name}}\t{{.IsAutomated}}\t{{.IsOfficial}}" nginx

Push an image to a registry

commit a container to image:

1
$ docker commit c16378f943fe rhel-httpd

tag and push:

1
2
$ docker tag rhel-httpd registry-host:5000/myadmin/rhel-httpd
$ docker push registry-host:5000/myadmin/rhel-httpd

Sign an image in a registry

1
2
$ docker trust sign example/trust-demo:v2
$ docker trust inspect --pretty example/trust-demo

Pull and delete images from a registry

An image may be deleted from the registry via its “name” and “reference”:

1
DELETE /v2/<name>/manifests/<reference>

Domain 3: Installation and Configuration (15% of exam)

Describe sizing requirements for installation

the following minimum requirements for Docker UCP 2.2.4 on Linux:
• UCP Manager nodes running DTR: 8GB of RAM with 3GB of disk space
• UCP Worker nodes: 4GB of RAM with 3GB of free disk space
Recommended requirements are:
• UCP Manager nodes running DTR: 8GB RAM, 4 vCPUs, and 100GB disk space
• UCP Worker nodes: 4GB RAM 25-100GB of free disk space

Describe and demonstrate the setup of repo, selection of a storage driver, and installation of the Docker engine on multiple platforms

Describe and demonstrate configuration of logging drivers (splunk, journald, etc.)

Describe and demonstrate how to set up swarm, configure managers, add nodes, and setup the backup schedule

A Swarm backup is a copy of all the files in directory /var/lib/docker/swarm:

  1. Stop Docker on the Swarm manager node you are performing the backup from (it is not a good idea to perform the backup on the leader manager). This will stop all UCP containers on the node. If UCP is configured for HA, the other managers will make sure the control plane remains available.
1
$ service docker stop
  1. Backup the Swarm config, e.g.:
1
2
3
4
5
6
$ tar -czvf swarm.bkp /var/lib/docker/swarm/
tar: Removing leading `/' from member names
/var/lib/docker/swarm/
/var/lib/docker/swarm/docker-state.json
/var/lib/docker/swarm/state.json
<Snip>
  1. Verify that the backup file exists. Rotate and store the backup file off-site according to your corporate backup policies.
1
2
$ ls -l
-rw-r--r-- 1 root root 450727 Jan 29 14:06 swarm.bkp
  1. Restart Docker.
1
$ service docker restart

recover Swarm from a backup:

  1. stop docker:
1
$ service docker stop
  1. Delete any existing Swarm configuration:
1
$ rm -r /var/lib/docker/swarm
  1. Restore the Swarm configuration from backup:
1
$ tar -zxvf swarm.bkp -C /
  1. Initialize a new Swarm cluster:
1
2
$ docker swarm init --force-new-cluster
Swarm initialized: current node (jhsg...3l9h) is now a manager.
  1. check by:
1
2
$ docker network ls
$ docker service ls
  1. Add new manager and worker nodes to the Swarm, and take a fresh backup.

Describe and demonstrate how to create and manage user and teams

RBAC via grant:

  • Subject
  • Role
  • Collection

Describe and demonstrate how to configure the Docker daemon to start on boot

1
$ sudo systemctl enable docker

To disable this behavior, use disable instead:

1
$ sudo systemctl disable docker

Describe and demonstrate how to use certificate-based client-server authentication to ensure a Docker daemon has the rights to access images on a registry

Describe the use of namespaces, cgroups, and certificate configuration

Describe and interpret errors to troubleshoot installation issues without assistance

Describe and demonstrate the steps to deploy the Docker engine, UCP, and DTR on AWS and on-premises in an HA configuration. Docker, DTR, UCP, Docker on AWS and possibly on premises HA config

  1. DTR:

If possible, you should run your DTR instances on dedicated nodes. You definitely
shouldn’t run user workloads on your production DTR nodes.

As with UCP, you should run an odd number of DTR instances. 3 or 5 is best for fault
tolerance. A recommended configuration for a production environment might be:

  • 3 dedicated UCP managers
  • 3 dedicated DTR instances
  • However many worker nodes your application requirements demand

Install DTR:

  1. Log on to the UCP web UI and click Admin > Admin Settings > Docker
    Trusted Registry.
  2. Fill out the DTR configuration form.
  • DTR EXTERNAL URL: Set this to the URL of your external load balancer.
  • UCP NODE: Select the name of the node you wish to install DTR on.
  • Disable TLS Verification For UCP: Check this box if you’re using
    self-signed certificates.
  1. Copy the long command at the bottom of the form.
  2. Paste the command into any UCP manager node.
    The command includes the --ucp-node flag telling UCP which node to
    perform the install on.
    The following is an example DTR install command that matches the configuration
    in Figure 16.10. It assumes that you already have a load balancer
    configured at dtr.mydns.com
1
2
3
4
5
$ docker run -it --rm docker/dtr install \
--dtr-external-url dtr.mydns.com \
--ucp-node dtr1 \
--ucp-url https://34.252.195.122 \
--ucp-username admin --ucp-insecure-tls
  1. Once the installation is complete, point your web browser to your load
    balancer. You will be automatically logged in to DTR.

Configure DTR for high availability:

  1. Log on to the DTR console and navigate to Settings.
  2. Select the Storage tab and configure the shared storage backend.

DTR is now configured with a shared storage backend and ready to have additional
replicas.

  1. Run the following command from a manager node in the UCP cluster.
1
2
3
4
5
$ docker run -it --rm \
docker/dtr:2.4.1 join \
--ucp-node dtr2 \
--existing-replica-id 47f20fb864cf \
--ucp-insecure-tls
  1. Enter the UCP URL and port, as well as admin credentials when prompted.

Describe and demonstrate how to configure backups for UCP and DTR

You can run the backup from any UCP manager node in the cluster, and you only
need to run the operation on one node (UCP replicates its configuration to all
manager nodes, so backing up from multiple nodes is not required).

Backing up UCP will stop all UCP containers on the manager that you’re executing
the operation on. With this in mind, you should be running a highly available UCP
cluster, and you should run the operation at a quiet time for the business.

backup UCP:

1
2
3
4
$ docker container run --log-driver none --rm -i --name ucp \
-v /var/run/docker.sock:/var/run/docker.sock \
docker/ucp:2.2.5 backup --interactive \
--passphrase "Password123" > ucp.bkp

recover UCP:

  1. Remove any existing, and potentially corrupted, UCP installations:
1
2
3
$ docker container run --rm -it --name ucp \
-v /var/run/docker.sock:/var/run/docker.sock \
docker/ucp:2.2.5 uninstall-ucp --interactive
  1. Restore UCP from the backup:
1
2
3
$ docker container run --rm -i --name ucp \
-v /var/run/docker.sock:/var/run/docker.sock \
docker/ucp:2.2.5 restore --passphrase "Password123" < ucp.bkp
  1. Log on to the UCP web UI and ensure that the user created earlier is still present
    (or any other UCP objects that previously existed in your environment).

Backup DTR:
As with UCP, DTR has a native backup command that is part of the Docker image
that was used to install the DTR. This native backup command will backup the DTR
configuration that is stored in a set of named volumes, and includes:

  • DTR configuration
  • Repository metadata
  • Notary data
  • Certificates

Images are not backed up as part of a native DTR backup. It is expected that
images are stored in a highly available storage backend that has its own independent
backup schedule using non-Docker tools.

Run the following command from a UCP manager node to perform a DTR backup:

1
2
3
4
5
6
7
$ read -sp 'ucp password: ' UCP_PASSWORD; \
docker run --log-driver none -i --rm \
--env UCP_PASSWORD=$UCP_PASSWORD \
docker/dtr:2.4.1 backup \
--ucp-insecure-tls \
--ucp-username admin \
> ucp.bkp

Recover DTR from backups:

Restoring DTR from backups should be a last resort, and only attempted when the
majority of replicas are down and the cluster cannot be recovered any other way.
If you have lost a single replica and the majority are still up, you should add a new
replica using the dtr join command.

restore from backup, the workflow is like this:

  1. Stop and delete DTR on the node (might already be stopped)
1
2
3
$ docker run -it --rm \
docker/dtr:2.4.1 destroy \
--ucp-insecure-tls
  1. Restore images to the shared storage backend (might not be required)
  2. Restore DTR
1
2
3
4
5
6
7
8
9
$ read -sp 'ucp password: ' UCP_PASSWORD; \
docker run -i --rm \
--env UCP_PASSWORD=$UCP_PASSWORD \
docker/dtr:2.4.1 restore \
--ucp-url <ENTER_YOUR_ucp-url> \
--ucp-node <ENTER_DTR_NODE_hostname> \
--ucp-insecure-tls \
--ucp-username admin \
< ucp.bkp

Domain 4: Networking (15% of exam)

Create a Docker bridge network for a developer to use for their containers

[Troubleshoot container and engine logs to understand a connectivity issue between containers](https://success.docker.com/article/troubleshooting-container-networking)

Publish a port so that an application is accessible externally

Identify which IP and port a container is externally accessible on

Describe the different types and use cases for the built-in network drivers

[Understand the Container Network Model and how it interfaces with the Docker engine and network and IPAM drivers](https://success.docker.com/article/networking/)

Configure Docker to use external DNS

[Use Docker to load balance HTTP/HTTPs traffic to an application (Configure L7 load balancing with Docker EE)](https://docs.docker.com/datacenter/ucp/2.2/guides/admin/configure/use-a-load-balancer/#configuration-examples)

[Understand and describe the types of traffic that flow between the Docker engine, registry, and UCP controllers](https://success.docker.com/article/networking/)

Deploy a service on a Docker overlay network

  • Describe the difference between “host” and “ingress” port publishing mode (Host, Ingress)

Domain 5: Security (15% of exam)

Describe the process of signing an image

Demonstrate that an image passes a security scan

Enable Docker Content Trust

Configure RBAC in UCP

Integrate UCP with LDAP/AD

Demonstrate creation of UCP client bundles

Describe default engine security

Describe swarm default security

Describe MTLS

Identity roles

Describe the difference between UCP workers and managers

Domain 6: Storage and Volumes (10% of exam)

State which graph driver should be used on which OS

Demonstrate how to configure devicemapper

[Compare object storage to block storage, and explain which one is preferable when available](https://rancher.com/block-object-file-storage-containers/)

[Summarize how an application is composed of layers and where those layers reside on the filesystem](https://docs.docker.com/storage/storagedriver/#images-and-layers)

Describe how volumes are used with Docker for persistent storage

Demonstrate how storage can be used across cluster nodes

Install Docker

On Debian

uninstall old versions:

1
$ sudo apt-get remove docker docker-engine docker.io containerd runc
1
2
3
4
5
6
7
8
$ sudo apt-get update
$ sudo apt-get install \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
software-properties-common
$ curl -fsSL https://download.docker.com/linux/debian/gpg | sudo apt-key add -

Verify key with the fingerprint:

1
sudo apt-key fingerprint 0EBFCD88

For x86_64 / amd64 architecture:

1
2
3
4
$ sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/debian \
$(lsb_release -cs) \
stable"
1
2
$ sudo apt-get update
$ sudo apt-get install docker-ce docker-ce-cli containerd.io

Run hello world demo

1
$ sudo docker run hello-world

Avoid sudo when running docker:

1
$ sudo nano /etc/group

add username to docker:x:999:<username>, e.g. docker:x:999:admin

Or:

1
sudo usermod -a -G docker <username>

On Ubuntu by Shell Script

1
$ wget -qO- https://get.docker.com/ | sh

add user account to local unix docker group, to avoid sudo:

1
sudo usermod -aG docker <username> # e.x. admin

Launching a Docker container

1
nano Dockerfile
1
2
3
4
5
6
7
8
9
10
##############################
# Dockerfile to create Ubuntu webserver
#
FROM ubuntu:18.04

RUN apt-get update
RUN apt-get install -y apache2
RUN echo "Welcome to my web site" > /var/www/html/index.html
EXPOSE 80
##############################
1
2
3
4
5
docker build -t "webserver" .
docker images
docker run -d -p 80:80 webserver /usr/sbin/apache2ctl -D FOREGROUND
docker ps
curl localhost

Docker Engine Architecture

Big Picture

docker_engine

Example of creating a container

create_container

The “daemon” can restart without affecting containers, which means upgrading doesn’t kill containers; the same is true for “containerd”. Both can be restarted while leaving all containers running; when they come back, they re-discover the running containers and reconnect to the shim.

Docker on Windows: Native and Hyper-V

windows_container

only low level difference, APIs for users are the same.
The idea is that the VM isolation of Hyper-V (a lightweight VM) might be better or more secure than namespaces. Also, a different OS can run in the VM.


Docker Images

docker_image

An image is a read-only template for creating application containers. It consists of independent layers loosely connected by a manifest file (config file).

docker_image_container

Every container can also write by copying read-only layer(files in it) to its writable layer, and do changes there.

Pull a docker image

1
$ docker image pull redis

In fact, we are pulling layers:

1
2
3
4
5
6
7
8
9
10
11
Using default tag: latest
latest: Pulling from library/redis
afb6ec6fdc1c: Pull complete # layer
608641ee4c3f: Pull complete
668ab9e1f4bc: Pull complete
78a12698914e: Pull complete
d056855f4300: Pull complete
618fdf7d0dec: Pull complete
Digest: sha256:ec277acf143340fa338f0b1a9b2f23632335d2096940d8e754474e21476eae32
Status: Downloaded newer image for redis:latest
docker.io/library/redis:latest

Fat manifest is specified by OS architecture, to get image manifest:

image_manifest

Referencing by hash to avoid mismatch between the image asking for and the image got.

Using overlay2 as storage driver, those layers are stored at /var/lib/docker/overlay2. To see layers, run:

1
$ sudo ls -l /var/lib/docker/overlay2

to get content of layer, run:

1
$ sudo ls -l /var/lib/docker/overlay2/<sha256>

Layer structure, e.g.:

  1. Base layer (OS files and objects)
  2. App codes
  3. Updates …

-> a single unified file system.

Check images

1
2
3
4
5
6
7
$ docker image ls  / $ docker images
$ docker image ls --digests # get sha256
$ docker image ls --filter dangling=true # get <none>:<none>
$ docker image ls --filter=reference="*:latest"
$ docker image ls --format "{{.Size}}"
$ docker image ls --format "{{.Repository}}: {{.Tag}}: {{.Size}}"
$ docker image ls --format "{{json .}}" # print in json format

to see operation history of one image, run:

1
$ docker history redis

every instruction with a non-zero size creates a new layer; the rest only add something to the image’s JSON config file.

to get configs and layers of one image, run:

1
$ docker image inspect redis

Delete images

1
2
3
4
$ docker image rm redis
$ docker rmi alpine
$ docker image prune # delete all dangling images
$ docker image prune -a # delete all unused images, not used by any container

Registries

Images live in registries. Running docker image pull <some-image> pulls from Docker Hub by default.

Official images live in the top level of Docker Hub namespaces, e.g. docker.io/redis, docker.io/nginx. The registry name “docker.io/” can be omitted because it is the default; next comes the repo name “redis”, then the tag name “latest”, which identifies an actual image. So the full version is `docker image pull docker.io/redis:4.0.1`

Unofficial ones, e.g. nigelpoulton/tu-demo

To pull all tags of images from repo, run

1
$ docker image pull <some-image> -a

Content hashes for host, compressed hashes(distribution hashes) for wire, to verify. UIDs used for storing layers are random.

run sha256 on content of layer -> layer’s hash as ID;
run sha256 on image config file -> image’s hash as ID.


Containerizing

Dockerfile

Dockerfile is list of instructions for building images with an app inside(document the app).

Good practice: put Dockerfile in root folder of app.

Good practice: LABEL maintainer="xxx@gmail.com"

notes:

  • CAPITALIZE instructions
  • <INSTRUCTION> <value>, e.g. FROM alpine
  • FROM always first instruction, as base image
  • RUN execute command and create layer
  • COPY copy code into image as new layer
  • instructions like WORKDIR are adding metadata instead of layers
  • ENTRYPOINT default app for image/container, metadata
  • CMD run-time arguments override CMD instructions, append to ENTRYPOINT

e.x.:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
FROM alpine

LABEL maintainer="xyshell@bu.edu"

RUN apk add --update nodejs nodejs-npm

COPY . /src

WORKDIR /src

RUN npm install

EXPOSE 8080

# path is relative to WORKDIR
ENTRYPOINT ["node", "./app.js"]

code: https://github.com/nigelpoulton/psweb

Build image

1
$ docker image build -t <image-name> . # current folder
1
$ docker image build -t psweb https://github.com/nigelpoulton/psweb.git # git repo

docker image ls to check it exists.

During each step, docker spins up temporary containers, once the following layer is built, the container is removed.

Run container

1
2
$ docker container run -d --name <app-name> -p 8080:8080 <image> # detach mode
$ docker run -dit --name alpine1 alpine ash # interactive and detach, can docker attach alpine1 later

Multi-stage Builds

use multiple FROM statements in Dockerfile.

Each FROM instruction can use a different base, and each of them begins a new stage of the build.

can selectively copy artifacts from one stage to another, leaving behind everything you don’t want in the final image.

  • FROM ... AS ...
  • COPY --from=...

example 1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
FROM node:latest AS storefront
WORKDIR /usr/src/atsea/app/react-app
COPY react-app .
RUN npm install
RUN npm run build

FROM maven:latest AS appserver
WORKDIR /usr/src/atsea
COPY pom.xml .
RUN mvn -B -f pom.xml -s /usr/share/maven/ref/settings-docker.xml dependency:resolve
COPY . .
RUN mvn -B -s /usr/share/maven/ref/settings-docker.xml package -DskipTests

FROM java:8-jdk-alpine
RUN adduser -Dh /home/gordon gordon
WORKDIR /static
COPY --from=storefront /usr/src/atsea/app/react-app/build/ .
WORKDIR /app
COPY --from=appserver /usr/src/atsea/target/AtSea-0.0.1-SNAPSHOT.jar .
ENTRYPOINT ["java", "-jar", "/app/AtSea-0.0.1-SNAPSHOT.jar"]
CMD ["--spring.profiles.active=postgres"]

source: https://github.com/dockersamples/atsea-sample-shop-app/blob/master/app/Dockerfile

example 2:

1
2
3
4
5
6
7
8
9
10
11
FROM golang:1.7.3 AS builder
WORKDIR /go/src/github.com/alexellis/href-counter/
RUN go get -d -v golang.org/x/net/html
COPY app.go .
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o app .

FROM alpine:latest
RUN apk --no-cache add ca-certificates
WORKDIR /root/
COPY --from=builder /go/src/github.com/alexellis/href-counter/app .
CMD ["./app"]

source: https://docs.docker.com/develop/develop-images/multistage-build/#name-your-build-stages

When building image, don’t have to build the entire Dockerfile including every stage. Can specify a target build stage, and stop at that stage.

1
$ docker build --target builder -t alexellis2/href-counter:latest .

Docker containers

Most atomic unit in docker is container.

microservices instead of monolith, glue by APIs. Containers should be as small and simple as possible. Single process per container.

modernize traditional apps:

modernize_traditional_apps

container should be ephemeral and immutable.

Check status

1
2
3
$ docker ps / $ docker container ls # running containers
$ docker ps -a # all containers
$ docker port <container> # Port mapping e.x. 80/tcp -> 0.0.0.0:80

Run containers

1
2
3
$ docker container run ... / docker run ...
$ docker container run -it alpine sh # interactive terminal
$ docker container run -d alpine sleep 1d # detached mode, command

Stop containers

Stopping a container sends signal to main process in the container (PID1), gives 10s before force stop.

Stopping and restarting a container doesn’t destroy data.

1
$ docker container stop <container>

Start containers

1
$ docker container start <container>

Enter containers

execing into a container starts a new process

1
2
3
$ docker container exec -it <container> sh
$ docker container exec <container> ls -l
$ docker container exec <container> cat <file-name>

exiting by exit kills the process, if it’s the only one, container exits. However, ctrl+P+Q gets out of container without terminating its main process (can docker attach to it).

Remove containers

1
2
$ docker container rm <container>
$ docker container rm $(docker container ls -aq) -f # remove all containers, force

Log

Engine/daemon logs and Container/App logs

1
$ docker logs <container>

logging

Swarm and Services

Swarm

swarm is a secure cluster of docker nodes, including “secure cluster” and “orchestrator”

can do native swarm work and kubernetes on swarm cluster

single-engine mode: install individual docker instances VS swarm mode: working on a cluster of docker instances.

1
$ docker system info #  Swarm: inactive/active

Single docker node to swarm mode

1
2
$ docker swarm init
$ docker swarm init --external-ca

if it’s the first manager of swarm, it’s automatically elected as its leader(root CA).

  • issue itself a client certificate.
  • Build a secure cluster store (etcd) that is automatically distributed to every other manager in the swarm, encrypted.
  • default certificate rotation policy.
  • a set of cryptographic join tokens, one for joining new managers, another for joining new workers.

on manager node, query cluster store to check all nodes:

1
2
$ docker node ls # lists all nodes in the swarm
$ docker node ls --filter role=manager/worker

Join another manager and workers

1
$ docker swarm join

swarm

Every swarm has a single leader manager, the rest are follower managers.

Commands could be issued to any manager, hitting a follower manager will proxy commands to the leader.

If the leader fails, another one gets selected as a new leader.

Best practice: the ideal number of managers is 3, 5, or 7. Make sure it’s an odd number, to increase the chance of achieving quorum.

Connect managers by fast and reliable network. e.x. in AWS, put in same region, could cross availability zones.

Workers don’t join the cluster store, which is just for managers.

Workers have a full list of IPs for all managers. If one manager dies, workers talk to another.

get join token:

1
2
3
4
$ docker swarm join-token manager
SWMTKN-1-36xjjuzeryn11xc2xtrnjjxy288aef43o2r8o8grrpela5gsq4-2pvht1x50o3s8hm5rcur5cizo 192.168.0.31:2377
$ docker swarm join-token worker
SWMTKN-1-36xjjuzeryn11xc2xtrnjjxy288aef43o2r8o8grrpela5gsq4-5rzpk82wtde7durxwiu1mmh14 192.168.0.31:2377

note:

  • SWMTKN: identifier
  • 36xjju: cluster identifier, hash of cluster certificate (same for same swarm cluster)
  • 2pvht1 or 5rzpk8: determines worker or manager (could change by rotation)

switch to another node:

1
$ docker swarm join --token ...

to rotate token(change password):

1
2
$ docker swarm join-token --rotate worker
$ docker swarm join-token --rotate manager

The existing managers and workers stay unaffected.

to get client certificates:

1
$ sudo openssl x509 -in /var/lib/docker/swarm/certificates/swarm-node.crt -text

in Subject field:

1
Subject: O = pn6210vdux6ppj3uef0kqn8cv, OU = swarm-manager, CN = dkyneha22mdz384n3b3mdvjk7

note:

  • O: organization, swarm ID
  • OU: organizational unit, node’s role (swarm-manager/swarm-worker)
  • CN: canonical name, cryptographic node ID

Remove swarm node

1
2
3
$ docker node demote <NODE> # To demote the node
$ docker node rm <NODE> # To remove the node from the swarm
$ docker swarm leave

Autolock swarm

  • prevents restarted managers(not applied to workers) from automatically re-joining the swarm
  • prevents accidentally restoring old copies of the swarm
1
2
3
$ docker swarm init --autolock # autolock new swarm
$ docker swarm update --autolock=true # autolock existing swarm
SWMKEY-1-7e7w/gsGI2iGL9dqRtY/JqOOffnP5INPRw5uME2o+hM # jot down unlock key

Then if manager restarts by:

1
$ sudo service docker restart

Inspecting the cluster by docker node ls gives raft logs saying swarm is encrypted.

re-join the swarm by:

1
2
$ docker swarm unlock
Please enter unlock key: SWMKEY-1-7e7w/gsGI2iGL9dqRtY/JqOOffnP5INPRw5uME2o+hM

check again by docker node ls to confirm.

Update certificate expiry time

1
docker swarm update --cert-expiry 48h

check by docker system info:

1
2
CA Configuration:
Expiry Duration: 2 days

Update a node

Add label metadata to a node by:

1
2
$ docker node update --label-add foo worker1
$ docker node update --label-add foo --label-add bar worker1

Node labels can then be used as a placement constraint when creating a service.

Orchestration intro

orchestration_intro


Services

There are two types of service deployments, replicated and global:

  • For a replicated service, you specify the number of identical tasks you want to run. For example, you decide to deploy an HTTP service with three replicas, each serving the same content.
  • A global service is a service that runs one task on every node. There is no pre-specified number of tasks. Each time you add a node to the swarm, the orchestrator creates a task and the scheduler assigns the task to the new node. Good candidates for global services are monitoring agents, anti-virus scanners or other types of containers that you want to run on every node in the swarm.

Create service

1
2
$ docker service create --replicas 5 <service> # default replicated mode
$ docker service create --mode global <service> # global mode

Check status

1
2
3
4
5
6
$ docker service ls # list all services
$ docker service ps <service> # list all tasks
$ docker service inspect <service> --pretty # details
$ docker service inspect <service> | jq -r '.[].CreatedAt'
$ docker service ps <service> --format "{{json .}}" --filter "desired-state=running" | jq -r .ID
$ docker inspect <task> | jq -r '.[].Status.ContainerStatus.ContainerID'

Remove services

1
$ docker service rm $(docker service ls -q) # remove all services

Update services

rescale services by:

1
$ docker service scale <service>=20

update service’s image by:

1
2
3
4
$ docker service update \
--image nigelpoulton/tu-demo:v2 \
--update-parallelism 2 \
--update-delay 20s <service>

then update parallelism and update delay settings are now part of the service definition.

update service’s network by:

1
2
3
4
$ docker service update \
--network-add <new-network> \
--network-rm <old-network \
<service>

Logs

1
2
3
4
$ docker service logs <service>
--follow
--tail 1000
--details

Container Networking

See the current network:

1
$ docker network ls

Every container goes onto bridge (nat on Windows) network by default.

container_network

Network types

containers talk to each other, VMs, physicals and internet. Vice versa.

Bridge Networking

a.k.a single-host networking, docker0.

can only connect containers on the same host. Isolated layer-two network, even on the same host. Get in/out traffic by mapping port to the host.

bridge_network

1
$ docker network inspect bridge # default bridge network
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
{
"Name": "bridge", //
"Id": "f904473bbf1f625413c0cd2e1b7c0271253056709731cab4271ee95906ef270c", //
"Created": "2020-06-09T21:03:39.158623688Z",
"Scope": "local", //
"Driver": "bridge", //
"EnableIPv6": false,
"IPAM": {
"Driver": "default",
"Options": null,
"Config": [
{
"Subnet": "172.17.0.0/16" // ip range
}
]
},
"Internal": false,
"Attachable": false,
"Ingress": false,
"ConfigFrom": {
"Network": ""
},
"ConfigOnly": false,
"Containers": {}, // currently no containers
"Options": {
"com.docker.network.bridge.default_bridge": "true",
"com.docker.network.bridge.enable_icc": "true",
"com.docker.network.bridge.enable_ip_masquerade": "true",
"com.docker.network.bridge.host_binding_ipv4": "0.0.0.0",
"com.docker.network.bridge.name": "docker0",
"com.docker.network.driver.mtu": "1500"
},
"Labels": {}
}

create a container without specifying network by $ docker container run --rm -d alpine sleep 1d, then inspect again:

1
2
3
4
5
6
7
8
9
10
11
{
"Containers": {
"4ea75ff740542150570357239d2f61f236f18d3840cffb3bdeae1df2745a9c2e": {
"Name": "cool_haibt",
"EndpointID": "f0dc9a4302cd78d262a1ec562fae8ae635d2cebb2733c60cfa40d5eb00d04564",
"MacAddress": "02:42:ac:11:00:02",
"IPv4Address": "172.17.0.2/16", //ip address
"IPv6Address": ""
}
}
}

to talk to outside, need port mapping:

1
2
$ docker container run --rm -d --name web -p 8080:80 nginx # host port 8080 to container port 80
# --rm: remove the container once it exits/stops

show port mapping by:

1
2
$ docker port <container>
80/tcp -> 0.0.0.0:8080 # container port -> host port

then can visit the web by localhost:8080

create a bridge network:

1
$ docker network create -d bridge <network-name>

check by docker network ls. To run containers in it:

1
$ docker container run --rm -d --network <network-name> alpine sleep 1d

to switch container between networks:

1
2
$ docker network disconnect <network1> web # even when container is running
$ docker network connect <network2> web

Overlay Networking

a.k.a multi-host networking.

Single layer-two network spanning multiple hosts

1
2
$ docker network create
$ docker network create -o encrypted # encrypt data plane

built-in overlay is container to container only (not applied to VM, physicals)

overlay_network

To create a overlay network:

1
$ docker network create -d overlay <network-name>

check by docker network ls, note its scope is “swarm”, which means it is available on every node in the swarm.

create a service to use this overlay network:

1
$ docker service create -d --name <service> --replicas 2 --network overnet alpine sleep 1d # default replicated mode

check by docker service ls, check which nodes are running the service by docker service ps <service>, more details run docker service inspect <service>

switch to one node which runs this service and run docker network inspect <network-name>

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
{
"Containers": {
"7df4738446ac44a577d026a11ad73401c6cbdaaafcddbe75028954e7191fe1a1": {
"Name": "pinger.1.neku7xixs6g8oe2r8otlnqnep",
"EndpointID": "9a2dbdda2c15b991eeba7c1ab6fabd93e42b5ed4495b2f47d038dc871143a409",
"MacAddress": "02:42:0a:00:01:04",
"IPv4Address": "10.0.1.4/24", // jot down ip address
"IPv6Address": ""
},
"lb-overnet": {
"Name": "overnet-endpoint",
"EndpointID": "45c5d95a4d21eadcd2f99d0ff982ec4bc6d0c6a7f136136a93aeeb8c7d959898",
"MacAddress": "02:42:0a:00:01:06",
"IPv4Address": "10.0.1.6/24",
"IPv6Address": ""
}
}
}

switch to the other node, exec into the container by docker container exec -it <container> sh, ping 10.0.1.4, check success.

MACVLAN

Containers also need to talk to VMs or physicals on existing VLANs.

Gives every container its own IP address and MAC address on the existing network (directly on the wire, no bridges, no port mapping)

requires promiscuous mode on the host. (cloud providers generally don’t allow. look for IPVLAN instead, which doesn’t require promiscuous mode)

Network services

  • Service discovery: locate services in a swarm

  • Load Balancing: access a service from any node in swarm (even nodes not hosting the service)

Service discovery

Every service gets a name, registered with swarm DNS, uses swarm DNS

1
2
$ docker service create -d --name ping --network <overlay> --replicas 3 alpine sleep 1d
$ docker service create -d --name pong --network <overlay> --replicas 3 alpine sleep 1d

check docker service ls to confirm (also check docker container ls), can locate other service in same overlay network by name, e.x.:

1
2
3
$ docker container exec -it <container> sh
$ ping pong # success
$ ping -c 2 pong # -c 2: only two ping attempts.

Load Balancing

1
2
3
4
5
6
7
$ docker service create -d --name web --network overnet --replicas 1 -p 8080:80 nginx
$ docker service inspect web --pretty
Ports:
PublishedPort = 8080 #
Protocol = tcp
TargetPort = 80 #
PublishMode = ingress # default mode

ingress mode: publish a port on every node in the swarm — even nodes not running service replicas. then can access by any node in the network by port 8080.

The alternative mode is host mode, which only publishes the service on swarm nodes running replicas. Enable it by adding mode=host to the --publish flag, and use --mode global instead of --replicas=5, since only one service task can bind a given port on a given node.

Volumes

running a new container automatically gets its own non-persistent, ephemeral graph driver storage (copy-on-write union mount, /var/lib/docker). However, volume is to store persistent data, entirely decoupled from containers, seamlessly plugs into containers.

a directory on the docker(also remote hosts or cloud providers by volume drivers), mounted into container at a specific mount point.

Volumes can exist not only on local storage of the docker host, but also on high-end external systems like SAN and NAS, plugged in through Docker volume drivers.

Create Volumes

1
$ docker volume create <volume>

Check status

1
2
$ docker volume ls
$ docker volume inspect <volume>
1
2
3
4
5
6
7
8
9
10
11
[
{
"CreatedAt": "2020-06-10T16:29:30Z",
"Driver": "local", // default
"Labels": {},
"Mountpoint": "/var/lib/docker/volumes/myvol/_data", // inspect by ls -l /var/lib/docker/volumes/
"Name": "myvol",
"Options": {},
"Scope": "local" //
}
]

Delete volume

to delete a specific volume:

1
$ docker volume rm <volume>

rm an in-use volume causes error message.

to delete all unused volumes:

1
$ docker volume prune # delete unused volume

Attach volume

to attach volume to a container, either by --mount or -v:

1
2
$ docker run -d --name <container> --mount source=<volume>,target=/vol <image>
$ docker run -d --name <container> -v <volume>:/vol <image>

note:

  • source=<volume>: if volume doesn’t exist for now, will be created.
  • target=/vol: where in the container to mount it, check by exec into container and ls -l /vol/
  • if the container has files or directories in the directory to be mounted, the directory’s contents are copied into the volume.

check by docker inspect <container>:

1
2
3
4
5
6
7
8
9
10
11
12
"Mounts": [
{
"Type": "volume",
"Name": "myvol",
"Source": "/var/lib/docker/volumes/myvol/_data",
"Destination": "/vol",
"Driver": "local",
"Mode": "",
"RW": true,
"Propagation": ""
}
],

Then, container can write data to /vol (e.x. echo "some data" > /vol/newfile), accessible in /var/lib/docker/volumes/ as well, even if the container is removed.

--mount works with services as well.

Also useful in Dockerfile’s volume instruction.

Volume for service

1
2
3
4
5
$ docker service create -d \
--replicas=4 \
--name <service> \
--mount source=myvol,target=/app \
<image>

note:

  • docker service create command does not support the -v or --volume

Read only volume

1
2
3
4
$ docker run -d \
--name=nginxtest \
--mount source=nginx-vol,destination=/usr/share/nginx/html,readonly \
nginx:latest

verify by docker inspect nginxtest:

1
2
3
4
5
6
7
8
9
10
11
12
"Mounts": [
{
"Type": "volume",
"Name": "nginx-vol",
"Source": "/var/lib/docker/volumes/nginx-vol/_data",
"Destination": "/usr/share/nginx/html",
"Driver": "local",
"Mode": "",
"RW": false, //
"Propagation": ""
}
],

Secrets

string <= 500k, swarm mode for services only(not containers)

secret

note:
/run/secrets/: stay in memory

1
$ docker secret create <secret> <file>

check by docker secret ls. inspect by docker secret inspect <secret>

create a service, using secret:

1
$ docker service create -d --name <service> --secret <secret> --replicas 2 ...

inspect by docker service inspect <service> and look at the secrets section. exec into a container by docker container exec -it <container> sh, and find the secret by ls -l /run/secrets; it is accessible there.

can’t delete an in-use secret by docker secret rm <secret>, need to delete service first.

Docker Compose and Stack

Docker compose

Install on linux

  1. Download the current stable release of Docker Compose:
1
$ curl -L "https://github.com/docker/compose/releases/download/1.26.0/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
  2. Make it executable:
1
$ chmod +x /usr/local/bin/docker-compose

Compose files

Compose uses YAML files to define multi-service applications.

The default name for the Compose YAML file is docker-compose.yml

Flask app example:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
version: "3.5" # mandatory
services: # application services
  web-fe: # create a web front-end container called web-fe
    build: . # build a new image by Dockerfile in '.' to create container for web-fe
    command: python app.py # Override CMD in Dockerfile, which has Python and app.py
    ports:
      - target: 5000 # container port
        published: 5000 # host port
    networks:
      - counter-net # already exist or to be defined
    volumes:
      - type: volume
        source: counter-vol # already exist or to be defined
        target: /code # mount to container
  redis: # create an in-memory database container called redis
    image: "redis:alpine" # pulled from Docker Hub.
    networks:
      counter-net:

networks: # create new networks, bridge by default
  counter-net:

volumes: # create new volumes
  counter-vol:

note:

  • 4 top-level keys: version, services, networks, volumes, (secrets, configs…)
  • use the driver property to specify different network types:
1
2
3
4
networks:
  over-net:
    driver: overlay
    attachable: true # for standalone containers(instead of Docker Services)

source: https://github.com/nigelpoulton/counter-app

Run compose app

1
2
3
4
$ docker-compose up # docker-compose.yml in current folder
$ docker-compose up & # ctrl+c doesn't kill container
$ docker-compose up -d # run in daemon
$ docker-compose -f prod-equus-bass.yml up # -f flag

check the current state of the app by docker-compose ps.

list the processes running inside of each
service (container) by docker-compose top.

Stop, Restart and Delete App

stop without deleting:

1
$ docker-compose stop

could restart by:

1
$ docker-compose restart

If changed app after stopping, these changes won’t apply in restarted app. Need to re-deploy.

delete a stopped Compose app and networks:

1
$ docker-compose rm

stop and delete containers and networks by:

1
$ docker-compose down

Stack

swarm only

stacks manage a bunch of services as a single app, highest layer of docker application hierarchy.

stack

can run on Docker CLI, Docker UCP, Docker Cloud.

docker-stack.yml: YAML config file including version, services, network, volumes, documenting the app. Can do version control.

stack_file

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
version: "3" # >=3
services:
  redis: # service 1st
    image: redis:alpine
    networks:
      - frontend
    deploy: # new in version 3
      replicas: 1
      update_config:
        parallelism: 2
        delay: 10s
      restart_policy:
        condition: on-failure
  db:
    image: postgres:9.4
    environment:
      POSTGRES_USER: "postgres"
      POSTGRES_PASSWORD: "postgres"
    volumes:
      - db-data:/var/lib/postgresql/data
    networks:
      - backend
    deploy:
      placement:
        constraints: [node.role == manager] # only run on manager nodes
  vote:
    image: dockersamples/examplevotingapp_vote:before
    ports:
      - "5000:80" # quoted so YAML always reads the mapping as a string
    networks:
      - frontend
    depends_on: # NOTE: ignored by `docker stack deploy` (swarm mode)
      - redis
    deploy:
      replicas: 2
      update_config:
        parallelism: 2
      restart_policy:
        condition: on-failure
  result:
    image: dockersamples/examplevotingapp_result:before
    ports:
      - "5001:80"
    networks:
      - backend
    depends_on:
      - db
    deploy:
      replicas: 1
      update_config:
        parallelism: 2
        delay: 10s
      restart_policy:
        condition: on-failure

  worker:
    image: dockersamples/examplevotingapp_worker
    networks:
      - frontend
      - backend
    depends_on:
      - db
      - redis
    deploy:
      mode: replicated
      replicas: 1
      labels: [APP=VOTING]
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
        window: 120s
      placement:
        constraints: [node.role == manager]

  visualizer:
    image: dockersamples/visualizer:stable
    ports:
      - "8080:8080"
    stop_grace_period: 1m30s
    volumes:
      - "/var/run/docker.sock:/var/run/docker.sock"
    deploy:
      placement:
        constraints: [node.role == manager]

networks:
  frontend:
  backend:

volumes:
  db-data:

source: https://github.com/dockersamples/example-voting-app/docker-stack.yml

Check Compose file version 3 reference

Placement constraints:

  • Node ID: node.id == o2p4kw2uuw2a
  • Node name: node.hostname == wrk-12
  • Role: node.role != manager
  • Engine labels: engine.labels.operatingsystem==ubuntu 16.04
  • Custom node labels: node.labels.zone == prod1 (matches nodes labelled zone=prod1)

services.<service>.deploy.update_config:

1
2
3
update_config:
  parallelism: 2 # update two replicas at a time
  failure_action: rollback # [pause, continue, rollback]

services.<service>.deploy.restart_policy:

1
2
3
4
5
restart_policy:
  condition: on-failure # non-zero exit code
  delay: 5s # between each of the restart attempts
  max_attempts: 3
  window: 120s # wait up to 120 seconds to decide if the restart worked

services.<service>.stop_grace_period:

1
stop_grace_period: 1m30s # for PID 1 to handle SIGTERM, default 10s, then SIGKILL.

Deploy the stack

1
$ docker stack deploy -c <stackfile> <stack> # -c: --compose-file

Check status

1
2
3
$ docker stack ls
$ docker stack ps <stack>
$ docker stack services <stack>

Update stack

update config file, and re-deploy by:

1
$ docker stack deploy -c <stackfile> <stack>

will update every service in the stack.

Enterprise Edition(EE)

  • a hardened Docker Engine
  • Ops UI
  • Secure on-premises registry

EECE

Universal Control Plane(UCP)

based on EE, the operations GUI from Docker Inc, to manage swarm and k8s apps.

UCP

Docker Trusted Registry(DTR)

based on EE and UCP, a registry to store images, a containerized app.

DTR

Role-based Access Control(RBAC)

  • subject: user, team
  • role: permissions
  • collection: resources(docker node)

RBAC

Image scanning

After updating the image locally, it needs to be pushed to the registry.

Tag the updated image:

1
$ docker image tag <image> <dtr-dns>/<repo>/<image>:latest

check by docker image ls to see a new tagged image.

login:

1
$  docker login <dtr-dns> # username, password; user needs permission to write in the repo

ensure image scanning sets to “scan on push” in UCP’s DTR’s repo setting. Then push:

1
$ docker image push <tagged-image>

Check in DTR’s repo’s images’ vulnerabilities field.

HTTP Routing Mesh(HRM)

For Docker CE’s Routing Mesh(Swarm-mode Routing Mesh), Transport layer(L4).

routing_mesh

For Docker EE’s HRM, Application layer(L7). Route based on host header.

HRM

AWS Basics

AWS website

infrastructure.aws

TCO(total cost of ownership) Calculator

Pricing Calculator

Install

AWS CLI and Boto3

Amazon Linux 2

The AWS CLI is already installed on Amazon Linux 2.

Install Python 3:

1
sudo yum install -y python3-pip python3 python3-setuptools

Install Boto3:

1
pip3 install boto3 --user

macOS

Install Homebrew (used below to install Python 3):

1
ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"

Install Python 3:

1
brew install python

Insert the Homebrew Python directory at the top of your PATH environment variable:

1
export PATH="/usr/local/opt/python/libexec/bin:$PATH"

Verify you are using Python 3:

1
python --version

Install the AWS CLI and Boto3:

1
pip install awscli boto3 --upgrade --user

Docker

1
sudo amazon-linux-extras install docker

Configuring your AWS environment

Obtain your AWS access key and secret access key from the AWS Management Console. Run the following command:

1
aws configure

This sets up a text file that the AWS CLI and Boto3 libraries look at by default for your credentials: ~/.aws/credentials.

The file should look like this:

1
2
3
[default]
aws_access_key_id = AKIAIOSFODNN7EXAMPLE
aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY

Test Your Credentials

AWS CLI
Run the following command:

1
aws sts get-caller-identity

The output should look like this:

1
2
3
4
5
{
"UserId": "AIDAJKLMNOPQRSTUVWXYZ",
"Account": "123456789012",
"Arn": "arn:aws:iam::123456789012:user/devuser"
}

AWS CLI

1
2
aws configure
aws sts get-caller-identity

S3

Upload folder to s3:

1
aws s3 cp <path-to-source-folder> s3://<path-to-target-folder> --recursive --exclude ".DS_Store"

Dynamodb

create table:

1
2
3
4
5
6
7
8
9
aws dynamodb create-table \
--table-name Music \
--key-schema AttributeName=Artist,KeyType=HASH \
AttributeName=SongTitle,KeyType=RANGE \
--attribute-definitions \
AttributeName=Artist,AttributeType=S \
AttributeName=SongTitle,AttributeType=S \
--provisioned-throughput \
ReadCapacityUnits=5,WriteCapacityUnits=5

describe table:

1
aws dynamodb describe-table --table-name Music

put item:

1
2
3
4
5
6
aws dynamodb put-item \
--table-name Music \
--item '{
"Artist": {"S": "Dream Theater"},
"AlbumTitle": {"S": "Images and Words"},
"SongTitle": {"S": "Under a Glass Moon"} }'

scan table:

1
aws dynamodb scan --table-name Music

Boto3 - EC2

This section follows https://github.com/linuxacademy/content-lambda-boto3

HelloWorld

List all S3 bucket name:

1
2
3
4
import boto3

# Print the name of every S3 bucket returned for the configured credentials.
s3 = boto3.resource('s3')
for bucket in s3.buckets.all():
    print(bucket.name)

Spin up an ec2 instance

1
2
3
4
5
6
7
8
9
10
11
import boto3

# Launch exactly one instance and print the ID that EC2 assigned to it.
launch_params = {
    'ImageId': 'ami-0947d2ba12ee1ff75',  # Amazon Linux 2 AMI (HVM), SSD Volume Type
    'InstanceType': 't2.micro',
    'KeyName': 'xyshell',
    'MinCount': 1,
    'MaxCount': 1,
    'SubnetId': 'subnet-05b78e3323bcabddc',
}

ec2 = boto3.client('ec2')
response = ec2.run_instances(**launch_params)

first_instance = response['Instances'][0]
print(first_instance['InstanceId'])

Stopping EC2 Instances Nightly

Stopping-EC2-Instances-Nightly.png

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# lambda_function.py
import boto3


def lambda_handler(event, context):
    """Stop every running EC2 instance in each region from describe_regions.

    Args:
        event: Lambda invocation payload (unused).
        context: Lambda runtime context (unused).
    """
    ec2_client = boto3.client('ec2')
    # get list of regions
    regions = [region['RegionName']
               for region in ec2_client.describe_regions()['Regions']]
    # iterate over each region
    for region in regions:
        ec2 = boto3.resource('ec2', region_name=region)
        print('Region:', region)
        # get only running instances
        instances = ec2.instances.filter(
            Filters=[
                {'Name': 'instance-state-name',
                 'Values': ['running']}
            ]
        )
        # stop instances
        for instance in instances:
            instance.stop()
            print('stopped instance:', instance.id)

Backing Up EC2 Instances

Backing-Up-EC2-Instances

Create-Backups

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

from datetime import datetime, timezone

import boto3


def lambda_handler(event, context):
    """Snapshot every volume of each instance tagged ``backup=true``.

    Iterates over all regions returned by ``describe_regions`` and creates
    one snapshot per attached volume, with a description that records the
    instance ID, volume ID and a UTC timestamp.

    Args:
        event: Lambda invocation payload (unused).
        context: Lambda runtime context (unused).
    """
    ec2_client = boto3.client('ec2')
    regions = [region['RegionName']
               for region in ec2_client.describe_regions()['Regions']]

    for region in regions:

        print('Instances in EC2 Region {0}:'.format(region))
        ec2 = boto3.resource('ec2', region_name=region)

        instances = ec2.instances.filter(
            Filters=[
                {'Name': 'tag:backup', 'Values': ['true']}
            ]
        )

        # ISO 8601 timestamp, i.e. 2019-01-31T14:01:58
        # datetime.utcnow() is deprecated; take an aware UTC "now" and drop
        # the offset so the rendered string keeps the same format as before.
        timestamp = (datetime.now(timezone.utc)
                     .replace(microsecond=0, tzinfo=None)
                     .isoformat())

        for i in instances:
            for v in i.volumes.all():

                desc = 'Backup of {0}, volume {1}, created {2}'.format(
                    i.id, v.id, timestamp)
                print(desc)

                snapshot = v.create_snapshot(Description=desc)

                print("Created snapshot:", snapshot.id)

Prune-Backups

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import boto3


def lambda_handler(event, context):
    """Delete all but the three most recent snapshots owned by this account.

    The pruning is done independently in each region returned by
    ``describe_regions``. Deletion is best-effort: a snapshot that cannot be
    deleted is reported and skipped, and the run continues.

    Args:
        event: Lambda invocation payload (unused).
        context: Lambda runtime context (unused).
    """
    account_id = boto3.client('sts').get_caller_identity().get('Account')
    regions = [region['RegionName']
               for region in boto3.client('ec2').describe_regions()['Regions']]

    for region in regions:
        print("Region:", region)
        ec2 = boto3.client('ec2', region_name=region)

        # Paginate: a single describe_snapshots response can be truncated,
        # which would make "the 3 most recent" wrong for large accounts.
        paginator = ec2.get_paginator('describe_snapshots')
        snapshots = [snapshot
                     for page in paginator.paginate(OwnerIds=[account_id])
                     for snapshot in page['Snapshots']]

        # Sort snapshots by date ascending
        snapshots.sort(key=lambda x: x["StartTime"])

        # Remove snapshots we want to keep (i.e. 3 most recent)
        snapshots = snapshots[:-3]

        for snapshot in snapshots:
            snapshot_id = snapshot['SnapshotId']
            try:
                print("Deleting snapshot:", snapshot_id)
                ec2.delete_snapshot(SnapshotId=snapshot_id)
            except Exception as err:  # best-effort: never abort the whole run
                # botocore ClientError carries the service error code in
                # err.response; other exception types have no such attribute.
                code = getattr(err, 'response', {}).get('Error', {}).get('Code')
                if code == 'InvalidSnapshot.InUse':
                    print("Snapshot {} in use, skipping.".format(snapshot_id))
                else:
                    print("Could not delete snapshot {}: {}".format(
                        snapshot_id, err))

Removing Unattached EBS Volumes

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import boto3


def lambda_handler(object, context):
    """Delete every unattached ('available') EBS volume in each region.

    Args:
        object: Lambda invocation payload (unused). The name shadows the
            builtin but is kept so the handler signature stays unchanged.
        context: Lambda runtime context (unused).
    """
    # Get list of regions
    ec2_client = boto3.client('ec2')
    regions = [region['RegionName']
               for region in ec2_client.describe_regions()['Regions']]

    for region in regions:
        ec2 = boto3.resource('ec2', region_name=region)
        print("Region:", region)

        # List only unattached volumes ('available' vs. 'in-use')
        volumes = ec2.volumes.filter(
            Filters=[{'Name': 'status', 'Values': ['available']}])

        # The collection already yields Volume resources, so use them
        # directly instead of rebuilding each one with ec2.Volume(id).
        for volume in volumes:
            print("Deleting EBS volume: {}, Size: {} GiB".format(
                volume.id, volume.size))
            volume.delete()

Deregistering Old AMIs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import datetime
from dateutil.parser import parse

import boto3


def days_old(date):
    """Return the age in whole days of a timestamp string.

    Args:
        date: Date/time string understood by ``dateutil.parser.parse``,
            e.g. the ``CreationDate`` of an AMI.

    Returns:
        Number of whole days between the parsed timestamp and now.
    """
    parsed = parse(date)
    # Compare in the timestamp's own timezone. An aware value (e.g. a
    # trailing "Z") is measured against an aware "now"; a naive value is
    # measured against naive local time, as before.
    now = datetime.datetime.now(tz=parsed.tzinfo)
    return (now - parsed).days


def lambda_handler(event, context):
    """Deregister self-owned AMIs that are at least 2 days old, in all regions.

    Every image is logged with its id, creation date and age; images whose
    age (as computed by ``days_old``) is 2 days or more are deregistered.
    """
    # Get list of regions
    region_names = [
        entry['RegionName']
        for entry in boto3.client('ec2').describe_regions()['Regions']
    ]

    for region in region_names:
        ec2 = boto3.client('ec2', region_name=region)
        print("Region:", region)

        for ami in ec2.describe_images(Owners=['self'])['Images']:
            creation_date = ami['CreationDate']
            age_days = days_old(creation_date)
            image_id = ami['ImageId']
            print('ImageId: {}, CreationDate: {} ({} days old)'.format(
                image_id, creation_date, age_days))

            # Too recent: leave this image registered.
            if age_days < 2:
                continue

            print('Deleting ImageId:', image_id)
            ec2.deregister_image(ImageId=image_id)

Boto3 - Dynamodb

This chapter follows https://github.com/linuxacademy/content-lambda-boto3

1
2
3
import boto3
# Point the low-level client at the endpoint on localhost:8000 instead of
# the default AWS endpoint.
client = boto3.client('dynamodb', endpoint_url='http://localhost:8000') # dynamodb-local
# Bare expression: the table list is only displayed in an interactive session.
client.list_tables()

Create Tables

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Create the 'Movies' table keyed on year (partition key) and title
# (sort key), then wait until the table exists.
dynamodb = boto3.resource('dynamodb')

key_schema = [
    {'AttributeName': 'year', 'KeyType': 'HASH'},    # Partition key
    {'AttributeName': 'title', 'KeyType': 'RANGE'},  # Sort key
]
attribute_definitions = [
    {'AttributeName': 'year', 'AttributeType': 'N'},
    {'AttributeName': 'title', 'AttributeType': 'S'},
]
throughput = {'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5}

table = dynamodb.create_table(
    TableName='Movies',
    KeySchema=key_schema,
    AttributeDefinitions=attribute_definitions,
    ProvisionedThroughput=throughput,
)

print('Table status:', table.table_status)

print('Waiting for', table.name, 'to complete creating...')
waiter = table.meta.client.get_waiter('table_exists')
waiter.wait(TableName='Movies')
print('Table status:', dynamodb.Table('Movies').table_status)

Load Data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Load every movie found in moviedata.json into the 'Movies' table.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('Movies')

# JSON floats are parsed as decimal.Decimal values.
with open("moviedata.json") as json_file:
    movies = json.load(json_file, parse_float=decimal.Decimal)

# The file is only needed for parsing; items are written one at a time.
for movie in movies:
    year = int(movie['year'])
    title = movie['title']
    info = movie['info']

    print("Adding movie:", year, title)

    table.put_item(Item={'year': year, 'title': title, 'info': info})

moviedata.json:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
[
{
"year": 2013,
"title": "Rush",
"info": {
"directors": ["Ron Howard"],
"release_date": "2013-09-02T00:00:00Z",
"rating": 8.3,
"genres": ["Action", "Biography", "Drama", "Sport"],
"image_url": "http://ia.media-imdb.com/images/M/MV5BMTQyMDE0MTY0OV5BMl5BanBnXkFtZTcwMjI2OTI0OQ@@._V1_SX400_.jpg",
"plot": "A re-creation of the merciless 1970s rivalry between Formula One rivals James Hunt and Niki Lauda.",
"rank": 2,
"running_time_secs": 7380,
"actors": ["Daniel Bruhl", "Chris Hemsworth", "Olivia Wilde"]
}
}
]

Put Item

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that can serialise ``decimal.Decimal`` values.

    A Decimal with a fractional part is emitted as a float, a whole-number
    Decimal as an int.  Any other unsupported type is handed to the base
    encoder, which raises ``TypeError``.
    """

    def default(self, o):
        if not isinstance(o, decimal.Decimal):
            return super().default(o)
        has_fraction = abs(o) % 1 > 0
        return float(o) if has_fraction else int(o)

# Insert one new movie and print the raw PutItem response.
title = "The Big New Movie"
year = 2015

new_movie = {
    'year': year,
    'title': title,
    'info': {
        'plot': "Nothing happens at all.",
        'rating': decimal.Decimal(0),
    },
}
response = table.put_item(Item=new_movie)

print("PutItem succeeded:")
print(json.dumps(response, indent=4, cls=DecimalEncoder))

Get Item

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from botocore.exceptions import ClientError

# Read one movie by its full primary key (year + title).
title = "The Big New Movie"
year = 2015

try:
    response = table.get_item(Key={'year': year, 'title': title})
except ClientError as e:
    print(e.response['Error']['Message'])
else:
    # When no item matches the key, the response has no 'Item' entry at all,
    # so it must not be indexed unconditionally.
    item = response.get('Item')
    if item is None:
        print("GetItem found no item for:", year, title)
    else:
        print("GetItem succeeded:")
        print(json.dumps(item, indent=4, cls=DecimalEncoder))

Update Item

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Overwrite the rating and plot, and set the actor list, of one movie.
title = "The Big New Movie"
year = 2015

movie_key = {'year': year, 'title': title}
new_values = {
    ':r': decimal.Decimal(5.5),
    ':p': "Everything happens all at once.",
    ':a': ["Larry", "Moe", "Curly"],
}

response = table.update_item(
    Key=movie_key,
    UpdateExpression="set info.rating = :r, info.plot=:p, info.actors=:a",
    ExpressionAttributeValues=new_values,
    ReturnValues="UPDATED_NEW",
)

print("UpdateItem succeeded:")
print(json.dumps(response, indent=4, cls=DecimalEncoder))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Increment the movie's rating by one in place.
title = "The Big New Movie"
year = 2015

movie_key = {'year': year, 'title': title}
increment = {':val': decimal.Decimal(1)}

response = table.update_item(
    Key=movie_key,
    UpdateExpression="set info.rating = info.rating + :val",
    ExpressionAttributeValues=increment,
    ReturnValues="UPDATED_NEW",
)

print("UpdateItem succeeded:")
print(json.dumps(response, indent=4, cls=DecimalEncoder))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
title = "The Big New Movie"
year = 2015

# Conditional update: the first actor is removed only while the item still
# has at least :num actors.  With the three actors written by the earlier
# update the ">=" condition holds, so this succeeds; it fails with
# ConditionalCheckFailedException once fewer than :num actors remain.
print("Attempting conditional update...")

try:
    response = table.update_item(
        Key={
            'year': year,
            'title': title
        },
        UpdateExpression="remove info.actors[0]",
        ConditionExpression="size(info.actors) >= :num",
        ExpressionAttributeValues={
            ':num': 3
        },
        ReturnValues="UPDATED_NEW"
    )
except ClientError as e:
    # A failed condition is an expected outcome; anything else is re-raised.
    if e.response['Error']['Code'] == "ConditionalCheckFailedException":
        print(e.response['Error']['Message'])
    else:
        raise
else:
    print("UpdateItem succeeded:")
    print(json.dumps(response, indent=4, cls=DecimalEncoder))

Delete Item

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
title = "The Big New Movie"
year = 2015

# No ConditionExpression is passed, so this delete is unconditional; the
# message below says so instead of announcing a "conditional" delete.
print("Attempting to delete the item...")

try:
    response = table.delete_item(
        Key={
            'year': year,
            'title': title
        }
    )
except ClientError as e:
    # This branch only matters if a ConditionExpression is added to the call.
    if e.response['Error']['Code'] == "ConditionalCheckFailedException":
        print(e.response['Error']['Message'])
    else:
        raise
else:
    print("DeleteItem succeeded:")
    print(json.dumps(response, indent=4, cls=DecimalEncoder))

Query

1
2
3
4
5
6
7
8
9
10
from boto3.dynamodb.conditions import Key

# Query on the partition key only: every movie stored under year 1985.
print("Movies from 1985")

year_is_1985 = Key('year').eq(1985)
response = table.query(KeyConditionExpression=year_is_1985)

for movie in response['Items']:
    print(movie['year'], ":", movie['title'])
1
2
3
4
5
6
7
8
9
10
11
12
# Query on partition key plus a sort-key range, returning selected attributes.
print("Movies from 1992 - titles A-L, with genres and lead actor")

key_condition = Key('year').eq(1992) & Key('title').between('A', 'L')

response = table.query(
    ProjectionExpression="#yr, title, info.genres, info.actors[0]",
    # Expression Attribute Names for Projection Expression only.
    ExpressionAttributeNames={"#yr": "year"},
    KeyConditionExpression=key_condition,
)

for movie in response['Items']:
    print(json.dumps(movie, cls=DecimalEncoder))

Scan

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Scan the whole table, keeping movies from the 1950s, and print every page.
fe = Key('year').between(1950, 1959)
pe = "#yr, title, info.rating"
# Expression Attribute Names for Projection Expression only.
ean = {"#yr": "year", }

scan_kwargs = {
    'FilterExpression': fe,
    'ProjectionExpression': pe,
    'ExpressionAttributeNames': ean,
}

while True:
    response = table.scan(**scan_kwargs)

    for i in response['Items']:
        print(json.dumps(i, cls=DecimalEncoder))

    # A page that carries 'LastEvaluatedKey' is followed by another page;
    # the next scan resumes from that key.
    if 'LastEvaluatedKey' not in response:
        break
    scan_kwargs['ExclusiveStartKey'] = response['LastEvaluatedKey']

Delete table

1
2
3
4
5
6
7
import boto3

# Obtain the DynamoDB service resource.
dynamodb = boto3.resource('dynamodb')

# Reference the 'Movies' table by name.
table = dynamodb.Table('Movies')

# Delete the table itself.
table.delete()

Boto3 - S3

This chapter follows https://github.com/linuxacademy/content-lambda-boto3

Resizing Images

Resizing-Images

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os
import tempfile

import boto3
from PIL import Image

s3 = boto3.client('s3')
DEST_BUCKET = os.environ['DEST_BUCKET']
SIZE = 128, 128


def lambda_handler(event, context):
    """Create a thumbnail for every S3 object named in *event*.

    Each record's object is downloaded to a temporary directory, shrunk by
    ``generate_thumbnail`` and uploaded to ``DEST_BUCKET`` under the key
    ``'thumb-' + key``.
    """
    # Function-scope import so the snippet's import block stays untouched.
    from urllib.parse import unquote_plus

    for record in event['Records']:
        source_bucket = record['s3']['bucket']['name']
        # Object keys arrive URL-encoded in S3 event notifications
        # (e.g. a space becomes '+'), so decode before using the key.
        key = unquote_plus(record['s3']['object']['key'])
        thumb = 'thumb-' + key
        # Local file names use only the base name: a key containing '/'
        # would otherwise point into a sub-directory that does not exist
        # inside the temporary directory.
        local_name = os.path.basename(key)
        with tempfile.TemporaryDirectory() as tmpdir:
            download_path = os.path.join(tmpdir, local_name)
            upload_path = os.path.join(tmpdir, 'thumb-' + local_name)
            s3.download_file(source_bucket, key, download_path)
            generate_thumbnail(download_path, upload_path)
            s3.upload_file(upload_path, DEST_BUCKET, thumb)

        print('Thumbnail image saved at {}/{}'.format(DEST_BUCKET, thumb))


def generate_thumbnail(source_path, dest_path):
    """Shrink the image at *source_path* to fit ``SIZE`` and save it to *dest_path*."""
    print('Generating thumbnail from:', source_path)
    with Image.open(source_path) as picture:
        # thumbnail() resizes the opened image object in place.
        picture.thumbnail(SIZE)
        picture.save(dest_path)

To get the Pillow package, download the wheel from https://pypi.org/project/Pillow/ and unpack it:

1
unzip Pillow-5.4.1-cp37-cp37m-manylinux1_x86_64.whl
1
rm -rf Pillow-5.4.1.dist-info
1
zip -r9 lambda.zip lambda_function.py PIL

Upload the zip file to AWS Lambda.

Importing CSV Files into DynamoDB

Importing-CSV-Files-into-DynamoDB

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import csv
import os
import tempfile

import boto3

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('Movies')
s3 = boto3.client('s3')


def lambda_handler(event, context):
    """Import every CSV object named in *event* into the DynamoDB table.

    Each record's object is downloaded to a temporary directory, parsed by
    ``read_csv`` and written to ``table`` through a batch writer.
    """
    # Function-scope import so the snippet's import block stays untouched.
    from urllib.parse import unquote_plus

    for record in event['Records']:
        source_bucket = record['s3']['bucket']['name']
        # Object keys arrive URL-encoded in S3 event notifications.
        key = unquote_plus(record['s3']['object']['key'])
        with tempfile.TemporaryDirectory() as tmpdir:
            # Use only the base name locally: a key containing '/' would
            # otherwise refer to a sub-directory missing from tmpdir.
            download_path = os.path.join(tmpdir, os.path.basename(key))
            s3.download_file(source_bucket, key, download_path)
            items = read_csv(download_path)

            with table.batch_writer() as batch:
                for item in items:
                    batch.put_item(Item=item)


def read_csv(file):
    """Parse the movie CSV at path *file* into a list of item dicts.

    Every row becomes ``{'Year': int, 'Title': str or None, 'Meta': {...}}``.
    ``Meta`` holds Length (int, 0 when blank), Awards (True only for the
    text ``'Yes'``) and the text columns Subject, Actor, Actress, Director,
    Popularity and Image; blank text columns are left out of ``Meta``.

    Raises ``KeyError`` if an expected column is missing and ``ValueError``
    if Year or a non-blank Length is not an integer.
    """
    text_columns = ('Subject', 'Actor', 'Actress', 'Director',
                    'Popularity', 'Image')
    items = []
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing embedded newlines are read back unchanged.
    with open(file, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            meta = {
                'Length': int(row['Length'] or 0),
                'Awards': row['Awards'] == 'Yes',
            }
            for column in text_columns:
                value = row[column]
                if value:
                    meta[column] = value
            items.append({
                'Meta': meta,
                'Year': int(row['Year']),
                'Title': row['Title'] or None,
            })
    return items

Transcribing Audio

Transcribing-Audio

TranscribeAudio:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import boto3

s3 = boto3.client('s3')
transcribe = boto3.client('transcribe')


def lambda_handler(event, context):
    """Start an Amazon Transcribe job for every S3 object named in *event*.

    The media is submitted as en-US MP3 audio.  Each job gets its own name
    (``MyTranscriptionJob-<random hex>``) because a fixed name can only be
    used once: starting a second job with the same name is rejected.
    """
    # Function-scope import so the snippet's import block stays untouched.
    import uuid

    for record in event['Records']:
        source_bucket = record['s3']['bucket']['name']
        key = record['s3']['object']['key']
        object_url = "https://s3.amazonaws.com/{0}/{1}".format(
            source_bucket, key)
        job_name = 'MyTranscriptionJob-' + uuid.uuid4().hex
        response = transcribe.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': object_url},
            MediaFormat='mp3',
            LanguageCode='en-US'
        )
        print(response)

ParseTranscription:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import json
import os
import urllib.request

import boto3


BUCKET_NAME = os.environ['BUCKET_NAME']

s3 = boto3.resource('s3')
transcribe = boto3.client('transcribe')


def lambda_handler(event, context):
    """Store the transcript text of a transcription job in S3.

    The job name is read from ``event['detail']``, the transcript file is
    fetched from the job's ``TranscriptFileUri``, and the first transcript's
    text is written to ``BUCKET_NAME`` as ``<job name>-asrOutput.txt``.
    """
    job_name = event['detail']['TranscriptionJobName']
    job = transcribe.get_transcription_job(TranscriptionJobName=job_name)
    uri = job['TranscriptionJob']['Transcript']['TranscriptFileUri']
    print(uri)

    with_raw_bytes = urllib.request.urlopen(uri).read()
    content = with_raw_bytes.decode('UTF-8')

    print(json.dumps(content))

    transcript_text = json.loads(content)['results']['transcripts'][0]['transcript']

    output_key = job_name + '-asrOutput.txt'
    s3.Object(BUCKET_NAME, output_key).put(Body=transcript_text)

Detecting Faces with Rekognition

Detecting-Faces-with-Rekognition

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os

import boto3

TABLE_NAME = os.environ['TABLE_NAME']

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table(TABLE_NAME)
s3 = boto3.resource('s3')
rekognition = boto3.client('rekognition')


def lambda_handler(event, context):
    """Recognise celebrities in an uploaded image and record their names.

    Only the first record of *event* is processed.  The image bytes are read
    from S3, passed to Rekognition's ``recognize_celebrities``, and the
    recognised names are stored in the DynamoDB table under the object key.
    """
    # Function-scope import so the snippet's import block stays untouched.
    from urllib.parse import unquote_plus

    # Get the object from the event
    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # Object keys arrive URL-encoded in S3 event notifications.
    key = unquote_plus(s3_info['object']['key'])

    image = s3.Object(bucket, key).get()['Body'].read()
    print('Recognizing celebrities...')
    response = rekognition.recognize_celebrities(Image={'Bytes': image})

    names = []
    for celebrity in response['CelebrityFaces']:
        name = celebrity['Name']
        print('Name: ' + name)
        names.append(name)

    print(names)

    print('Saving face data to DynamoDB table:', TABLE_NAME)
    response = table.put_item(
        Item={
            'key': key,
            'names': names,
        }
    )
    print(response)

Boto3 - SQS

Triggering Lambda from SQS

Triggering-Lambda-from-SQS

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import json
import os
from datetime import datetime

import boto3

QUEUE_NAME = os.environ['QUEUE_NAME']
MAX_QUEUE_MESSAGES = os.environ['MAX_QUEUE_MESSAGES']
DYNAMODB_TABLE = os.environ['DYNAMODB_TABLE']

sqs = boto3.resource('sqs')
dynamodb = boto3.resource('dynamodb')


def lambda_handler(event, context):
    """Copy a batch of SQS messages into DynamoDB, deleting each afterwards.

    At most ``MAX_QUEUE_MESSAGES`` messages are received from the queue
    named ``QUEUE_NAME``.  Each message is written to ``DYNAMODB_TABLE`` with
    its id, body and the current timestamp, then deleted from the queue.
    """
    # Receive messages from SQS queue
    queue = sqs.get_queue_by_name(QueueName=QUEUE_NAME)

    approx_count = queue.attributes.get('ApproximateNumberOfMessages')
    print("ApproximateNumberOfMessages:", approx_count)

    received = queue.receive_messages(
        MaxNumberOfMessages=int(MAX_QUEUE_MESSAGES))

    for message in received:
        print(message)

        # Write message to DynamoDB
        table = dynamodb.Table(DYNAMODB_TABLE)
        record = {
            'MessageId': message.message_id,
            'Body': message.body,
            'Timestamp': datetime.now().isoformat(),
        }
        response = table.put_item(Item=record)
        print("Wrote message to DynamoDB:", json.dumps(response))

        # Delete SQS message
        message.delete()
        print("Deleted message:", message.message_id)

send_message.py:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3.7
# -*- coding: utf-8 -*-
import argparse
import logging
import sys
from time import sleep

import boto3
from faker import Faker

# Command-line options: the queue to send to, the pause between sends, an
# optional fixed message (random text is sent when omitted) and a log level.
parser = argparse.ArgumentParser()
parser.add_argument("--queue-name", "-q", required=True,
                    help="SQS queue name")
parser.add_argument("--interval", "-i", required=True,
                    help="timer interval", type=float)
parser.add_argument("--message", "-m", help="message to send")
parser.add_argument("--log", "-l", default="INFO",
                    help="logging level")
args = parser.parse_args()

if args.log:
    # Level names are upper-case in the logging module; accept "info" too.
    logging.basicConfig(
        format='[%(levelname)s] %(message)s', level=args.log.upper())
else:
    # Only reached when --log is given an empty value.
    parser.print_help(sys.stderr)

sqs = boto3.client('sqs')

response = sqs.get_queue_url(QueueName=args.queue_name)

queue_url = response['QueueUrl']

logging.info(queue_url)

# Build the fake-text generator once, and only when it is actually needed,
# instead of constructing a new one on every loop iteration.
fake = None if args.message else Faker()

# Send messages forever, pausing args.interval seconds between sends.
while True:
    message = args.message or fake.text()

    logging.info('Sending message: %s', message)

    response = sqs.send_message(
        QueueUrl=queue_url, MessageBody=message)

    logging.info('MessageId: %s', response['MessageId'])
    sleep(args.interval)

Creating a Queue Using Cross-Account Permissions

Creating-a-Queue-Using-Cross-Account-Permissions

SQS does not allow API calls such as CreateQueue using cross-account permissions. A workaround is to create and invoke a Lambda function in another account in order to call that API.

Create AWS CLI Profiles
Development account admin:

1
2
3
4
aws configure --profile devadmin
Production account admin:

aws configure --profile prodadmin

Create a Lambda Function in the Production Account
Function name: CreateSQSQueue

See lambda_function.py and assign the role lambda_execution_role.json.

Assign Permissions to the Lambda Function
Add permissions to the production Lambda function that allow it to be invoked by the development account user:

1
2
3
4
5
6
7
aws lambda add-permission \
--function-name CreateSQSQueue \
--statement-id DevAccountAccess \
--action 'lambda:InvokeFunction' \
--principal 'arn:aws:iam::__DEVELOPMENT_ACCOUNT_NUMBER__:user/devadmin' \
--region us-east-2 \
--profile prodadmin

To view the policy:

1
2
3
4
aws lambda get-policy \
--function-name CreateSQSQueue \
--region us-east-2 \
--profile prodadmin

To remove the policy:

1
2
3
4
5
aws lambda remove-permission \
--function-name CreateSQSQueue \
--statement-id DevAccountAccess \
--region us-east-2 \
--profile prodadmin

Invoke the Production Lambda Function from the Development Account:

1
2
3
4
5
6
7
aws lambda invoke \
--function-name '__LAMBDA_FUNCTION_ARN__' \
--payload '{"QueueName": "MyQueue" }' \
--invocation-type RequestResponse \
--profile devadmin \
--region us-east-2 \
output.txt

lambda_function.py:

1
2
3
4
5
6
7
8
9
import boto3

sqs = boto3.resource('sqs')


def lambda_handler(event, context):
    """Create the SQS queue named by ``event['QueueName']`` and log its URL."""
    queue_name = event['QueueName']
    new_queue = sqs.create_queue(QueueName=queue_name)
    print('Queue URL', new_queue.url)

lambda_execution_role.json:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": "arn:aws:logs:*:*:*"
},
{
"Action": ["sqs:CreateQueue"],
"Effect": "Allow",
"Resource": "*"
}
]
}

Boto3 - Third party

Creating Slack Notifications for CloudWatch Alarms

Creating-Slack-Notifications-for-CloudWatch-Alarms

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import json
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

import boto3

ssm = boto3.client('ssm')


def lambda_handler(event, context):
    """Forward a CloudWatch alarm delivered via SNS to a Slack webhook.

    The alarm JSON is taken from the first SNS record of *event*.  The
    webhook URL is read from the SSM parameter ``SlackWebHookURL``.
    HTTP and connection failures are logged rather than raised.
    """
    print(json.dumps(event))

    message = json.loads(event['Records'][0]['Sns']['Message'])
    print(json.dumps(message))

    alarm_name = message['AlarmName']
    new_state = message['NewStateValue']
    reason = message['NewStateReason']

    slack_message = {
        'text': f':fire: {alarm_name} state is now {new_state}: {reason}\n'
                f'```\n{message}```'
    }

    webhook_url = ssm.get_parameter(
        Name='SlackWebHookURL', WithDecryption=True)

    # Supplying a body makes this a POST request.
    req = Request(webhook_url['Parameter']['Value'],
                  json.dumps(slack_message).encode('utf-8'))

    try:
        # The context manager closes the HTTP response once it has been read.
        with urlopen(req) as response:
            response.read()
        print("Message posted to Slack")
    except HTTPError as e:
        print(f'Request failed: {e.code} {e.reason}')
    except URLError as e:
        print(f'Server connection failed: {e.reason}')

Creating a Twitter App

Creating-a-Twitter-App

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import random

import boto3
from botocore.exceptions import ClientError
import tweepy

BUCKET_NAME = os.environ['BUCKET_NAME']
KEY = 'data.txt'

s3 = boto3.resource('s3')
ssm = boto3.client('ssm')


def get_parameter(param_name):
    """Return the decrypted value of the SSM parameter *param_name*."""
    parameter = ssm.get_parameter(Name=param_name, WithDecryption=True)
    return parameter['Parameter']['Value']


def get_tweet_text():
    """Return one random non-blank line from the S3 object ``KEY``.

    The object is downloaded from ``BUCKET_NAME`` to ``/tmp`` first.

    Raises:
        ClientError: if the download fails; a missing object (404) is
            logged before the error is re-raised.
        IndexError: if the file contains no non-blank lines.
    """
    filename = '/tmp/' + KEY
    try:
        s3.Bucket(BUCKET_NAME).download_file(KEY, filename)
    except ClientError as e:
        if e.response['Error']['Code'] == "404":
            print(f'The object {KEY} does not exist in bucket {BUCKET_NAME}.')
        # Always re-raise: continuing would open a file that was not
        # downloaded (missing, or left in /tmp by an earlier run).
        raise

    with open(filename) as f:
        # Skip blank lines so an empty tweet is never selected.
        lines = [line for line in f if line.strip()]
    return random.choice(lines)


def lambda_handler(event, context):
    """Post one line chosen by ``get_tweet_text`` as a status update.

    The four Twitter credentials are read from SSM parameters under
    ``/TwitterBot/`` and used to authenticate Tweepy.
    """
    # Get SSM parameters
    consumer_key = get_parameter('/TwitterBot/consumer_key')
    consumer_secret = get_parameter('/TwitterBot/consumer_secret')
    access_token = get_parameter('/TwitterBot/access_token')
    access_token_secret = get_parameter('/TwitterBot/access_token_secret')

    # Authenticate Tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Send tweet
    tweet = get_tweet_text()
    print(tweet)
    api.update_status(tweet)

SSH

1
ssh -i <path-to-pem> admin@<ip/dns>

Local Port Forwarding:

1
ssh -i <path-to-pem> -l admin <ip/dns> -L 9999:localhost:9999

SCP

1
scp -i <path-to-pem> <path-to-source-file> admin@<ip/dns>:/home/admin/<path-to-target-file>

Auto-start after reboot

1
2
3
4
5
$ systemctl enable docker
Synchronizing state of docker.service...
Executing /lib/systemd/systemd-sysv-install enable docker
$ systemctl is-enabled docker
enabled
0%