diff --git a/docs/cli-manual.md b/docs/cli-manual.md index 7525a35479f..79338acc746 100644 --- a/docs/cli-manual.md +++ b/docs/cli-manual.md @@ -175,10 +175,13 @@ This command used to diagnose the Pods of TiDB cluster. It launches a debug cont | --image | | specify the docker image of debug container, default to `pingcap/tidb-debug:lastest` | | --container | -c | select the container to diagnose, default to the first container of target Pod | | --docker-socket | | specify the docker socket of cluster node, default to `/var/run/docker.sock` | +| --privileged | | whether to launch the debug container in privileged mode (full container capabilities) | The default image of debug container contains almost all the related tools you may use then diagnosing, however, the image size can be kinda big. You may use `--image=pingcap/tidb-control:latest` if your just need a basic shell, `pd-ctl` and `tidb-ctl`. +For a guide to using the default debug image (`tidb-debug`), refer to [tidb-debug](/misc/images/tidb-debug/README.md). 
+ Example: ``` $ tkctl debug demo-cluster-tikv-0 diff --git a/misc/images/tidb-control/profile b/misc/images/tidb-control/profile index 010f4d4e261..2fe47eb89c2 100644 --- a/misc/images/tidb-control/profile +++ b/misc/images/tidb-control/profile @@ -6,6 +6,7 @@ export LS_OPTIONS='--color=auto' # aliases alias ls='ls $LS_OPTIONS' +alias ll='ls -alF' function prompt { local GREENBOLD="\[\033[1;32m\]" diff --git a/misc/images/tidb-debug/Dockerfile b/misc/images/tidb-debug/Dockerfile index be150e5dc8d..6631b203e94 100644 --- a/misc/images/tidb-debug/Dockerfile +++ b/misc/images/tidb-debug/Dockerfile @@ -34,7 +34,8 @@ RUN wget https://github.com/brendangregg/FlameGraph/archive/master.zip \ && unzip master.zip \ && mv FlameGraph-master /opt/FlameGraph \ && rm master.zip -ADD run_flamegraph.sh /run_flamegraph.sh +COPY run_flamegraph.sh /run_flamegraph.sh +COPY gdbinit /root/.gdbinit # used for go pprof ENV GOLANG_VERSION 1.12.4 diff --git a/misc/images/tidb-debug/README.md b/misc/images/tidb-debug/README.md index 4cb7d632efc..4ad85986d9f 100644 --- a/misc/images/tidb-debug/README.md +++ b/misc/images/tidb-debug/README.md @@ -7,3 +7,38 @@ TiDB cluster debug toolkit is a docker image contains various troubleshooting to ```shell $ docker run -it --rm pingcap/tidb-debug:latest ``` + +## GDB and perf + +This image includes useful troubleshooting tools like [GDB](https://www.gnu.org/software/gdb/) and [perf](https://en.wikipedia.org/wiki/Perf_(Linux)). However, using these tools in the debug container is slightly different from the ordinary workflow due to the difference in root filesystems of the target container and the debug container. + +### GDB + +In order to use GDB properly, you must set the "program" argument to the binary in the target container and set sysroot to the target container's root dir by using gdb's `set sysroot` command. Moreover, if the target container is missing some dynamic libraries (e.g. 
`libthread_db-*.so`) required by GDB, you must set the corresponding search path to the debug container. + +Taking TiKV as an example: + +```shell +$ tkctl debug demo-tikv-0 +$ gdb /proc/${pid:-1}/root/tikv-server 1 + +# .gdbinit in the debug container is configured to set sysroot to /proc/1/root/ +# so if the target process pid is 1, you can omit this command +(gdb) set sysroot /proc/${pid:-1}/root/ + +# now you can start debugging +(gdb) thread apply all bt +(gdb) info threads +``` + +### perf (and flame graph) + +To use `perf` and the `run_flamegraph.sh` script (which wraps the `perf` tool) properly, you must copy the program from the target container to the same location in the debug container: + +Still taking TiKV as an example: + +```shell +$ cp /proc/1/root/tikv-server / +$ ./run_flamegraph.sh 1 +``` + diff --git a/misc/images/tidb-debug/gdbinit b/misc/images/tidb-debug/gdbinit new file mode 100644 index 00000000000..f44ef11071f --- /dev/null +++ b/misc/images/tidb-debug/gdbinit @@ -0,0 +1,3 @@ +set auto-load safe-path / +set libthread-db-search-path /usr/lib64/ +set sysroot /proc/1/root/ diff --git a/misc/images/tidb-debug/profile b/misc/images/tidb-debug/profile index 010f4d4e261..2fe47eb89c2 100644 --- a/misc/images/tidb-debug/profile +++ b/misc/images/tidb-debug/profile @@ -6,6 +6,7 @@ export LS_OPTIONS='--color=auto' # aliases alias ls='ls $LS_OPTIONS' +alias ll='ls -alF' function prompt { local GREENBOLD="\[\033[1;32m\]" diff --git a/misc/images/tidb-debug/run_flamegraph.sh b/misc/images/tidb-debug/run_flamegraph.sh old mode 100644 new mode 100755 diff --git a/pkg/tkctl/cmd/debug/debug.go b/pkg/tkctl/cmd/debug/debug.go index 72b6dcc95ab..c058c7342dd 100644 --- a/pkg/tkctl/cmd/debug/debug.go +++ b/pkg/tkctl/cmd/debug/debug.go @@ -74,6 +74,7 @@ type DebugOptions struct { Command []string HostDockerSocket string LauncherImage string + Privileged bool KubeCli *kubernetes.Clientset @@ -114,7 +115,8 @@ func NewCmdDebug(tkcContext *config.TkcContext, streams 
genericclioptions.IOStre "docker socket path of kubernetes node") cmd.Flags().StringVar(&options.LauncherImage, "launcher-image", options.LauncherImage, "image for launcher pod which is responsible to launch the debug container") - + cmd.Flags().BoolVar(&options.Privileged, "privileged", options.Privileged, + "whether launch container in privileged mode (full container capabilities)") return cmd } @@ -193,8 +195,11 @@ func (o *DebugOptions) makeLauncherPod(nodeName, containerID string, command []s o.Image, "--docker-socket", fmt.Sprintf("unix://%s", util.DockerSocket), - "--", } + if o.Privileged { + launchArgs = append(launchArgs, "--privileged") + } + launchArgs = append(launchArgs, "--") launchArgs = append(launchArgs, command...) return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/tkctl/debug/launcher.go b/pkg/tkctl/debug/launcher.go index 747ad9a6713..01f44ae0ffe 100644 --- a/pkg/tkctl/debug/launcher.go +++ b/pkg/tkctl/debug/launcher.go @@ -29,6 +29,9 @@ import ( const ( defaultDockerSocket = "unix:///var/run/docker.sock" dockerContainerPrefix = "docker://" + + CAP_SYS_PTRACE = "SYS_PTRACE" + CAP_SYS_ADMIN = "SYS_ADMIN" ) type IOStreams struct { @@ -46,6 +49,8 @@ type Launcher struct { dockerSocket string ctx context.Context + privileged bool + client *dockerclient.Client } @@ -74,6 +79,8 @@ func NewLauncherCmd(streams IOStreams) *cobra.Command { "debug container image") cmd.Flags().StringVar(&launcher.dockerSocket, "docker-socket", launcher.dockerSocket, "docker socket to bind") + cmd.Flags().BoolVar(&launcher.privileged, "privileged", launcher.privileged, + "whether launch container in privileged mode (full container capabilities)") return cmd } @@ -136,6 +143,8 @@ func (l *Launcher) createContainer(command []string) (*container.ContainerCreate UsernsMode: container.UsernsMode(containerMode(dockerContainerID)), IpcMode: container.IpcMode(containerMode(dockerContainerID)), PidMode: container.PidMode(containerMode(dockerContainerID)), + CapAdd: 
strslice.StrSlice([]string{CAP_SYS_PTRACE, CAP_SYS_ADMIN}), + Privileged: l.privileged, } body, err := l.client.ContainerCreate(l.ctx, config, hostConfig, nil, "") if err != nil {