Skip to content
This repository has been archived by the owner on Jun 24, 2021. It is now read-only.

Commit

Permalink
Update Ansible for Lightning 2.1.9 (#750)
Browse files Browse the repository at this point in the history
* group_vars: change default Lightning and Importer ports to 8287 & 8289

* roles: include the new Lightning and Importer config items

* roles: added scripts/tidb_lightning_ctl.sh to easily run tidb-lightning-ctl

* roles: further importer config changes

* Update lightning config

* fix typo
  • Loading branch information
kennytm authored and liubo0127 committed May 10, 2019
1 parent 038841c commit 1e946f8
Show file tree
Hide file tree
Showing 9 changed files with 216 additions and 62 deletions.
93 changes: 74 additions & 19 deletions conf/tidb-lightning.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@
lightning:
# check if the cluster satisfies the minimum requirement before starting
# check-requirements = true
# table-concurrency controls the maximum handled tables concurrently while reading Mydumper SQL files. It can affect the tikv-importer memory usage amount.
# table-concurrency must be <= max-open-engines value in tikv-importer.toml
table-concurrency: 8

# table-concurrency controls the maximum handled tables concurrently while reading Mydumper SQL files.
# index-concurrency controls the maximum handled index concurrently while reading Mydumper SQL files.
# They can affect the tikv-importer memory and disk usage.
# table-concurrency + index-concurrency must be <= max-open-engines value in tikv-importer.toml
index-concurrency: 2
table-concurrency: 6
# region-concurrency changes the concurrency number of data. It is set to the number of logical CPU cores by default and needs no configuration.
# in a mixed deployment, you can set it to 75% of the number of logical CPU cores.
# region-concurrency default to runtime.NumCPU()
# region-concurrency =
# region-concurrency:

# io-concurrency controls the maximum IO concurrency
io-concurrency: 5

# logging
level: "info"
Expand All @@ -25,32 +32,67 @@ checkpoint:
enable: true
# The schema name (database name) to store the checkpoints
schema: "tidb_lightning_checkpoint"
# The data source name (DSN) in the form "USER:PASS@tcp(HOST:PORT)/".
# If not specified, the TiDB server from the [tidb] section will be used to store the checkpoints. You could also
# specify a different MySQL-compatible database server if you like.
#dsn: "root@tcp(127.0.0.1:4000)/"
# Where to store the checkpoints.
# Set to "file" to store as a local file.
# Set to "mysql" to store into a remote MySQL-compatible database
# driver: "file"
# The data source name (DSN) indicating the location of the checkpoint storage.
# For "file" driver, the DSN is a path. If not specified, Lightning would default to "/tmp/CHKPTSCHEMA.pb".
# For "mysql" driver, the DSN is a URL in the form "USER:PASS@tcp(HOST:PORT)/".
# If not specified, the TiDB server from the [tidb] section will be used to store the checkpoints.
# dsn: "/tmp/tidb_lightning_checkpoint.pb"
# Whether to keep the checkpoints after all data are imported. If false, the checkpoints will be deleted. The schema
# needs to be dropped manually, however.
#keep-after-success: false
# keep-after-success: false

tikv_importer:
# the listening address of tikv-importer. Change it to the actual address in tikv-importer.toml.
# addr: "0.0.0.0:20170"
# size of batch to import KV data into TiKV: xxx (GB)
batch-size: 500 # GB
# addr: "0.0.0.0:8287"

mydumper:
# block size of file reading
read-block-size: 65536 # Byte (default = 64 KB)
# divide source data file into multiple Region/chunk to execute restoring in parallel
region-min-size: 268435456 # Byte (default = 256 MB)

# minimum size (in terms of source data file) of each batch of import.
# Lightning will split a large table into multiple engine files according to this size.
# batch-size: 107374182400 # Byte (default = 100 GiB)

# Engine files need to be imported sequentially. Due to table-concurrency, multiple engines will be
# imported at nearly the same time, which creates a queue and wastes resources. Therefore,
# Lightning will slightly increase the size of the first few batches to properly distribute
# resources. The scale up is controlled by this parameter, which expresses the ratio of duration
# between the "import" and "write" steps with full concurrency. This can be calculated as the ratio
# (import duration / write duration) of a single table of size around 1 GB. The exact timing can be
# found in the log. If "import" is faster, the batch size anomaly is smaller, and a ratio of
# zero means uniform batch size. This value should be in the range (0 <= batch-import-ratio < 1).
# batch-import-ratio: 0.75

# the source data directory of Mydumper. tidb-lightning will automatically create the corresponding database and tables based on the schema file in the directory.
# data-source-dir: "/data/mydumper"
# If no-schema is set to true, tidb-lightning will obtain the table schema information from tidb-server,
# instead of creating the database or tables based on the schema file of data-source-dir.
# This applies to manually creating tables or the situation where the table schema exists in TiDB.
no-schema: false

# the character set of the schema files; only supports one of:
# - utf8mb4: the schema files must be encoded as UTF-8, otherwise will emit errors
# - gb18030: the schema files must be encoded as GB-18030, otherwise will emit errors
# - auto: (default) automatically detect if the schema is UTF-8 or GB-18030, error if the encoding is neither
# - binary: do not try to decode the schema files
# note that the *data* files are always parsed as binary regardless of schema encoding.
# character-set: "auto"

# CSV files are imported according to MySQL's LOAD DATA INFILE rules.
# See https://pingcap.com/docs/tools/lightning/csv/ for details of these settings
csv:
separator: ','
delimiter: '"'
header: true
not-null: false
'null': \N
backslash-escape: true
trim-last-separator: false

# configuration for TiDB (pick one of them if it has many TiDB servers) and the PD server.
tidb:
# the target cluster information
Expand All @@ -63,17 +105,30 @@ tidb:
# status-port: 10080
# Lightning uses some code of TiDB (used as a library) and the flag controls its log level.
log-level: "error"
# set TiDB session variable to speed up performing the Checksum or Analyze operation on the table.
distsql-scan-concurrency: 16

# Set tidb session variables to speed up checksum/analyze table.
# See https://pingcap.com/docs/sql/statistics/#control-analyze-concurrency for the meaning of each setting
build-stats-concurrency: 20
distsql-scan-concurrency: 100
index-serial-scan-concurrency: 20
checksum-table-concurrency: 16

# cron performs some periodic actions in background
cron:
# duration between which Lightning will automatically refresh the import mode status.
# should be shorter than the corresponding TiKV setting
switch-mode: '5m'
# the interval at which the import progress will be printed to the log.
log-progress: '5m'

# post-restore provide some options which will be executed after all kv data has been imported into the tikv cluster.
# the execution order (if set to true) is: checksum -> compact -> analyze
post_restore:
# if it is set to true, tidb-lightning will perform the ADMIN CHECKSUM TABLE <table> operation on the tables one by one.
checksum: true
# if it is set to true, tidb-lightning will perform a full Compact operation on all the data.
# If the Compact operation fails, you can use ./bin/tidb-lightning -compact or the command of tikv-ctl to compact the data manually.
compact: true
# compaction is performed automatically starting v2.1.6. These settings should be left as `false`.
# level-1-compact: false
# compact: false
# if it is set to true, tidb-lightning will perform the ANALYZE TABLE <table> operation on the tables one by one.
# If the Analyze operation fails, you can analyze data manually on the MySQL client.
analyze: true
22 changes: 16 additions & 6 deletions conf/tikv-importer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@ rocksdb:
write-buffer-size: "1GB"
# the maximum number of write buffers that are built up in memory.
max-write-buffer-number: 8

# the compression algorithms used in different levels.
# the algorithm at level-0 is used to compress KV data.
# the algorithm at level-6 is used to compress SST files.
# the algorithms at level-1 ~ level-5 are not used now.
compression-per-level: ["lz4", "no", "no", "no", "no", "no", "zstd"]
compression-per-level: ["lz4", "no", "no", "no", "no", "no", "lz4"]
writecf:
compression-per-level: ["lz4", "no", "no", "no", "no", "no", "lz4"]

import:
# this directory is used to store the data written by `tidb-lightning`.
Expand All @@ -43,12 +45,20 @@ import:
num-threads: 16
# the number of concurrent import jobs.
num-import-jobs: 24
# the stream channel window size. Stream will be blocked when the channel is full.
# the stream channel window size. Stream will be blocked when the channel is full.
stream-channel-window: 128
# maximum duration to prepare regions.
# max-prepare-duration = "5m"
# split regions into this size according to the importing data.
# region-split-size = "96MB"
# maximum number of open engines, max-open-engines must be >= table-concurrency value in tidb-lightning.toml
# region-split-size = "512MB"
# max-open-engines must be >= index-concurrency + table-concurrency value in tidb-lightning.toml
max-open-engines: 8
# write-buffer-size * max-write-buffer-number * max-open-engines should be less than the machine's memory size
# speed limit of uploading SST to TiKV (unit: byte/s)
# upload-speed-limit: "512MB"
# minimum ratio of target store available space: store_available_space / store_capacity
# Importer will pause uploading SST to the target store if its available ratio is less than
# this value, and give the store some time window to balance regions.
min-available-ratio: 0.05

# Note: the machine's memory size should be more than
# (write-buffer-size * max-write-buffer-number * 2) + (num-import-jobs * region-split-size * 2)
2 changes: 1 addition & 1 deletion group_vars/all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ docker_bin_dir: "/usr/bin"
retry_stagger: 5

# the listening address of tikv-importer. tidb-lightning needs to connect to this address to write data. Set it to the actual IP address.
tikv_importer_port: 20170
tikv_importer_port: 8287

# deployment methods, [binary, docker] docker deployment method is not recommended and deprecated.
deployment_method: binary
Expand Down
2 changes: 1 addition & 1 deletion group_vars/lightning_server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
dummy:

# background profile for debugging
tidb_lightning_pprof_port: 10089
tidb_lightning_pprof_port: 8289

# the source data directory of Mydumper
data_source_dir: "{{ deploy_dir }}/mydumper"
Expand Down
9 changes: 5 additions & 4 deletions roles/tidb_lightning/tasks/binary_deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@

- name: create run script
template:
src: "{{ item }}_lightning_binary.sh.j2"
dest: "{{ deploy_dir }}/scripts/{{ item }}_lightning.sh"
src: "{{ item }}_binary.sh.j2"
dest: "{{ deploy_dir }}/scripts/{{ item }}.sh"
mode: "0755"
backup: yes
with_items:
- start
- stop
- start_lightning
- stop_lightning
- tidb_lightning_ctl
register: lightning_script

- name: backup script file
Expand Down
20 changes: 17 additions & 3 deletions roles/tidb_lightning/templates/tidb-lightning.toml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,24 @@

[mydumper]
{% for item, value in tidb_lightning_conf.mydumper | dictsort -%}
{{ item }} = {{ value | to_json}}
{% if item != 'csv' -%}
{{ item }} = {{ value | to_json }}
{% endif -%}
{% endfor %}

{% if tidb_lightning_conf.mydumper.csv -%}
[mydumper.csv]
{% for item, value in tidb_lightning_conf.mydumper.csv | dictsort -%}
{{ item }} = {{ value | to_json }}
{% endfor %}
{% endif -%}

[tidb]
{% for item, value in tidb_lightning_conf.tidb | dictsort -%}
{% if item == "port" or item == "status-port" %}
{{ item }} = {{ value | int}}
{{ item }} = {{ value | int }}
{% else %}
{{ item }} = {{ value | to_json}}
{{ item }} = {{ value | to_json }}
{% endif %}
{% endfor %}
pd-addr = "{{ all_pd |join(',') }}"
Expand All @@ -40,3 +49,8 @@ pd-addr = "{{ all_pd |join(',') }}"
{% for item, value in tidb_lightning_conf.post_restore | dictsort -%}
{{ item }} = {{ value | to_json }}
{% endfor %}

[cron]
{% for item, value in tidb_lightning_conf.cron | dictsort -%}
{{ item }} = {{ value | to_json }}
{% endfor %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Jinja2-templated wrapper to run tidb-lightning-ctl from the deployment
# directory, forwarding all command-line arguments to the binary.
# NOTE(review): rendered by Ansible — {{ deploy_dir }} and {{ timezone }}
# are substituted at deploy time.

# abort immediately if any command fails
set -e
# raise the open-file-descriptor limit before running the tool
ulimit -n 1000000
cd "{{ deploy_dir }}" || exit 1

export TZ={{ timezone }}

# pass every argument ("$@") through to tidb-lightning-ctl with the deployed config
./bin/tidb-lightning-ctl -config ./conf/tidb-lightning.toml "$@"
Loading

0 comments on commit 1e946f8

Please sign in to comment.