Skip to content

Commit

Permalink
[Bazel] Improve Mobile-Install Incremental Manifest Generating by app…
Browse files Browse the repository at this point in the history
…lying multi-thread

**Background**
While using `mobile-install`, we noticed that it consistently takes longer to run on incremental builds. Taking our app as an example, the incremental build metrics for a single-line Kotlin code change look like this:

| Command                   | Time |
|---------------------------|------|
| bazel build + adb install | 63s  |
| mobile-install            | 91s  |

After digging into it, I found that the bottleneck is the "Incremental Manifest Generating" action, which takes a lot of time (35+ sec) for multidex builds. The time is spent on the checksum calculation for all dex files. The SHA256 checksum for each dex file takes around 1-2 sec. Currently our app has 10 dex shards and each dex zip contains 2-3 dex files, so processing them sequentially takes more than 30 seconds.

**Change**
In this PR, I added multithreading support to this script so that the checksum calculation can be done concurrently, which reduced the "Incremental Manifest Generating" action to 6 seconds (an 80%+ improvement).

**Result & Test**
After applying this change, the total incremental build time has been reduced to 43 seconds, with a 30s+ improvement from the `Incremental Manifest Generating` step.
Before:
<img width=400 src="https://user-images.githubusercontent.com/6951238/92814439-f678ef80-f377-11ea-967f-92767a08587e.png">

After:
<img width=400 src="https://user-images.githubusercontent.com/6951238/92814445-fb3da380-f377-11ea-8de6-ff8c6b77c3f8.png">

You can also easily verify this from command line:
```
jchen tmp % time python ../build_incremental_dexmanifest.py ../output/outmanifest.txt shard1.dex.zip  shard10.dex.zip shard2.dex.zip  shard3.dex.zip  shard4.dex.zip  shard5.dex.zip  shard6.dex.zip  shard7.dex.zip  shard8.dex.zip  shard9.dex.zip
python ../build_incremental_dexmanifest.py ../output/outmanifest.txt           0.70s user 0.72s system 31% cpu 4.583 total
jchen tmp % time python ../build_incremental_dexmanifest_before.py ../output/outmanifest.txt shard1.dex.zip  shard10.dex.zip shard2.dex.zip  shard3.dex.zip  shard4.dex.zip  shard5.dex.zip  shard6.dex.zip  shard7.dex.zip  shard8.dex.zip  shard9.dex.zip
python ../build_incremental_dexmanifest_before.py ../output/outmanifest.txt    0.65s user 0.64s system 3% cpu 37.908 total
```

Closes #12085.

PiperOrigin-RevId: 340996883
  • Loading branch information
ThomasCJY authored and copybara-github committed Nov 6, 2020
1 parent f9f8ce7 commit 1049fe8
Showing 1 changed file with 53 additions and 21 deletions.
74 changes: 53 additions & 21 deletions tools/android/build_incremental_dexmanifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@

import hashlib
import os
from queue import Queue
import shutil
import sys
import tempfile
from threading import Thread
import zipfile


Expand All @@ -47,6 +49,8 @@ def __init__(self):
self.output_dex_counter = 1
self.checksums = set()
self.tmpdir = None
self.queue = Queue()
self.threads_list = list()

def __enter__(self):
self.tmpdir = tempfile.mkdtemp()
Expand All @@ -55,8 +59,14 @@ def __enter__(self):
def __exit__(self, unused_type, unused_value, unused_traceback):
  """Deletes the temporary directory, ignoring any errors while removing it."""
  shutil.rmtree(self.tmpdir, ignore_errors=True)

def Checksum(self, filename):
"""Compute the SHA-256 checksum of a file."""
def Checksum(self, filename, input_dex_or_zip, zippath):
"""Compute the SHA-256 checksum of a file.
This method could be invoked concurrently.
Therefore we need to include other metadata like input_dex_or_zip to
keep the context.
"""
h = hashlib.sha256()
with open(filename, "rb") as f:
while True:
Expand All @@ -66,30 +76,42 @@ def Checksum(self, filename):

h.update(data)

return h.hexdigest()
return h.hexdigest(), input_dex_or_zip, zippath

def AddDexes(self, dex_metadata_list):
  """Adds all dex files to the output manifest in a deterministic order.

  The entries are sorted by (input_dex_or_zip, zippath) so that the same
  input always yields the same manifest, regardless of the order in which
  the checksum results were collected.

  A dex whose checksum has already been recorded is skipped; all remaining
  entries are still processed.

  Args:
    dex_metadata_list: A list of [fs_checksum, input_dex_or_zip, zippath],
      where fs_checksum is the SHA-256 checksum for dex file, input_dex_or_zip
      is the input file written to the manifest, zippath is the zip path
      written to the manifest or None if the input file is not a .zip.

  Returns:
    None.
  """
  dex_metadata_list_sorted = sorted(
      dex_metadata_list, key=lambda x: (x[1], x[2]))
  for fs_checksum, input_dex_or_zip, zippath in dex_metadata_list_sorted:
    if fs_checksum in self.checksums:
      # Duplicate content: skip only this dex. A `return` here would abort
      # the loop and silently drop every later dex from the manifest.
      continue
    self.checksums.add(fs_checksum)
    zip_dex = "incremental_classes%d.dex" % self.output_dex_counter
    self.output_dex_counter += 1
    self.manifest_lines.append(
        "%s %s %s %s" %
        (input_dex_or_zip, zippath if zippath else "-", zip_dex, fs_checksum))

def ComputeChecksumConcurrently(self, input_dex_or_zip, zippath, dex):
  """Starts a thread that checksums one dex file.

  The thread calls Checksum(dex, input_dex_or_zip, zippath) and puts the
  returned value on self.queue. The started thread is appended to
  self.threads_list so that it can be joined later.

  Args:
    input_dex_or_zip: the input file name, passed through to Checksum.
    zippath: the path inside the zip (or None), passed through to Checksum.
    dex: the dex file on disk whose checksum is computed.
  """

  def _ChecksumToQueue():
    self.queue.put(self.Checksum(dex, input_dex_or_zip, zippath))

  worker = Thread(target=_ChecksumToQueue)
  worker.start()
  self.threads_list.append(worker)

def Run(self, argv):
"""Creates a dex manifest."""
Expand All @@ -116,9 +138,19 @@ def Run(self, argv):

input_dex_zip.extract(input_dex_dex, input_dex_dir)
fs_dex = input_dex_dir + "/" + input_dex_dex
self.AddDex(input_filename, input_dex_dex, fs_dex)
self.ComputeChecksumConcurrently(input_filename, input_dex_dex,
fs_dex)
elif input_filename.endswith(".dex"):
self.AddDex(input_filename, None, input_filename)
self.ComputeChecksumConcurrently(input_filename, None, input_filename)
# Collect results from all threads
for t in self.threads_list:
t.join()

results = []
while not self.queue.empty():
fs_checksum, input_dex_or_zip, zippath = self.queue.get()
results.append([fs_checksum, input_dex_or_zip, zippath])
self.AddDexes(results)

with open(argv[0], "wb") as manifest:
manifest.write(("\n".join(self.manifest_lines)).encode("utf-8"))
Expand Down

0 comments on commit 1049fe8

Please sign in to comment.