From a10c89a0bd4463de29045c7bd0061b18e5ba7a3d Mon Sep 17 00:00:00 2001 From: zburning <798672141@qq.com> Date: Fri, 25 Oct 2019 11:25:38 +0800 Subject: [PATCH 01/59] Update transformer.py --- .../language_model/transformer/transformer.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/scripts/language_model/transformer/transformer.py b/scripts/language_model/transformer/transformer.py index 82e16705c0..32d5909c77 100644 --- a/scripts/language_model/transformer/transformer.py +++ b/scripts/language_model/transformer/transformer.py @@ -486,6 +486,45 @@ def hybrid_forward(self, F, inputs, pos_emb, mem_value, mask, segments): class _BaseXLNet(mx.gluon.HybridBlock): + """ + Parameters + ---------- + vocab_size : int or None, default None + The size of the vocabulary. + num_layers : int + units : int + hidden_size : int + number of units in the hidden layer of position-wise feed-forward networks + num_heads : int + Number of heads in multi-head attention + activation + Activation function used for the position-wise feed-forward networks + two_stream + If True, use Two-Stream Self-Attention. Typically set to True for + pre-training and False during finetuning. + scaled : bool + Whether to scale the softmax input by the sqrt of the input dimension + in multi-head attention + dropout : float + attention_dropout : float + use_residual : bool + clamp_len : int + Clamp all relative distances larger than clamp_len + use_decoder : bool, default True + Whether to include the decoder for language model prediction. + tie_decoder_weight : bool, default True + Whether to tie the decoder weight with the input embeddings + weight_initializer : str or Initializer + Initializer for the input weights matrix, used for the linear + transformation of the inputs. + bias_initializer : str or Initializer + Initializer for the bias vector. + prefix : str, default 'rnn_' + Prefix for name of `Block`s (and name of weight if params is `None`). 
+ params : Parameter or None + Container for weight sharing between cells. Created if `None`. + + """ def __init__(self, vocab_size, num_layers=2, units=128, hidden_size=2048, num_heads=4, activation='gelu', two_stream: bool = False, scaled=True, dropout=0.0, attention_dropout=0.0, use_residual=True, clamp_len: typing.Optional[int] = None, @@ -529,6 +568,33 @@ def __init__(self, vocab_size, num_layers=2, units=128, hidden_size=2048, num_he params=self.word_embed.params if tie_decoder_weight else None) def hybrid_forward(self, F, step_input, segments, mask, pos_seq, mems, mask_embed): #pylint: disable=arguments-differ + """Transformer Decoder Attention Cell. + + Parameters + ---------- + step_input : NDArray + Input of shape [batch_size, query_length] + segments : Symbol or NDArray + One-hot vector indicating if a query-key pair is in the same + segment or not. Shape [batch_size, query_length, query_length + + memory_length, 2]. `1` indicates that the pair is not in the same + segment. + mask : Symbol or NDArray + Attention mask of shape (batch_size, length, length + mem_length) + pos_seq : Symbol or NDArray + Relative distances + mems : List of NDArray or Symbol, optional + Memory from previous forward passes containing + `num_layers` `NDArray`s or `Symbol`s each of shape [batch_size, + memory_length, units]. + + Returns + ------- + core_out : NDArray or Symbol + For use_decoder=True, logits. Otherwise output of last layer. + hids : List of NDArray or Symbol + Stacking the output of each layer + """ if self._clamp_len: pos_seq = F.clip(pos_seq, a_min=0, a_max=self._clamp_len) @@ -635,6 +701,8 @@ def forward(self, step_input, token_types, mems=None, mask=None): # pylint: dis Optional memory from previous forward passes containing `num_layers` `NDArray`s or `Symbol`s each of shape [batch_size, memory_length, units]. 
+ mask : Symbol or NDArray + Attention mask of shape (batch_size, length, length + mem_length) Returns ------- From 23be6c639dc0f1b5a7b2322e8c3f1572457eea00 Mon Sep 17 00:00:00 2001 From: zburning <798672141@qq.com> Date: Fri, 25 Oct 2019 11:33:12 +0800 Subject: [PATCH 02/59] Update transformer.py --- scripts/language_model/transformer/transformer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/language_model/transformer/transformer.py b/scripts/language_model/transformer/transformer.py index 32d5909c77..362d3c204b 100644 --- a/scripts/language_model/transformer/transformer.py +++ b/scripts/language_model/transformer/transformer.py @@ -489,7 +489,7 @@ class _BaseXLNet(mx.gluon.HybridBlock): """ Parameters ---------- - vocab_size : int or None, default None + vocab_size : int The size of the vocabulary. num_layers : int units : int @@ -568,11 +568,10 @@ def __init__(self, vocab_size, num_layers=2, units=128, hidden_size=2048, num_he params=self.word_embed.params if tie_decoder_weight else None) def hybrid_forward(self, F, step_input, segments, mask, pos_seq, mems, mask_embed): #pylint: disable=arguments-differ - """Transformer Decoder Attention Cell. 
- + """ Parameters ---------- - step_input : NDArray + step_input : Symbol or NDArray Input of shape [batch_size, query_length] segments : Symbol or NDArray One-hot vector indicating if a query-key pair is in the same From 090a6cd70826f2fcad2ae9d46fab2748aac2efab Mon Sep 17 00:00:00 2001 From: zburning <798672141@qq.com> Date: Fri, 25 Oct 2019 13:11:05 +0800 Subject: [PATCH 03/59] Update transformer.py --- scripts/language_model/transformer/transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/language_model/transformer/transformer.py b/scripts/language_model/transformer/transformer.py index 362d3c204b..f0c86e9a81 100644 --- a/scripts/language_model/transformer/transformer.py +++ b/scripts/language_model/transformer/transformer.py @@ -568,7 +568,7 @@ def __init__(self, vocab_size, num_layers=2, units=128, hidden_size=2048, num_he params=self.word_embed.params if tie_decoder_weight else None) def hybrid_forward(self, F, step_input, segments, mask, pos_seq, mems, mask_embed): #pylint: disable=arguments-differ - """ + """ Parameters ---------- step_input : Symbol or NDArray From 358169f3cc106da7c39cd70d0855f2389e74e76a Mon Sep 17 00:00:00 2001 From: zburning <798672141@qq.com> Date: Sat, 26 Oct 2019 10:03:00 +0800 Subject: [PATCH 04/59] Update transformer.py deleting trailing white space --- scripts/language_model/transformer/transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/language_model/transformer/transformer.py b/scripts/language_model/transformer/transformer.py index f0c86e9a81..e154c468fd 100644 --- a/scripts/language_model/transformer/transformer.py +++ b/scripts/language_model/transformer/transformer.py @@ -586,7 +586,7 @@ def hybrid_forward(self, F, step_input, segments, mask, pos_seq, mems, mask_embe Memory from previous forward passes containing `num_layers` `NDArray`s or `Symbol`s each of shape [batch_size, memory_length, units]. 
- + Returns ------- core_out : NDArray or Symbol From 278340c673095a83593813bee9e6763f5d9d2ee2 Mon Sep 17 00:00:00 2001 From: zburning <798672141@qq.com> Date: Tue, 29 Oct 2019 11:28:40 +0800 Subject: [PATCH 05/59] Update transformer.py --- scripts/language_model/transformer/transformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/language_model/transformer/transformer.py b/scripts/language_model/transformer/transformer.py index e154c468fd..e84e143cbe 100644 --- a/scripts/language_model/transformer/transformer.py +++ b/scripts/language_model/transformer/transformer.py @@ -519,9 +519,9 @@ class _BaseXLNet(mx.gluon.HybridBlock): transformation of the inputs. bias_initializer : str or Initializer Initializer for the bias vector. - prefix : str, default 'rnn_' + prefix : str, default None Prefix for name of `Block`s (and name of weight if params is `None`). - params : Parameter or None + params : ParameterDict or None Container for weight sharing between cells. Created if `None`. 
""" From b82b0e1c376b0f1a8d3c696b56ca27383b7ca5b8 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 2 Dec 2019 16:41:38 +0800 Subject: [PATCH 06/59] new features&test --- src/gluonnlp/data/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index 79ea84b9de..005a3f9950 100644 --- a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -38,10 +38,11 @@ from .utils import * from .word_embedding_evaluation import * from .intent_slot import * +from .data_preprocessing_transform import * __all__ = (['batchify'] + utils.__all__ + transforms.__all__ + sampler.__all__ + dataset.__all__ + corpora.__all__ + sentiment.__all__ + word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + translation.__all__ + registry.__all__ + question_answering.__all__ + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ - + glue.__all__) + + glue.__all__ + data_preprocessing_transform.__all__) From 8b160d9b34ce3692661c185ef211f0f99a04baa2 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 2 Dec 2019 17:15:05 +0800 Subject: [PATCH 07/59] fix lint --- src/gluonnlp/data/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index abaa1c4c11..01944db7f5 100644 --- a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -47,4 +47,3 @@ + translation.__all__ + registry.__all__ + question_answering.__all__ + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ + glue.__all__ + data_preprocessing_transform.__all__) - From 6d553d6444675925140ad4a4493e3f4e21c62d5c Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 2 Dec 2019 17:29:59 +0800 Subject: [PATCH 08/59] fix lint --- src/gluonnlp/data/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index 01944db7f5..ca1159bc1c 100644 --- 
a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -21,7 +21,8 @@ from . import (batchify, candidate_sampler, conll, corpora, dataloader, dataset, question_answering, registry, sampler, sentiment, stream, transforms, translation, utils, - word_embedding_evaluation, intent_slot, glue) + word_embedding_evaluation, intent_slot, glue, + data_preprocessing_transform) from .candidate_sampler import * from .conll import * from .glue import * @@ -46,4 +47,5 @@ + word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + translation.__all__ + registry.__all__ + question_answering.__all__ + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ - + glue.__all__ + data_preprocessing_transform.__all__) + + glue.__all__ + data_preprocessing_transform.__all__) + # pytype: disable=attribute-error From bf9297b0bf182fd53e94fd6b86f78c6b6ea5f2f3 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 2 Dec 2019 17:37:53 +0800 Subject: [PATCH 09/59] fix lint --- src/gluonnlp/data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index ca1159bc1c..07e5bfea87 100644 --- a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -47,5 +47,5 @@ + word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + translation.__all__ + registry.__all__ + question_answering.__all__ + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ - + glue.__all__ + data_preprocessing_transform.__all__) + + glue.__all__ + data_preprocessing_transform.__all__) # pytype: disable=attribute-error From 99b8ebc91bbe1fa79b5e27895cc7c1c1a1e26e62 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 2 Dec 2019 18:36:46 +0800 Subject: [PATCH 10/59] fix lint --- src/gluonnlp/data/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index 07e5bfea87..5bfb550d99 100644 --- 
a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -47,5 +47,4 @@ + word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + translation.__all__ + registry.__all__ + question_answering.__all__ + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ - + glue.__all__ + data_preprocessing_transform.__all__) - # pytype: disable=attribute-error + + glue.__all__ + data_preprocessing_transform.__all__)# pytype: disable=attribute-error From 203e319c126f9b69c297acfacd7080034f90c92a Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 3 Dec 2019 19:11:25 +0800 Subject: [PATCH 11/59] new --- .../data/data_preprocessing_transform.py | 217 +++++++++--------- src/gluonnlp/data/qa_preprocessing_utils.py | 52 +++++ tests/unittest/test_dataset_transform.py | 33 ++- 3 files changed, 187 insertions(+), 115 deletions(-) diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py index e0208ffc74..e0b9fa7f51 100644 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ b/src/gluonnlp/data/data_preprocessing_transform.py @@ -1,99 +1,90 @@ -# Copyright 2018 The Google AI Language Team Authors and DMLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
"""glue and squad transform.""" __all__ = [ - 'TruncateTransform', 'InsertTransform', 'TokenizeTransform', + 'TruncateTransform', 'ConcatSeqTransform', 'BertTStyleSentenceTransform', 'BertStyleGlueTransform', 'BertStyleSQuADTransform', 'SQuADExampleTransform', 'DocSpanTransform', 'TokenizeAndPositionAlignTransform', 'SimpleQAPreparation', 'SquadExample' ] import collections +from functools import partial import numpy as np from gluonnlp.data.utils import whitespace_splitter -from .qa_preprocessing_utils import truncate_seq_pair, improve_answer_span +from .qa_preprocessing_utils import truncate_seqs_equal, improve_answer_span class TruncateTransform: """ - Truncate a sequence(pair) to max length. + Truncate a sequence(list) to max length. Parameters ---------- max_len : int truncate_fn : callable - A function determines how to truncate the sequence pair + A function determines how to truncate the sequence (list). + The function should implement argument max_length Returns ------- - list : list of sequence + list : list of sequences or a single sequence """ - def __init__(self, max_len, truncate_fn=truncate_seq_pair): + def __init__(self, max_len, truncate_fn=truncate_seqs_equal): self._max_len = max_len - self.fn = truncate_fn + self.fn = partial(truncate_fn, max_length=max_len) def __call__(self, seqs): - assert isinstance(seqs, collections.Iterable) - if len(seqs) > 1: - # Modifies `tokens_a` and `tokens_b` in place so that the total - # length is less than the specified length. 
- # Account for [CLS], [SEP], [SEP] with "- 3" - token_a, token_b = seqs - self.fn(token_a, token_b, self._max_len) - return [token_a, token_b] + assert isinstance(seqs, collections.abc.Iterable) + if len(seqs) == 0: + return seqs + if isinstance(seqs[0], collections.abc.Iterable) and not isinstance( + seqs[0], str): + #if it contains a list of seqs + seqs = self.fn(seqs) + return seqs else: - # Account for [CLS] and [SEP] with "- 2" - if len(seqs[0]) > self._max_len - 2: - seqs = [seqs[0][0:(self._max_len - 2)]] - return [seqs[0]] + #if it is a single sequence + return seqs[:self._max_len] -class InsertTransform: - """Insert special tokens for sequence pairs or single sequences. - For sequence pairs, the input is a tuple of 2 strings: +class ConcatSeqTransform: + """Insert special tokens for sequence list or a single sequence. + For sequence pairs, the input is a list of 2 strings: text_a, text_b. Inputs: text_a: 'is this jacksonville ?' text_b: 'no it is not' - Tokenization: - text_a: 'is this jack ##son ##ville ?' - text_b: 'no it is not .' - Processed: - tokens: '[CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]' - type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 - valid_length: 14 - - For single sequences, the input is a tuple of single string: - text_a. + start_token: [CLS] + token_after_seg: [[SEP], [SEP]] + end_token: None - Inputs: - text_a: 'the dog is hairy .' - Tokenization: - text_a: 'the dog is hairy .' Processed: - text_a: '[CLS] the dog is hairy . [SEP]' - type_ids: 0 0 0 0 0 0 0 - valid_length: 7 + tokens: '[CLS] is this jacksonville ? [SEP] no it is not . [SEP]' + segment_ids: 0 0 0 0 0 0 1 1 1 1 1 1 + p_mask: 0 0 0 0 0 1 0 0 0 0 0 1 + valid_length: 12 Parameters ---------- - line: tuple of str - Input strings. For sequence pairs, the input is a tuple of 2 strings: - (text_a, text_b). For single sequences, the input is a tuple of single - string: (text_a,). + vocab : Vocab + If vocab is not None. 
The tokens will be converted to ids before return + + token_after_seg : list + The special tokens to be appended to each sequence. For example: + Given: + seqs: [[1, 2], [3, 4], [5, 6]] + token_after_seg: [None, 7] + it will be: + [1, 2, 3, 4, 7, 5, 6] + + start_token : string + The special token to be added to the start + + end_token : string + The special token to be added to the end + + seqs : list of sequences or a single sequence Returns ------- @@ -102,64 +93,59 @@ class InsertTransform: np.array: valid length in 'int32', shape (batch_size,) np.array: mask for special tokens """ - def __init__(self, cls_token, sep_token, vocab, left_cls=True): - self._cls_token = cls_token - self._sep_token = sep_token - self._left_cls = left_cls + def __init__(self, + vocab=None, + token_after_seg=None, + start_token=None, + end_token=None): self._vocab = vocab + self._start_token = start_token + self._end_token = end_token + self._token_after_seg = token_after_seg if token_after_seg else [] - def __call__(self, token_truncated): - # The embedding vectors for `type=0` and `type=1` were learned during - # pre-training and are added to the wordpiece embedding vector - # (and position vector). This is not *strictly* necessary since - # the [SEP] token unambiguously separates the sequences, but it makes - # it easier for the model to learn the concept of sequences. - - # For classification tasks, the first/last vector (corresponding to [CLS]) is - # used as as the "sentence vector". Note that this only makes sense because - # the entire model is fine-tuned. 
- assert self._left_cls #currently we only support left cls + def __call__(self, seqs): + assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 tokens = [] - tokens_a = token_truncated[0] if isinstance(token_truncated, - list) else token_truncated - if self._left_cls: - tokens.append(self._cls_token) - + if self._start_token: + tokens.append(self._start_token) + tokens_a = seqs if not isinstance( + seqs[0], collections.abc.Iterable) else seqs[0] tokens.extend(tokens_a) p_mask = [0] * len(tokens) + sp_token_counter = 0 + + if sp_token_counter < len(self._token_after_seg): + if self._token_after_seg[sp_token_counter]: + tokens.append(self._token_after_seg[sp_token_counter]) + sp_token_counter += 1 - tokens.append(self._sep_token) segment_ids = [0] * len(tokens) p_mask.append(1) - - if isinstance(token_truncated, list) and len(token_truncated) == 2: - tokens_b = token_truncated[1] - tokens.extend(tokens_b) - p_mask.extend([0] * (len(tokens) - len(p_mask))) - tokens.append(self._sep_token) - p_mask.append(1) - if not self._left_cls: - tokens.append(self._cls_token) - segment_ids.extend([1] * (len(tokens) - len(segment_ids))) - - input_ids = self._vocab[tokens] + seqs = seqs[1:] + + if len(seqs) > 0 and isinstance(seqs[0], collections.abc.Iterable): + # if seqs is a list of sequence + for (i, seq) in enumerate(seqs): + tokens_b = seq + tokens.extend(tokens_b) + p_mask.extend([0] * (len(tokens) - len(p_mask))) + if sp_token_counter < len(self._token_after_seg): + if self._token_after_seg[sp_token_counter]: + tokens.append(self._token_after_seg[sp_token_counter]) + p_mask.append(1) + sp_token_counter += 1 + segment_ids.extend([i + 1] * (len(tokens) - len(segment_ids))) + + if self._end_token: + tokens.append(self._end_token) + p_mask.append(0) + segment_ids.append(len(seqs)) + + if self._vocab: + tokens = self._vocab[tokens] # The valid length of sentences. Only real tokens are attended to. 
- valid_length = len(input_ids) - return input_ids, segment_ids, valid_length, p_mask - - -class TokenizeTransform: - """ - Tokenize a sequence or a list of sequence - """ - def __init__(self, tokenizer): - self._tokenizer = tokenizer - - def __call__(self, tokens): - if isinstance(tokens, collections.abc.Iterable): - return [self._tokenizer(token) for token in tokens] - else: - return [self._tokenizer(tokens)] + valid_length = len(tokens) + return tokens, segment_ids, valid_length, p_mask class BertTStyleSentenceTransform: @@ -181,11 +167,10 @@ class BertTStyleSentenceTransform: def __init__(self, tokenizer, max_seq_length=None, - vocab=None, - left_cls=True): + vocab=None): assert tokenizer.vocab or vocab self.Truncate = TruncateTransform(max_len=max_seq_length) - self.Tokenizer = TokenizeTransform(tokenizer) + self._tokenizer = tokenizer self._vocab = tokenizer.vocab if vocab is None else vocab # RoBERTa does not register CLS token and SEP token if hasattr(self._vocab, 'cls_token'): @@ -196,14 +181,14 @@ def __init__(self, self._sep_token = self._vocab.sep_token else: self._sep_token = self._vocab.eos_token + self._token_after_seg = [self._sep_token] * 2 - self.InsertSpecialTokens = InsertTransform(self._cls_token, - self._sep_token, - self._vocab, - left_cls=left_cls) + self.InsertSpecialTokens = ConcatSeqTransform(self._vocab, + self._token_after_seg, + self._cls_token) def __call__(self, line): - tokens_raw = self.Tokenizer(line) + tokens_raw = [self._tokenizer(l) for l in line] tokens_trun = self.Truncate(tokens_raw) input_ids, segment_ids, valid_length, _ = self.InsertSpecialTokens( tokens_trun) @@ -236,6 +221,10 @@ def __init__(self, for key in self.label_alias: self._label_map[key] = self._label_map[ self.label_alias[key]] + if len(class_labels) > 1: + max_seq_length += 3 # account for special tokens + else: + max_seq_length += 2 self.sentense_transform = BertTStyleSentenceTransform( tokenizer, max_seq_length=max_seq_length, vocab=vocab) @@ -418,7 +407,7 
@@ class SimpleQAPreparation: Note that this class does not check if max span. """ def __init__(self, cls_token, sep_token, vocab, is_training): - self.insert = InsertTransform(cls_token, sep_token, vocab) + self.insert = ConcatSeqTransform(cls_token, sep_token, vocab) self.is_training = is_training def __call__(self, diff --git a/src/gluonnlp/data/qa_preprocessing_utils.py b/src/gluonnlp/data/qa_preprocessing_utils.py index 83f38da898..c546933da5 100644 --- a/src/gluonnlp/data/qa_preprocessing_utils.py +++ b/src/gluonnlp/data/qa_preprocessing_utils.py @@ -1,5 +1,7 @@ """Utility classes and functions for qa data processing""" +import collections + def truncate_seq_pair(tokens_a, tokens_b, max_length): """Truncates a sequence pair in place to the maximum length.""" @@ -17,6 +19,56 @@ def truncate_seq_pair(tokens_a, tokens_b, max_length): tokens_b.pop() +def truncate_seqs_equal(seqs, max_length): + """ + truncate a list of seqs so that the total length equals max length. + Trying to truncate the seqs to equal length. 
+ + Returns + ------- + list : list of truncated sequence keeping the origin order + """ + assert isinstance(seqs, list) and isinstance(seqs[0], + collections.abc.Iterable) + tokens_to_remove = sum(list(map(len, seqs))) - max_length + if tokens_to_remove <= 0: + return seqs + if len(seqs) == 1: + return [seqs[0][:-tokens_to_remove]] + + seq_len = list(map(lambda a: (len(a), a), seqs)) + seq_len = [[s[0], [i, s[1]]] for (i, s) in enumerate(seq_len)] + seq_len.sort(key=lambda a: a[0], reverse=True) + prev = seq_len[0] + count_removed = 0 + truncate_to = seq_len[0][0] + remain = 0 + for (i, seq) in enumerate(seq_len): + cur_remove = (prev[0] - seq_len[i][0]) * i + if count_removed + cur_remove < tokens_to_remove: + count_removed += cur_remove + prev = seq_len[i] + else: + truncate_to = prev[0] - (tokens_to_remove - count_removed) // i + remain = (tokens_to_remove - count_removed) % i + break + + if count_removed < tokens_to_remove and prev[1][0] == seq_len[-1][1][0]: + truncate_to = prev[0] - (tokens_to_remove - + count_removed) // len(seq_len) + remain = (tokens_to_remove - count_removed) % len(seq_len) + + for seq in seq_len: + seq[1][1] = seq[1][1][:truncate_to] + if remain > 0: + seq[1][1].pop() + remain -= 1 + seq_len.sort(key=lambda a: a[1][0]) + ret = [a[1][1] for a in seq_len] + assert sum(list(map(len, ret))) == max_length + return ret + + def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text): """Returns tokenized answer spans that better match the annotated answer.""" diff --git a/tests/unittest/test_dataset_transform.py b/tests/unittest/test_dataset_transform.py index a3a7439096..dcb0de51c1 100644 --- a/tests/unittest/test_dataset_transform.py +++ b/tests/unittest/test_dataset_transform.py @@ -19,7 +19,9 @@ import numpy as np from gluonnlp.vocab import BERTVocab -from gluonnlp.data import count_tokens, BERTTokenizer, BertStyleGlueTransform, BertStyleSQuADTransform +from gluonnlp.data import count_tokens, BERTTokenizer, \ + 
BertStyleGlueTransform, BertStyleSQuADTransform, TruncateTransform, \ + ConcatSeqTransform def test_bertstyle_glue_dataset_transform(): @@ -140,3 +142,32 @@ def test_bertstyle_squad_dataset_transform(): assert end_label == 0 assert is_impossible == True +def test_truncate(): + seqs = [[j*i for j in range(i)] for i in range(1,10)] + res1 = [[0], [0, 2], [0, 3, 6], [0, 4, 8], [0, 5, 10], [0, 6], [0, 7], [0, 8], [0, 9]] + seq = [i for i in range(20)] + res3 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + trunc = TruncateTransform(20) + assert all(np.array(trunc(seqs)) == np.array(res1)) + assert all(np.array(trunc(seq)) == np.array(res3)) + +def test_concat_sequence(): + seqs = [[3 * i + j for j in range(3)] for i in range(3)] + start_token = -1 + end_token = 999 + middle_tokens = ['a', 'b', 'c'] + concat = ConcatSeqTransform(start_token=start_token, token_after_seg=middle_tokens, end_token=end_token) + res = concat(seqs) + assert res[0] == [-1, 0, 1, 2, 'a', 3, 4, 5, 'b', 6, 7, 8, 'c', 999] + assert res[1] == [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2] + assert res[2] == 14 + assert res[3] == [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0] + + middle_tokens = ['a', None, 'b'] + concat = ConcatSeqTransform(start_token=start_token, token_after_seg=middle_tokens, end_token=end_token) + res = concat(seqs) + assert res[0] == [-1, 0, 1, 2, 'a', 3, 4, 5, 6, 7, 8, 'b', 999] + assert res[1] == [0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2] + assert res[2] == 13 + assert res[3] == [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0] + From 1e81e55e606d387ec8f5584a8a5555bd6de2300f Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 4 Dec 2019 13:30:38 +0800 Subject: [PATCH 12/59] fix lint --- .../data/data_preprocessing_transform.py | 88 +++++++++---------- src/gluonnlp/data/qa_preprocessing_utils.py | 2 +- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py index 
e0b9fa7f51..62bf54e619 100644 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ b/src/gluonnlp/data/data_preprocessing_transform.py @@ -49,50 +49,50 @@ def __call__(self, seqs): class ConcatSeqTransform: """Insert special tokens for sequence list or a single sequence. - For sequence pairs, the input is a list of 2 strings: - text_a, text_b. - - Inputs: - text_a: 'is this jacksonville ?' - text_b: 'no it is not' - start_token: [CLS] - token_after_seg: [[SEP], [SEP]] - end_token: None - - Processed: - tokens: '[CLS] is this jacksonville ? [SEP] no it is not . [SEP]' - segment_ids: 0 0 0 0 0 0 1 1 1 1 1 1 - p_mask: 0 0 0 0 0 1 0 0 0 0 0 1 - valid_length: 12 - - Parameters - ---------- - vocab : Vocab - If vocab is not None. The tokens will be converted to ids before return - - token_after_seg : list - The special tokens to be appended to each sequence. For example: - Given: - seqs: [[1, 2], [3, 4], [5, 6]] - token_after_seg: [None, 7] - it will be: - [1, 2, 3, 4, 7, 5, 6] - - start_token : string - The special token to be added to the start - - end_token : string - The special token to be added to the end - - seqs : list of sequences or a single sequence - - Returns - ------- - np.array: input token ids in 'int32', shape (batch_size, seq_length) - np.array: segment ids in 'int32', shape (batch_size, seq_length) - np.array: valid length in 'int32', shape (batch_size,) - np.array: mask for special tokens - """ + For sequence pairs, the input is a list of 2 strings: + text_a, text_b. + + Inputs: + text_a: 'is this jacksonville ?' + text_b: 'no it is not' + start_token: [CLS] + token_after_seg: [[SEP], [SEP]] + end_token: None + + Processed: + tokens: '[CLS] is this jacksonville ? [SEP] no it is not . [SEP]' + segment_ids: 0 0 0 0 0 0 1 1 1 1 1 1 + p_mask: 0 0 0 0 0 1 0 0 0 0 0 1 + valid_length: 12 + + Parameters + ---------- + vocab : Vocab + If vocab is not None. 
The tokens will be converted to ids before return + + token_after_seg : list + The special tokens to be appended to each sequence. For example: + Given: + seqs: [[1, 2], [3, 4], [5, 6]] + token_after_seg: [None, 7] + it will be: + [1, 2, 3, 4, 7, 5, 6] + + start_token : string + The special token to be added to the start + + end_token : string + The special token to be added to the end + + seqs : list of sequences or a single sequence + + Returns + ------- + np.array: input token ids in 'int32', shape (batch_size, seq_length) + np.array: segment ids in 'int32', shape (batch_size, seq_length) + np.array: valid length in 'int32', shape (batch_size,) + np.array: mask for special tokens + """ def __init__(self, vocab=None, token_after_seg=None, diff --git a/src/gluonnlp/data/qa_preprocessing_utils.py b/src/gluonnlp/data/qa_preprocessing_utils.py index c546933da5..ebc1789b8a 100644 --- a/src/gluonnlp/data/qa_preprocessing_utils.py +++ b/src/gluonnlp/data/qa_preprocessing_utils.py @@ -61,7 +61,7 @@ def truncate_seqs_equal(seqs, max_length): for seq in seq_len: seq[1][1] = seq[1][1][:truncate_to] if remain > 0: - seq[1][1].pop() + seq[1][1].pop() # pytype: disable=attribute-error remain -= 1 seq_len.sort(key=lambda a: a[1][0]) ret = [a[1][1] for a in seq_len] From 983dfcbd8b276761e2544d50dc8f4b1b9499bdb5 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 4 Dec 2019 18:36:38 +0800 Subject: [PATCH 13/59] new! 
--- .../data/data_preprocessing_transform.py | 198 ++++++++---------- src/gluonnlp/data/qa_preprocessing_utils.py | 73 +++---- tests/unittest/test_dataset_transform.py | 21 +- 3 files changed, 122 insertions(+), 170 deletions(-) diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py index 62bf54e619..1357a22237 100644 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ b/src/gluonnlp/data/data_preprocessing_transform.py @@ -8,6 +8,7 @@ ] import collections +import itertools from functools import partial import numpy as np from gluonnlp.data.utils import whitespace_splitter @@ -23,129 +24,89 @@ class TruncateTransform: max_len : int truncate_fn : callable A function determines how to truncate the sequence (list). - The function should implement argument max_length + The function should implement argument max_len, and return a list Returns ------- - list : list of sequences or a single sequence + list : list """ def __init__(self, max_len, truncate_fn=truncate_seqs_equal): self._max_len = max_len - self.fn = partial(truncate_fn, max_length=max_len) + self.fn = partial(truncate_fn, max_len=max_len) def __call__(self, seqs): assert isinstance(seqs, collections.abc.Iterable) if len(seqs) == 0: return seqs - if isinstance(seqs[0], collections.abc.Iterable) and not isinstance( - seqs[0], str): - #if it contains a list of seqs - seqs = self.fn(seqs) - return seqs - else: - #if it is a single sequence - return seqs[:self._max_len] + if not isinstance(seqs[0], collections.abc.Iterable) or isinstance(seqs[0], str): + seqs = [seqs] + + seqs = self.fn(seqs) + return seqs class ConcatSeqTransform: """Insert special tokens for sequence list or a single sequence. - For sequence pairs, the input is a list of 2 strings: - text_a, text_b. - - Inputs: - text_a: 'is this jacksonville ?' 
- text_b: 'no it is not' - start_token: [CLS] - token_after_seg: [[SEP], [SEP]] - end_token: None - - Processed: - tokens: '[CLS] is this jacksonville ? [SEP] no it is not . [SEP]' - segment_ids: 0 0 0 0 0 0 1 1 1 1 1 1 - p_mask: 0 0 0 0 0 1 0 0 0 0 0 1 - valid_length: 12 - - Parameters - ---------- - vocab : Vocab - If vocab is not None. The tokens will be converted to ids before return - - token_after_seg : list - The special tokens to be appended to each sequence. For example: - Given: - seqs: [[1, 2], [3, 4], [5, 6]] - token_after_seg: [None, 7] - it will be: - [1, 2, 3, 4, 7, 5, 6] - - start_token : string - The special token to be added to the start - - end_token : string - The special token to be added to the end - - seqs : list of sequences or a single sequence - - Returns - ------- - np.array: input token ids in 'int32', shape (batch_size, seq_length) - np.array: segment ids in 'int32', shape (batch_size, seq_length) - np.array: valid length in 'int32', shape (batch_size,) - np.array: mask for special tokens - """ + For sequence pairs, the input is a list of 2 strings: + text_a, text_b. + + Inputs: + text_a: 'is this jacksonville ?' + text_b: 'no it is not' + start_token: [CLS] + token_after_seg: [[SEP], [SEP]] + end_token: None + + Processed: + tokens: '[CLS] is this jacksonville ? [SEP] no it is not . [SEP]' + segment_ids: 0 0 0 0 0 0 1 1 1 1 1 1 + p_mask: 0 0 0 0 0 1 0 0 0 0 0 1 + valid_length: 12 + + Parameters + ---------- + vocab : Vocab + If vocab is not None. The tokens will be converted to ids before return + + token_after_seg : list + The special tokens to be appended to each sequence. 
For example: + Given: + seqs: [[1, 2], [3, 4], [5, 6]] + token_after_seg: [None, 7] + it will be: + [1, 2, 3, 4, 7, 5, 6] + + start_token : string + The special token to be added to the start + + end_token : string + The special token to be added to the end + + seqs : list of sequences or a single sequence + + Returns + ------- + np.array: input token ids in 'int32', shape (batch_size, seq_length) + np.array: segment ids in 'int32', shape (batch_size, seq_length) + np.array: mask for special tokens + """ def __init__(self, - vocab=None, - token_after_seg=None, - start_token=None, - end_token=None): + vocab=None): self._vocab = vocab - self._start_token = start_token - self._end_token = end_token - self._token_after_seg = token_after_seg if token_after_seg else [] - def __call__(self, seqs): + def __call__(self, seqs, seperators): assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 - tokens = [] - if self._start_token: - tokens.append(self._start_token) - tokens_a = seqs if not isinstance( - seqs[0], collections.abc.Iterable) else seqs[0] - tokens.extend(tokens_a) - p_mask = [0] * len(tokens) - sp_token_counter = 0 - - if sp_token_counter < len(self._token_after_seg): - if self._token_after_seg[sp_token_counter]: - tokens.append(self._token_after_seg[sp_token_counter]) - sp_token_counter += 1 - - segment_ids = [0] * len(tokens) - p_mask.append(1) - seqs = seqs[1:] - - if len(seqs) > 0 and isinstance(seqs[0], collections.abc.Iterable): - # if seqs is a list of sequence - for (i, seq) in enumerate(seqs): - tokens_b = seq - tokens.extend(tokens_b) - p_mask.extend([0] * (len(tokens) - len(p_mask))) - if sp_token_counter < len(self._token_after_seg): - if self._token_after_seg[sp_token_counter]: - tokens.append(self._token_after_seg[sp_token_counter]) - p_mask.append(1) - sp_token_counter += 1 - segment_ids.extend([i + 1] * (len(tokens) - len(segment_ids))) - - if self._end_token: - tokens.append(self._end_token) - p_mask.append(0) - 
segment_ids.append(len(seqs)) + concat = sum((seq + sep for sep, seq in + itertools.zip_longest(seperators, seqs, fillvalue=[])), []) + segment_ids = sum(([i] * (len(seq) + len(sep)) for i, (sep, seq) in + enumerate(itertools.zip_longest(seperators, seqs, fillvalue=[]))), []) + p_mask = sum(([0] * len(seq) + [1] * len(sep) for sep, seq in + itertools.zip_longest(seperators, seqs, fillvalue=[])), []) if self._vocab: - tokens = self._vocab[tokens] - # The valid length of sentences. Only real tokens are attended to. - valid_length = len(tokens) - return tokens, segment_ids, valid_length, p_mask + concat = self._vocab[concat] + + return concat, segment_ids, p_mask class BertTStyleSentenceTransform: @@ -183,17 +144,18 @@ def __init__(self, self._sep_token = self._vocab.eos_token self._token_after_seg = [self._sep_token] * 2 - self.InsertSpecialTokens = ConcatSeqTransform(self._vocab, - self._token_after_seg, - self._cls_token) + self.InsertSpecialTokens = ConcatSeqTransform() def __call__(self, line): tokens_raw = [self._tokenizer(l) for l in line] tokens_trun = self.Truncate(tokens_raw) - input_ids, segment_ids, valid_length, _ = self.InsertSpecialTokens( - tokens_trun) - return np.array(input_ids, dtype='int32'), np.array(valid_length, dtype='int32'),\ - np.array(segment_ids, dtype='int32') + tokens, segment_ids, _ = self.InsertSpecialTokens( + tokens_trun, [[self._sep_token]] * len(tokens_trun)) + + input_ids = self._vocab[[self._cls_token] + tokens] + segment_ids = [0] + segment_ids + + return np.array(input_ids, dtype='int32'), np.array(segment_ids, dtype='int32') class BertStyleGlueTransform: @@ -232,14 +194,14 @@ def __init__(self, def __call__(self, line): if self.has_label: - input_ids, valid_length, segment_ids = self.sentense_transform( + input_ids, segment_ids = self.sentense_transform( line[:-1]) label = line[-1] # map to int if class labels are available if self.class_labels: label = self._label_map[label] label = np.array([label], dtype=self._label_dtype) 
- return input_ids, valid_length, segment_ids, label + return input_ids, segment_ids, label else: return self.sentense_transform(line) @@ -407,8 +369,11 @@ class SimpleQAPreparation: Note that this class does not check if max span. """ def __init__(self, cls_token, sep_token, vocab, is_training): - self.insert = ConcatSeqTransform(cls_token, sep_token, vocab) + self.insert = ConcatSeqTransform() self.is_training = is_training + self._cls_token = cls_token + self._sep_token = sep_token + self._vocab = vocab def __call__(self, query, @@ -424,8 +389,11 @@ def __call__(self, for doc_span in doc_spans: span_text = all_doc_tokens[doc_span.start:doc_span.start + doc_span.length] - input_ids, segment_ids, valid_length, p_mask = self.insert( - [query, span_text]) + tokens, segment_ids, p_mask = self.insert( + [query, span_text], [[self._sep_token]] * 2) + input_ids = self._vocab[[self._cls_token] + tokens] + segment_ids = [0] + segment_ids + p_mask = [0] + p_mask start_position = 0 end_position = 0 if self.is_training and not is_impossible: @@ -449,7 +417,7 @@ def __call__(self, if not other_features: other_features = [] ret.append(other_features + [ - input_ids, segment_ids, valid_length, p_mask, start_position, + input_ids, segment_ids, p_mask, start_position, end_position, is_impossible ]) return ret diff --git a/src/gluonnlp/data/qa_preprocessing_utils.py b/src/gluonnlp/data/qa_preprocessing_utils.py index ebc1789b8a..0c2eeb1878 100644 --- a/src/gluonnlp/data/qa_preprocessing_utils.py +++ b/src/gluonnlp/data/qa_preprocessing_utils.py @@ -1,7 +1,7 @@ """Utility classes and functions for qa data processing""" -import collections +import numpy.ma as ma def truncate_seq_pair(tokens_a, tokens_b, max_length): """Truncates a sequence pair in place to the maximum length.""" @@ -19,7 +19,32 @@ def truncate_seq_pair(tokens_a, tokens_b, max_length): tokens_b.pop() -def truncate_seqs_equal(seqs, max_length): +def truncate_equal_by_len(lens, max_len): + """ + Reduce the sum of lens 
to max_len, always reduces the longer len + by preference + """ + if sum(lens) <= max_len: + return lens + + lens = ma.masked_array(lens, mask=[0] * len(lens)) + while True: + argmin = lens.argmin() + minval = lens[argmin] + quotient, remainder = divmod(max_len, len(lens) - sum(lens.mask)) + if minval <= quotient: # Ignore values that don't need truncation + lens.mask[argmin] = 1 + max_len -= minval + else: # Truncate all + lens.data[~lens.mask] = [ + quotient + 1 if i < remainder else quotient + for i in range(lens.count()) + ] + break + + return lens.data.tolist() + +def truncate_seqs_equal(seqs, max_len): """ truncate a list of seqs so that the total length equals max length. Trying to truncate the seqs to equal length. @@ -28,46 +53,10 @@ def truncate_seqs_equal(seqs, max_length): ------- list : list of truncated sequence keeping the origin order """ - assert isinstance(seqs, list) and isinstance(seqs[0], - collections.abc.Iterable) - tokens_to_remove = sum(list(map(len, seqs))) - max_length - if tokens_to_remove <= 0: - return seqs - if len(seqs) == 1: - return [seqs[0][:-tokens_to_remove]] - - seq_len = list(map(lambda a: (len(a), a), seqs)) - seq_len = [[s[0], [i, s[1]]] for (i, s) in enumerate(seq_len)] - seq_len.sort(key=lambda a: a[0], reverse=True) - prev = seq_len[0] - count_removed = 0 - truncate_to = seq_len[0][0] - remain = 0 - for (i, seq) in enumerate(seq_len): - cur_remove = (prev[0] - seq_len[i][0]) * i - if count_removed + cur_remove < tokens_to_remove: - count_removed += cur_remove - prev = seq_len[i] - else: - truncate_to = prev[0] - (tokens_to_remove - count_removed) // i - remain = (tokens_to_remove - count_removed) % i - break - - if count_removed < tokens_to_remove and prev[1][0] == seq_len[-1][1][0]: - truncate_to = prev[0] - (tokens_to_remove - - count_removed) // len(seq_len) - remain = (tokens_to_remove - count_removed) % len(seq_len) - - for seq in seq_len: - seq[1][1] = seq[1][1][:truncate_to] - if remain > 0: - seq[1][1].pop() # 
pytype: disable=attribute-error - remain -= 1 - seq_len.sort(key=lambda a: a[1][0]) - ret = [a[1][1] for a in seq_len] - assert sum(list(map(len, ret))) == max_length - return ret - + lens = list(map(len, seqs)) + lens_truncated = truncate_equal_by_len(lens, max_len) + seqs = [seq[:lens_truncated[i]] for (i, seq) in enumerate(seqs)] + return seqs def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text): diff --git a/tests/unittest/test_dataset_transform.py b/tests/unittest/test_dataset_transform.py index dcb0de51c1..043a16c19b 100644 --- a/tests/unittest/test_dataset_transform.py +++ b/tests/unittest/test_dataset_transform.py @@ -37,7 +37,7 @@ def test_bertstyle_glue_dataset_transform(): # test Transform for classification task bert_cls_dataset_t = BertStyleGlueTransform(tokenizer, 15, class_labels=[label_cls]) - token_ids, length, type_ids, label_ids = bert_cls_dataset_t((text_a, text_b, label_cls)) + token_ids, type_ids, label_ids = bert_cls_dataset_t((text_a, text_b, label_cls)) text_a_tokens = ['is', 'this', 'jack', '##son', '##ville', '?'] text_b_tokens = ['no', 'it', 'is', 'not'] text_a_ids = bert_vocab[text_a_tokens] @@ -53,28 +53,25 @@ def test_bertstyle_glue_dataset_transform(): end = len(text_a_tokens)+2+len(text_b_tokens)+1 valid_type_ids[start:end] = 1 assert all(token_ids == concated_ids) - assert length == len(vocab_tokens) + 3 assert all(type_ids == valid_type_ids) assert all(label_ids == np.array([label_cls], dtype=np.int32)) #test Transform for regression task label_reg = 0.2 bert_reg_dataset_t = BertStyleGlueTransform(tokenizer, 15) - token_ids, length, type_ids, label_reg_val = bert_reg_dataset_t((text_a, text_b, label_reg)) + token_ids, type_ids, label_reg_val = bert_reg_dataset_t((text_a, text_b, label_reg)) assert all(token_ids == concated_ids) - assert length == len(vocab_tokens) + 3 assert all(type_ids == valid_type_ids) assert all(label_reg_val == np.array([label_reg], dtype=np.float32)) #test Transform for 
single input sequence label_reg = 0.2 bert_reg_dataset_t = BertStyleGlueTransform(tokenizer, 15) - token_ids, length, type_ids, label_reg_val = bert_reg_dataset_t((text_ab, label_reg)) + token_ids, type_ids, label_reg_val = bert_reg_dataset_t((text_ab, label_reg)) concated_ids = cls_ids + text_a_ids + text_b_ids + sep_ids valid_type_ids = np.zeros((12,), dtype=np.int32) - assert all(token_ids == concated_ids) - assert length == len(vocab_tokens) + 2 + assert all(token_ids == np.array(concated_ids)) assert all(type_ids == valid_type_ids) assert all(label_reg_val == np.array([label_reg], dtype=np.float32)) @@ -101,7 +98,7 @@ def test_bertstyle_squad_dataset_transform(): trans = BertStyleSQuADTransform(tokenizer, max_seq_length=len(vocab_tokens) + 3, doc_stride=3, max_query_length=6, is_training=True) - example_id, inputs, token_types, valid_length, p_mask, start_label, end_label, is_impossible = \ + example_id, inputs, token_types, p_mask, start_label, end_label, is_impossible = \ trans(data_without_impossible)[0] text_a_tokens = ['what', 'is', 'my', 'na','##me', '?'] text_b_tokens = ['my', 'na', '##me', 'is', 'jack'] @@ -124,19 +121,17 @@ def test_bertstyle_squad_dataset_transform(): assert all(inputs == concated_ids) assert example_id == data_with_impossible[0] assert all(token_types == valid_token_type) - assert valid_length == len(vocab_tokens) + 3 assert all(p_mask == p_mask_valid) assert start_label == 12 assert end_label == 12 assert is_impossible == False #squad2 with impossible - example_id, inputs, token_types, valid_length, p_mask, start_label, end_label, is_impossible = \ + example_id, inputs, token_types, p_mask, start_label, end_label, is_impossible = \ trans(data_with_impossible)[0] assert all(inputs == concated_ids) assert example_id == data_with_impossible[0] assert all(token_types == valid_token_type) - assert valid_length == len(vocab_tokens) + 3 assert all(p_mask == p_mask_valid) assert start_label == 0 assert end_label == 0 @@ -146,10 +141,10 @@ 
def test_truncate(): seqs = [[j*i for j in range(i)] for i in range(1,10)] res1 = [[0], [0, 2], [0, 3, 6], [0, 4, 8], [0, 5, 10], [0, 6], [0, 7], [0, 8], [0, 9]] seq = [i for i in range(20)] - res3 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + res2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] trunc = TruncateTransform(20) assert all(np.array(trunc(seqs)) == np.array(res1)) - assert all(np.array(trunc(seq)) == np.array(res3)) + assert all(np.array(trunc(seq[0])) == np.array(res2)) def test_concat_sequence(): seqs = [[3 * i + j for j in range(3)] for i in range(3)] From f9952c23d69be247c4a9a4c00648d05276fbbf9d Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 4 Dec 2019 19:11:52 +0800 Subject: [PATCH 14/59] new! --- .../data/data_preprocessing_transform.py | 190 ++++++------------ tests/unittest/test_dataset_transform.py | 16 +- 2 files changed, 77 insertions(+), 129 deletions(-) diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py index 1357a22237..aadd8722c9 100644 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ b/src/gluonnlp/data/data_preprocessing_transform.py @@ -2,8 +2,8 @@ __all__ = [ 'TruncateTransform', 'ConcatSeqTransform', - 'BertTStyleSentenceTransform', 'BertStyleGlueTransform', - 'BertStyleSQuADTransform', 'SQuADExampleTransform', 'DocSpanTransform', + 'BertStyleGlueTransform', 'BertStyleSQuADTransform', + 'SQuADExampleTransform', 'DocSpanTransform', 'TokenizeAndPositionAlignTransform', 'SimpleQAPreparation', 'SquadExample' ] @@ -46,118 +46,50 @@ def __call__(self, seqs): class ConcatSeqTransform: - """Insert special tokens for sequence list or a single sequence. - For sequence pairs, the input is a list of 2 strings: - text_a, text_b. - - Inputs: - text_a: 'is this jacksonville ?' 
- text_b: 'no it is not' - start_token: [CLS] - token_after_seg: [[SEP], [SEP]] - end_token: None - - Processed: - tokens: '[CLS] is this jacksonville ? [SEP] no it is not . [SEP]' - segment_ids: 0 0 0 0 0 0 1 1 1 1 1 1 - p_mask: 0 0 0 0 0 1 0 0 0 0 0 1 - valid_length: 12 - - Parameters - ---------- - vocab : Vocab - If vocab is not None. The tokens will be converted to ids before return - - token_after_seg : list - The special tokens to be appended to each sequence. For example: - Given: - seqs: [[1, 2], [3, 4], [5, 6]] - token_after_seg: [None, 7] - it will be: - [1, 2, 3, 4, 7, 5, 6] - - start_token : string - The special token to be added to the start - - end_token : string - The special token to be added to the end - - seqs : list of sequences or a single sequence - - Returns - ------- - np.array: input token ids in 'int32', shape (batch_size, seq_length) - np.array: segment ids in 'int32', shape (batch_size, seq_length) - np.array: mask for special tokens - """ - def __init__(self, - vocab=None): - self._vocab = vocab + """ + Insert special tokens for sequence list or a single sequence. + For sequence pairs, the input is a list of 2 strings: + text_a, text_b. + Inputs: + text_a: 'is this jacksonville ?' + text_b: 'no it is not' + separator: [[SEP], [SEP]] + + Processed: + tokens: 'is this jacksonville ? [SEP] no it is not . [SEP]' + segment_ids: 0 0 0 0 0 1 1 1 1 1 1 + p_mask: 0 0 0 0 1 0 0 0 0 0 1 + valid_length: 11 + + Parameters + ---------- + separator : list + The special tokens to be appended to each sequence. 
For example: + Given: + seqs: [[1, 2], [3, 4], [5, 6]] + separator: [[], 7] + it will be: + [1, 2, 3, 4, 7, 5, 6] + + seqs : list of sequences or a single sequence - def __call__(self, seqs, seperators): + Returns + ------- + np.array: input token ids in 'int32', shape (batch_size, seq_length) + np.array: segment ids in 'int32', shape (batch_size, seq_length) + np.array: mask for special tokens + """ + def __call__(self, seqs, separators): assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 concat = sum((seq + sep for sep, seq in - itertools.zip_longest(seperators, seqs, fillvalue=[])), []) + itertools.zip_longest(separators, seqs, fillvalue=[])), []) segment_ids = sum(([i] * (len(seq) + len(sep)) for i, (sep, seq) in - enumerate(itertools.zip_longest(seperators, seqs, fillvalue=[]))), []) + enumerate(itertools.zip_longest(separators, seqs, fillvalue=[]))), []) p_mask = sum(([0] * len(seq) + [1] * len(sep) for sep, seq in - itertools.zip_longest(seperators, seqs, fillvalue=[])), []) - - if self._vocab: - concat = self._vocab[concat] - + itertools.zip_longest(separators, seqs, fillvalue=[])), []) return concat, segment_ids, p_mask -class BertTStyleSentenceTransform: - r"""BERT style data transformation. - - Parameters - ---------- - tokenizer : BERTTokenizer. - Tokenizer for the sentences. - max_seq_length : int. - Maximum sequence length of the sentences. - vocab : Vocab - The vocabulary which has cls_token and sep_token registered. - If vocab.cls_token is not present, vocab.bos_token is used instead. - If vocab.sep_token is not present, vocab.eos_token is used instead. 
- left_cls : bool - Insert [CLS] to the start/end of the sequence - """ - def __init__(self, - tokenizer, - max_seq_length=None, - vocab=None): - assert tokenizer.vocab or vocab - self.Truncate = TruncateTransform(max_len=max_seq_length) - self._tokenizer = tokenizer - self._vocab = tokenizer.vocab if vocab is None else vocab - # RoBERTa does not register CLS token and SEP token - if hasattr(self._vocab, 'cls_token'): - self._cls_token = self._vocab.cls_token - else: - self._cls_token = self._vocab.bos_token - if hasattr(self._vocab, 'sep_token'): - self._sep_token = self._vocab.sep_token - else: - self._sep_token = self._vocab.eos_token - self._token_after_seg = [self._sep_token] * 2 - - self.InsertSpecialTokens = ConcatSeqTransform() - - def __call__(self, line): - tokens_raw = [self._tokenizer(l) for l in line] - tokens_trun = self.Truncate(tokens_raw) - tokens, segment_ids, _ = self.InsertSpecialTokens( - tokens_trun, [[self._sep_token]] * len(tokens_trun)) - - input_ids = self._vocab[[self._cls_token] + tokens] - segment_ids = [0] + segment_ids - - return np.array(input_ids, dtype='int32'), np.array(segment_ids, dtype='int32') - - class BertStyleGlueTransform: """ Convert from gluonnlp.data.Glue* record to inputs for BERT-style model. 
@@ -165,17 +97,18 @@ class BertStyleGlueTransform: def __init__(self, tokenizer, max_seq_length, + cls_token=None, + sep_token=None, task=None, class_labels=None, label_alias=None, - vocab=None, - has_label=True): - self.has_label = has_label + vocab=None): + self._vocab = tokenizer.vocab if vocab is None else vocab self.class_labels = task.class_labels if task else class_labels self._label_dtype = 'int32' if (task and task.class_labels) else 'float32' self.label_alias = task.label_alias if task else label_alias - if self.has_label and self.class_labels: + if self.class_labels: self._label_map = {} for (i, label) in enumerate(self.class_labels): self._label_map[label] = i @@ -188,22 +121,31 @@ def __init__(self, else: max_seq_length += 2 - self.sentense_transform = BertTStyleSentenceTransform( - tokenizer, max_seq_length=max_seq_length, vocab=vocab) - self.tokenizer = tokenizer + self.Truncate = TruncateTransform(max_len=max_seq_length) + self.InsertSpecialTokens = ConcatSeqTransform() + self._tokenizer = tokenizer + self._sep_token = sep_token + self._cls_token = cls_token def __call__(self, line): - if self.has_label: - input_ids, segment_ids = self.sentense_transform( - line[:-1]) - label = line[-1] - # map to int if class labels are available - if self.class_labels: - label = self._label_map[label] - label = np.array([label], dtype=self._label_dtype) - return input_ids, segment_ids, label - else: - return self.sentense_transform(line) + #process the token pair + tokens_raw = [self._tokenizer(l) for l in line[:-1]] + tokens_trun = self.Truncate(tokens_raw) + tokens, segment_ids, _ = self.InsertSpecialTokens( + tokens_trun, [[self._sep_token]] * len(tokens_trun)) + + #add cls token + input_ids = self._vocab[[self._cls_token] + tokens] + segment_ids = [0] + segment_ids + + #get label + label = line[-1] + # map to int if class labels are available + if self.class_labels: + label = self._label_map[label] + label = np.array([label], dtype=self._label_dtype) + return 
input_ids, segment_ids, label + SquadExample = collections.namedtuple('SquadExample', [ diff --git a/tests/unittest/test_dataset_transform.py b/tests/unittest/test_dataset_transform.py index 043a16c19b..3d8d8786e4 100644 --- a/tests/unittest/test_dataset_transform.py +++ b/tests/unittest/test_dataset_transform.py @@ -35,7 +35,9 @@ def test_bertstyle_glue_dataset_transform(): tokenizer = BERTTokenizer(vocab=bert_vocab) # test Transform for classification task - bert_cls_dataset_t = BertStyleGlueTransform(tokenizer, 15, class_labels=[label_cls]) + bert_cls_dataset_t = BertStyleGlueTransform(tokenizer, 15, class_labels=[label_cls], + cls_token=bert_vocab.cls_token, + sep_token=bert_vocab.sep_token) token_ids, type_ids, label_ids = bert_cls_dataset_t((text_a, text_b, label_cls)) text_a_tokens = ['is', 'this', 'jack', '##son', '##ville', '?'] @@ -52,21 +54,25 @@ def test_bertstyle_glue_dataset_transform(): start = len(text_a_tokens) + 2 end = len(text_a_tokens)+2+len(text_b_tokens)+1 valid_type_ids[start:end] = 1 - assert all(token_ids == concated_ids) + assert all(np.array(token_ids) == np.array(concated_ids)) assert all(type_ids == valid_type_ids) assert all(label_ids == np.array([label_cls], dtype=np.int32)) #test Transform for regression task label_reg = 0.2 - bert_reg_dataset_t = BertStyleGlueTransform(tokenizer, 15) + bert_reg_dataset_t = BertStyleGlueTransform(tokenizer, 15, + cls_token=bert_vocab.cls_token, + sep_token=bert_vocab.sep_token) token_ids, type_ids, label_reg_val = bert_reg_dataset_t((text_a, text_b, label_reg)) - assert all(token_ids == concated_ids) + assert all(token_ids == np.array(concated_ids)) assert all(type_ids == valid_type_ids) assert all(label_reg_val == np.array([label_reg], dtype=np.float32)) #test Transform for single input sequence label_reg = 0.2 - bert_reg_dataset_t = BertStyleGlueTransform(tokenizer, 15) + bert_reg_dataset_t = BertStyleGlueTransform(tokenizer, 15, + cls_token=bert_vocab.cls_token, + sep_token=bert_vocab.sep_token) 
token_ids, type_ids, label_reg_val = bert_reg_dataset_t((text_ab, label_reg)) concated_ids = cls_ids + text_a_ids + text_b_ids + sep_ids From 60574c4f097a7626387677bf0572065c1d4e381e Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 5 Dec 2019 10:17:30 +0800 Subject: [PATCH 15/59] fix test --- tests/unittest/test_dataset_transform.py | 32 +++++++++++------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/tests/unittest/test_dataset_transform.py b/tests/unittest/test_dataset_transform.py index 3d8d8786e4..228100d24d 100644 --- a/tests/unittest/test_dataset_transform.py +++ b/tests/unittest/test_dataset_transform.py @@ -150,25 +150,21 @@ def test_truncate(): res2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] trunc = TruncateTransform(20) assert all(np.array(trunc(seqs)) == np.array(res1)) - assert all(np.array(trunc(seq[0])) == np.array(res2)) + assert all(np.array(trunc(seq)[0]) == np.array(res2)) def test_concat_sequence(): seqs = [[3 * i + j for j in range(3)] for i in range(3)] - start_token = -1 - end_token = 999 - middle_tokens = ['a', 'b', 'c'] - concat = ConcatSeqTransform(start_token=start_token, token_after_seg=middle_tokens, end_token=end_token) - res = concat(seqs) - assert res[0] == [-1, 0, 1, 2, 'a', 3, 4, 5, 'b', 6, 7, 8, 'c', 999] - assert res[1] == [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2] - assert res[2] == 14 - assert res[3] == [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0] - - middle_tokens = ['a', None, 'b'] - concat = ConcatSeqTransform(start_token=start_token, token_after_seg=middle_tokens, end_token=end_token) - res = concat(seqs) - assert res[0] == [-1, 0, 1, 2, 'a', 3, 4, 5, 6, 7, 8, 'b', 999] - assert res[1] == [0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2] - assert res[2] == 13 - assert res[3] == [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0] + seperators = [['a'], ['b'], ['c']] + concat = ConcatSeqTransform() + res = concat(seqs, seperators) + assert res[0] == [0, 1, 2, 'a', 3, 4, 5, 'b', 6, 7, 8, 'c'] + assert 
res[1] == [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] + assert res[2] == [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1] + + seperators = [['a'], [], ['b']] + concat = ConcatSeqTransform() + res = concat(seqs, seperators) + assert res[0] == [0, 1, 2, 'a', 3, 4, 5, 6, 7, 8, 'b'] + assert res[1] == [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2] + assert res[2] == [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1] From 4456dce46f3174023301a0d58f5d94f28333f0c7 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 5 Dec 2019 10:34:12 +0800 Subject: [PATCH 16/59] fix --- .../data/data_preprocessing_transform.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py index aadd8722c9..2e29e4f821 100644 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ b/src/gluonnlp/data/data_preprocessing_transform.py @@ -331,31 +331,27 @@ def __call__(self, for doc_span in doc_spans: span_text = all_doc_tokens[doc_span.start:doc_span.start + doc_span.length] + + # Insert [sep] tokens, segment_ids, p_mask = self.insert( [query, span_text], [[self._sep_token]] * 2) + # Insert [cls] input_ids = self._vocab[[self._cls_token] + tokens] segment_ids = [0] + segment_ids p_mask = [0] + p_mask + + # Get start/end position for each doc span start_position = 0 end_position = 0 if self.is_training and not is_impossible: doc_start = doc_span.start doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start + if (tok_start_position >= doc_start and tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query) + 2 + doc_offset = len(query) + 2 # plus the special token added start_position = tok_start_position - doc_start + doc_offset end_position = tok_end_position - doc_start + doc_offset - if self.is_training and is_impossible: - start_position = 0 - end_position = 0 if not 
other_features: other_features = [] ret.append(other_features + [ From 4fc458820ecb51ec936f303c286d1ba07ba65ec9 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 6 Dec 2019 16:44:28 +0800 Subject: [PATCH 17/59] new --- src/gluonnlp/data/__init__.py | 7 +- .../data/data_preprocessing_transform.py | 363 +++--------------- src/gluonnlp/data/qa_preprocessing_utils.py | 133 ------- tests/unittest/test_dataset_transform.py | 30 +- 4 files changed, 81 insertions(+), 452 deletions(-) delete mode 100644 src/gluonnlp/data/qa_preprocessing_utils.py diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index 5bfb550d99..ba17254986 100644 --- a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -22,7 +22,7 @@ dataset, question_answering, registry, sampler, sentiment, stream, transforms, translation, utils, word_embedding_evaluation, intent_slot, glue, - data_preprocessing_transform) + data_preprocessing_transform, preprocessing_utils) from .candidate_sampler import * from .conll import * from .glue import * @@ -40,11 +40,12 @@ from .word_embedding_evaluation import * from .intent_slot import * from .data_preprocessing_transform import * - +from .preprocessing_utils import * __all__ = (['batchify'] + utils.__all__ + transforms.__all__ + sampler.__all__ + dataset.__all__ + corpora.__all__ + sentiment.__all__ + word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + translation.__all__ + registry.__all__ + question_answering.__all__ + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ - + glue.__all__ + data_preprocessing_transform.__all__)# pytype: disable=attribute-error + + glue.__all__ + data_preprocessing_transform.__all__ + + preprocessing_utils.__all__)# pytype: disable=attribute-error diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py index 2e29e4f821..ae78777760 100644 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ 
b/src/gluonnlp/data/data_preprocessing_transform.py @@ -1,113 +1,30 @@ """glue and squad transform.""" __all__ = [ - 'TruncateTransform', 'ConcatSeqTransform', - 'BertStyleGlueTransform', 'BertStyleSQuADTransform', - 'SQuADExampleTransform', 'DocSpanTransform', - 'TokenizeAndPositionAlignTransform', 'SimpleQAPreparation', 'SquadExample' + 'BertStyleGlueTransform','BertStyleSQuADTransform', + 'SQuADExampleTransform', 'SquadExample' ] import collections -import itertools -from functools import partial import numpy as np -from gluonnlp.data.utils import whitespace_splitter -from .qa_preprocessing_utils import truncate_seqs_equal, improve_answer_span - - -class TruncateTransform: - """ - Truncate a sequence(list) to max length. - - Parameters - ---------- - max_len : int - truncate_fn : callable - A function determines how to truncate the sequence (list). - The function should implement argument max_len, and return a list - - Returns - ------- - list : list - """ - def __init__(self, max_len, truncate_fn=truncate_seqs_equal): - self._max_len = max_len - self.fn = partial(truncate_fn, max_len=max_len) - - def __call__(self, seqs): - assert isinstance(seqs, collections.abc.Iterable) - if len(seqs) == 0: - return seqs - if not isinstance(seqs[0], collections.abc.Iterable) or isinstance(seqs[0], str): - seqs = [seqs] - - seqs = self.fn(seqs) - return seqs - - -class ConcatSeqTransform: - """ - Insert special tokens for sequence list or a single sequence. - For sequence pairs, the input is a list of 2 strings: - text_a, text_b. - Inputs: - text_a: 'is this jacksonville ?' - text_b: 'no it is not' - separator: [[SEP], [SEP]] - - Processed: - tokens: 'is this jacksonville ? [SEP] no it is not . [SEP]' - segment_ids: 0 0 0 0 0 1 1 1 1 1 1 - p_mask: 0 0 0 0 1 0 0 0 0 0 1 - valid_length: 11 - - Parameters - ---------- - separator : list - The special tokens to be appended to each sequence. 
For example: - Given: - seqs: [[1, 2], [3, 4], [5, 6]] - separator: [[], 7] - it will be: - [1, 2, 3, 4, 7, 5, 6] - - seqs : list of sequences or a single sequence - - Returns - ------- - np.array: input token ids in 'int32', shape (batch_size, seq_length) - np.array: segment ids in 'int32', shape (batch_size, seq_length) - np.array: mask for special tokens - """ - def __call__(self, seqs, separators): - assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 - concat = sum((seq + sep for sep, seq in - itertools.zip_longest(separators, seqs, fillvalue=[])), []) - segment_ids = sum(([i] * (len(seq) + len(sep)) for i, (sep, seq) in - enumerate(itertools.zip_longest(separators, seqs, fillvalue=[]))), []) - p_mask = sum(([0] * len(seq) + [1] * len(sep) for sep, seq in - itertools.zip_longest(separators, seqs, fillvalue=[])), []) - return concat, segment_ids, p_mask - +from .qa_preprocessing_utils import truncate_seqs_equal, improve_answer_span, \ + ConcatSeqTransform, TokenizeAndPositionAlign, get_doc_spans, align_position2doc_spans class BertStyleGlueTransform: - """ - Convert from gluonnlp.data.Glue* record to inputs for BERT-style model. 
- """ + """Convert from gluonnlp.data.Glue* record to inputs for BERT-style model.""" def __init__(self, tokenizer, - max_seq_length, + truncate_length, cls_token=None, sep_token=None, - task=None, class_labels=None, + label_dtype='float32', label_alias=None, vocab=None): self._vocab = tokenizer.vocab if vocab is None else vocab - self.class_labels = task.class_labels if task else class_labels - self._label_dtype = 'int32' if (task - and task.class_labels) else 'float32' - self.label_alias = task.label_alias if task else label_alias + self.class_labels = class_labels + self._label_dtype = label_dtype + self.label_alias = label_alias if self.class_labels: self._label_map = {} for (i, label) in enumerate(self.class_labels): @@ -116,13 +33,8 @@ def __init__(self, for key in self.label_alias: self._label_map[key] = self._label_map[ self.label_alias[key]] - if len(class_labels) > 1: - max_seq_length += 3 # account for special tokens - else: - max_seq_length += 2 - - self.Truncate = TruncateTransform(max_len=max_seq_length) - self.InsertSpecialTokens = ConcatSeqTransform() + truncate_length += 3 if len(class_labels) > 1 else 2 + self._truncate_length = truncate_length self._tokenizer = tokenizer self._sep_token = sep_token self._cls_token = cls_token @@ -130,14 +42,11 @@ def __init__(self, def __call__(self, line): #process the token pair tokens_raw = [self._tokenizer(l) for l in line[:-1]] - tokens_trun = self.Truncate(tokens_raw) - tokens, segment_ids, _ = self.InsertSpecialTokens( + tokens_trun = truncate_seqs_equal(tokens_raw, self._truncate_length) + tokens_trun[0] = [self._cls_token] + tokens_trun[0] + tokens, segment_ids, _ = ConcatSeqTransform( tokens_trun, [[self._sep_token]] * len(tokens_trun)) - - #add cls token - input_ids = self._vocab[[self._cls_token] + tokens] - segment_ids = [0] + segment_ids - + input_ids = self._vocab[tokens] #get label label = line[-1] # map to int if class labels are available @@ -155,11 +64,10 @@ def __call__(self, line): class 
SQuADExampleTransform: - """ - Convert from gluonnlp.data.SQuAD's record to SquadExample. - """ - def __init__(self, training=True): + """Convert from gluonnlp.data.SQuAD's record to SquadExample.""" + def __init__(self, training=True, version_2=False): self.is_training = training + self._version_2 = version_2 def _is_whitespace(self, c): if c == ' ' or c == '\t' or c == '\r' or c == '\n' or ord(c) == 0x202F: @@ -175,10 +83,12 @@ def __call__(self, record): answer_offset = record[5][0] if record[5] else '' is_impossible = record[6] if len(record) == 7 else False + answer_length = len(orig_answer_text) doc_tokens = [] char_to_word_offset = [] prev_is_whitespace = True + for c in paragraph_text: if self._is_whitespace(c): prev_is_whitespace = True @@ -189,33 +99,9 @@ def __call__(self, record): doc_tokens[-1] += c prev_is_whitespace = False char_to_word_offset.append(len(doc_tokens) - 1) - start_position = -1 - end_position = -1 - if self.is_training: - if not is_impossible: - answer_length = len(orig_answer_text) - start_position = char_to_word_offset[answer_offset] - end_position = char_to_word_offset[answer_offset + - answer_length - 1] - # Only add answers where the text can be exactly recovered from the - # document. If this CAN'T happen it's likely due to weird Unicode - # stuff so we will just skip the example. - # - # Note that this means for training mode, every example is NOT - # guaranteed to be preserved. - actual_text = ' '.join( - doc_tokens[start_position:(end_position + 1)]) - cleaned_answer_text = ' '.join( - whitespace_splitter(orig_answer_text.strip())) - if actual_text.find(cleaned_answer_text) == -1: - print('Could not find answer: %s vs. 
%s' % - (actual_text, cleaned_answer_text)) - return None - else: - start_position = -1 - end_position = -1 - orig_answer_text = '' + start_position = char_to_word_offset[answer_offset] if not is_impossible else -1 + end_position = char_to_word_offset[answer_offset + answer_length -1] if not is_impossible else -1 example = SquadExample(qas_id=qas_id, question_text=question_text, @@ -228,139 +114,6 @@ def __call__(self, record): return example -class TokenizeAndPositionAlignTransform: - """Tokenize the question and paragraph text and map the origin start/end position - to the right position in tokenized text. - """ - def __init__(self, tokenizer, max_query_length, is_training): - self._tokenizer = tokenizer - self._max_query_length = max_query_length - self.is_training = is_training - - def __call__(self, example): - # tokenize the query text - query_tokens = self._tokenizer(example.question_text) - if len(query_tokens) > self._max_query_length: - query_tokens = query_tokens[0:self._max_query_length] - - tok_to_orig_index = [] - orig_to_tok_index = [] - all_doc_tokens = [] - - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = self._tokenizer(token) - tok_to_orig_index += [i] * len(sub_tokens) - all_doc_tokens += sub_tokens - # tokenize the paragraph text and align the start/end position - # to the tokenized sequence position - tok_start_position = None - tok_end_position = None - if self.is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if self.is_training and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + - 1] - 1 - else: - tok_end_position = len(all_doc_tokens) - 1 - (tok_start_position, tok_end_position) = improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, - self._tokenizer, 
example.orig_answer_text) - return tok_start_position, tok_end_position, all_doc_tokens, query_tokens - - -class DocSpanTransform: - """ - We can have documents that are longer than the maximum sequence length. - To deal with this we do a sliding window approach, where we take chunks - of the up to our max length with a stride of `doc_stride`. - """ - def __init__(self, doc_stride, max_seq_length=None): - self._doc_stride = doc_stride - self._max_seq_length = max_seq_length - - def __call__(self, - all_doc_tokens, - max_tokens_for_doc=None, - query_tokens_length=None): - _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - 'DocSpan', ['start', 'length']) - assert max_tokens_for_doc or (self._max_seq_length - and query_tokens_length) - doc_spans = [] - start_offset = 0 - if not max_tokens_for_doc: - max_tokens_for_doc = self._max_seq_length - query_tokens_length - 3 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, self._doc_stride) - return doc_spans - - -class SimpleQAPreparation: - """ - Give the tokenized query text and doc spans, convert the data to BERT-style model input. - Note that this class does not check if max span. 
- """ - def __init__(self, cls_token, sep_token, vocab, is_training): - self.insert = ConcatSeqTransform() - self.is_training = is_training - self._cls_token = cls_token - self._sep_token = sep_token - self._vocab = vocab - - def __call__(self, - query, - doc_spans, - all_doc_tokens, - tok_start_position, - tok_end_position, - other_features=None, - is_impossible=False): - ret = [] - if not isinstance(other_features, list): - other_features = [other_features] - for doc_span in doc_spans: - span_text = all_doc_tokens[doc_span.start:doc_span.start + - doc_span.length] - - # Insert [sep] - tokens, segment_ids, p_mask = self.insert( - [query, span_text], [[self._sep_token]] * 2) - # Insert [cls] - input_ids = self._vocab[[self._cls_token] + tokens] - segment_ids = [0] + segment_ids - p_mask = [0] + p_mask - - # Get start/end position for each doc span - start_position = 0 - end_position = 0 - if self.is_training and not is_impossible: - doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - if (tok_start_position >= doc_start - and tok_end_position <= doc_end): - doc_offset = len(query) + 2 # plus the special token added - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - - if not other_features: - other_features = [] - ret.append(other_features + [ - input_ids, segment_ids, p_mask, start_position, - end_position, is_impossible - ]) - return ret - - class BertStyleSQuADTransform: """Dataset Transformation for BERT-style QA. 
@@ -378,44 +131,50 @@ class BertStyleSQuADTransform: """ def __init__(self, tokenizer, + cls_token, + sep_token, vocab=None, max_seq_length=384, doc_stride=128, max_query_length=64, is_training=True): - + self._tokenizer = tokenizer self._vocab = tokenizer.vocab if vocab is None else vocab - # RoBERTa does not register CLS token and SEP token - if hasattr(self._vocab, 'cls_token'): - self._cls_token = self._vocab.cls_token - else: - self._cls_token = self._vocab.bos_token - if hasattr(self._vocab, 'sep_token'): - self._sep_token = self._vocab.sep_token - else: - self._sep_token = self._vocab.eos_token - - self.get_example = SQuADExampleTransform(training=is_training) - self.get_aligned = TokenizeAndPositionAlignTransform( - tokenizer, max_query_length, is_training) - self.doc_span_transform = DocSpanTransform(doc_stride, max_seq_length) - self.doc_span_preparation = SimpleQAPreparation( - self._cls_token, self._sep_token, self._vocab, is_training) + self._cls_token = cls_token + self._sep_token = sep_token + self._max_seq_length = max_seq_length + self._doc_stride = doc_stride + self._max_query_length = max_query_length + self._get_example = SQuADExampleTransform(training=is_training) def __call__(self, line): - example = self.get_example(line) - tok_start_position, tok_end_position, all_doc_tokens, query_tokens = self.get_aligned( - example) - doc_spans = self.doc_span_transform( - all_doc_tokens, query_tokens_length=len(query_tokens)) - #features contain example_id,input_ids, segment_ids, - # valid_length, start_position, end_position - features = self.doc_span_preparation( - query_tokens, - doc_spans, - all_doc_tokens, - tok_start_position, - tok_end_position, - other_features=example.example_id, - is_impossible=example.is_impossible) + example = self._get_example(line) + query_tokenized = [self._cls_token] + self._tokenizer(example.question_text)[:self._max_query_length] + #get the start/end position of the answer in tokenized paragraph + (tok_start_position, 
tok_end_position), all_doc_tokens = \ + TokenizeAndPositionAlign(example.doc_tokens, + [example.start_position, + example.end_position], + self._tokenizer) + if not example.is_impossible: + (tok_start_position, tok_end_position) = improve_answer_span( + all_doc_tokens, tok_start_position, tok_end_position, + self._tokenizer, example.orig_answer_text) + else: + tok_start_position, tok_end_position = -1, -1 + + #get doc spans + doc_spans, doc_spans_indices = get_doc_spans(all_doc_tokens, self._max_seq_length - self._max_query_length - 3, + self._doc_stride) + #get sequence features: tokens, segment_ids, p_masks + seq_features = [ConcatSeqTransform([query_tokenized, doc_span], [[self._sep_token]] * 2) + for doc_span in doc_spans] + #get the new start/end position + positions = [align_position2doc_spans([tok_start_position, tok_end_position], doc_idx, + offset=len(query_tokenized) + 1, + default_value=0) for doc_idx in doc_spans_indices] + features = [[example.example_id] + [self._vocab[tokens], segment_id, p_mask] + + [start, end, example.is_impossible] + for (tokens, segment_id, p_mask), (start, end) in zip(seq_features, positions)] + return features diff --git a/src/gluonnlp/data/qa_preprocessing_utils.py b/src/gluonnlp/data/qa_preprocessing_utils.py deleted file mode 100644 index 0c2eeb1878..0000000000 --- a/src/gluonnlp/data/qa_preprocessing_utils.py +++ /dev/null @@ -1,133 +0,0 @@ -"""Utility classes and functions for qa data processing""" - - -import numpy.ma as ma - -def truncate_seq_pair(tokens_a, tokens_b, max_length): - """Truncates a sequence pair in place to the maximum length.""" - # This is a simple heuristic which will always truncate the longer sequence - # one token at a time. This makes more sense than truncating an equal percent - # of tokens from each, since if one sequence is very short then each token - # that's truncated likely contains more information than a longer sequence. 
- while True: - total_length = len(tokens_a) + len(tokens_b) - if total_length <= max_length: - break - if len(tokens_a) > len(tokens_b): - tokens_a.pop() - else: - tokens_b.pop() - - -def truncate_equal_by_len(lens, max_len): - """ - Reduce the sum of lens to max_len, always reduces the longer len - by preference - """ - if sum(lens) <= max_len: - return lens - - lens = ma.masked_array(lens, mask=[0] * len(lens)) - while True: - argmin = lens.argmin() - minval = lens[argmin] - quotient, remainder = divmod(max_len, len(lens) - sum(lens.mask)) - if minval <= quotient: # Ignore values that don't need truncation - lens.mask[argmin] = 1 - max_len -= minval - else: # Truncate all - lens.data[~lens.mask] = [ - quotient + 1 if i < remainder else quotient - for i in range(lens.count()) - ] - break - - return lens.data.tolist() - -def truncate_seqs_equal(seqs, max_len): - """ - truncate a list of seqs so that the total length equals max length. - Trying to truncate the seqs to equal length. - - Returns - ------- - list : list of truncated sequence keeping the origin order - """ - lens = list(map(len, seqs)) - lens_truncated = truncate_equal_by_len(lens, max_len) - seqs = [seq[:lens_truncated[i]] for (i, seq) in enumerate(seqs)] - return seqs - -def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. 
Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. - tok_answer_text = ' '.join(tokenizer(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = ' '.join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - - -def check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. 
- best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + \ - 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index diff --git a/tests/unittest/test_dataset_transform.py b/tests/unittest/test_dataset_transform.py index 228100d24d..792272e88b 100644 --- a/tests/unittest/test_dataset_transform.py +++ b/tests/unittest/test_dataset_transform.py @@ -20,8 +20,8 @@ import numpy as np from gluonnlp.vocab import BERTVocab from gluonnlp.data import count_tokens, BERTTokenizer, \ - BertStyleGlueTransform, BertStyleSQuADTransform, TruncateTransform, \ - ConcatSeqTransform + BertStyleGlueTransform, BertStyleSQuADTransform, truncate_seqs_equal, \ + ConcatSeqTransform def test_bertstyle_glue_dataset_transform(): @@ -101,9 +101,11 @@ def test_bertstyle_squad_dataset_transform(): vocab_tokens = ['what', 'is', 'my', 'na', '##me', '?', 'my', 'na', '##me', 'is', 'jack'] bert_vocab = BERTVocab(count_tokens(vocab_tokens)) tokenizer = BERTTokenizer(vocab=bert_vocab) - trans = BertStyleSQuADTransform(tokenizer, max_seq_length=len(vocab_tokens) + 3, - doc_stride=3, max_query_length=6, - is_training=True) + trans = BertStyleSQuADTransform(tokenizer, bert_vocab.cls_token, + bert_vocab.sep_token, + max_seq_length=len(vocab_tokens) + 3, + doc_stride=10, max_query_length=6, + is_training=True) example_id, inputs, token_types, p_mask, start_label, end_label, is_impossible = \ trans(data_without_impossible)[0] text_a_tokens = ['what', 'is', 'my', 'na','##me', '?'] @@ -146,24 +148,24 @@ def test_bertstyle_squad_dataset_transform(): def test_truncate(): seqs = [[j*i for j in range(i)] for i in 
range(1,10)] res1 = [[0], [0, 2], [0, 3, 6], [0, 4, 8], [0, 5, 10], [0, 6], [0, 7], [0, 8], [0, 9]] - seq = [i for i in range(20)] - res2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - trunc = TruncateTransform(20) - assert all(np.array(trunc(seqs)) == np.array(res1)) - assert all(np.array(trunc(seq)[0]) == np.array(res2)) + seq = [[i for i in range(20)]] + + truncated = truncate_seqs_equal(seqs, 20) + truncated2 = truncate_seqs_equal(seq, 20) + + assert all(truncated == np.array(res1)) + assert all(truncated2[0] == np.array(seq)[0]) def test_concat_sequence(): seqs = [[3 * i + j for j in range(3)] for i in range(3)] seperators = [['a'], ['b'], ['c']] - concat = ConcatSeqTransform() - res = concat(seqs, seperators) + res = ConcatSeqTransform(seqs, seperators) assert res[0] == [0, 1, 2, 'a', 3, 4, 5, 'b', 6, 7, 8, 'c'] assert res[1] == [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] assert res[2] == [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1] seperators = [['a'], [], ['b']] - concat = ConcatSeqTransform() - res = concat(seqs, seperators) + res = ConcatSeqTransform(seqs, seperators) assert res[0] == [0, 1, 2, 'a', 3, 4, 5, 6, 7, 8, 'b'] assert res[1] == [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2] assert res[2] == [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1] From 00c8ff997106cd7de131b6476045c58054b192d8 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 6 Dec 2019 16:45:16 +0800 Subject: [PATCH 18/59] fix --- .../data/data_preprocessing_transform.py | 2 +- src/gluonnlp/data/preprocessing_utils.py | 220 ++++++++++++++++++ 2 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 src/gluonnlp/data/preprocessing_utils.py diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py index ae78777760..52beb46ce9 100644 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ b/src/gluonnlp/data/data_preprocessing_transform.py @@ -7,7 +7,7 @@ import collections import numpy as np -from .qa_preprocessing_utils import 
truncate_seqs_equal, improve_answer_span, \ +from .preprocessing_utils import truncate_seqs_equal, improve_answer_span, \ ConcatSeqTransform, TokenizeAndPositionAlign, get_doc_spans, align_position2doc_spans class BertStyleGlueTransform: diff --git a/src/gluonnlp/data/preprocessing_utils.py b/src/gluonnlp/data/preprocessing_utils.py new file mode 100644 index 0000000000..63b5a90319 --- /dev/null +++ b/src/gluonnlp/data/preprocessing_utils.py @@ -0,0 +1,220 @@ +"""Utility classes and functions for data processing""" + +__all__ = [ + 'truncate_seqs_equal', 'truncate_equal_by_len', 'ConcatSeqTransform', 'TokenizeAndPositionAlign', + 'get_doc_spans', 'align_position2doc_spans', 'improve_answer_span', 'check_is_max_context' +] + +import collections +import itertools +import numpy.ma as ma + + +def truncate_equal_by_len(lens, max_len): + if sum(lens) <= max_len: + return lens + + lens = ma.masked_array(lens, mask=[0] * len(lens)) + while True: + argmin = lens.argmin() + minval = lens[argmin] + quotient, remainder = divmod(max_len, len(lens) - sum(lens.mask)) + if minval <= quotient: # Ignore values that don't need truncation + lens.mask[argmin] = 1 + max_len -= minval + else: # Truncate all + lens.data[~lens.mask] = [ + quotient + 1 if i < remainder else quotient + for i in range(lens.count()) + ] + break + + return lens.data.tolist() + + +def truncate_seqs_equal(seqs, max_len): + """ + truncate a list of seqs so that the total length equals max length. + Trying to truncate the seqs to equal length. + + Returns + ------- + list : list of truncated sequence keeping the origin order + """ + assert isinstance(seqs, list) + lens = list(map(len, seqs)) + seqs = [seq[:length] for (seq, length) in zip(seqs, truncate_equal_by_len(lens, max_len))] + return seqs + + +def ConcatSeqTransform(seqs, separators, separator_mask=1): + """ + Insert special tokens for sequence list or a single sequence. + For sequence pairs, the input is a list of 2 strings: + text_a, text_b. 
+ Inputs: + text_a: 'is this jacksonville ?' + text_b: 'no it is not' + separator: [[SEP], [SEP]] + + Processed: + tokens: 'is this jacksonville ? [SEP] no it is not . [SEP]' + segment_ids: 0 0 0 0 0 1 1 1 1 1 1 + p_mask: 0 0 0 0 1 0 0 0 0 0 1 + valid_length: 11 + + Parameters + ---------- + separator : list + The special tokens to be appended to each sequence. For example: + Given: + seqs: [[1, 2], [3, 4], [5, 6]] + separator: [[], 7] + it will be: + [1, 2, 3, 4, 7, 5, 6] + + seqs : list of sequences or a single sequence + + Returns + ------- + np.array: input token ids in 'int32', shape (batch_size, seq_length) + np.array: segment ids in 'int32', shape (batch_size, seq_length) + np.array: mask for special tokens + """ + assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 + concat = sum((seq + sep for sep, seq in + itertools.zip_longest(separators, seqs, fillvalue=[])), []) + segment_ids = sum(([i] * (len(seq) + len(sep)) for i, (sep, seq) in + enumerate(itertools.zip_longest(separators, seqs, fillvalue=[]))), []) + p_mask = sum(([0] * len(seq) + [separator_mask] * len(sep) for sep, seq in + itertools.zip_longest(separators, seqs, fillvalue=[])), []) + return concat, segment_ids, p_mask + + +def TokenizeAndPositionAlign(origin_text, positions, tokenizer): + """Tokenize the text and align the origin positions to the corresponding position""" + if not isinstance(positions, list): + positions = [positions] + orig_to_tok_index = [] + tokenized_text = [] + for (i, token) in enumerate(origin_text): + orig_to_tok_index.append(len(tokenized_text)) + sub_tokens = tokenizer(token) + tokenized_text += sub_tokens + new_positions = [orig_to_tok_index[p] for p in positions] + return new_positions, tokenized_text + + +def get_doc_spans(full_doc, max_length, doc_stride): + """A simple function that applying a sliding window on the doc and get doc spans + + Parameters + ---------- + full_doc: list + The origin doc text + max_length: max_length + Maximum size of a 
doc span + doc_stride: int + Step of sliding window + + Returns + ------- + list: a list of processed doc spans + list: a list of start/end index of each doc span + """ + doc_spans = [] + start_offset = 0 + while start_offset < len(full_doc): + length = min(max_length, len(full_doc) - start_offset) + end_offset = start_offset + length + doc_spans.append((full_doc[start_offset: end_offset], (start_offset, end_offset))) + start_offset += min(length, doc_stride) + return list(zip(*doc_spans)) + + +def align_position2doc_spans(positions, doc_spans_indices, + offset=0, default_value=-1, all_in_span=True): + """Align the origin positions to the corresponding position in doc spans""" + if not isinstance(positions, list): + positions = [positions] + doc_start, doc_end = doc_spans_indices + if all_in_span and not all([p in range(doc_start, doc_end + 1) for p in positions]): + return [default_value] * len(positions) + new_positions = [p - doc_start + offset if p in range(doc_start, doc_end + 1) + else default_value for p in positions] + return new_positions + + +def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, + orig_answer_text): + """Returns tokenized answer spans that better match the annotated answer.""" + + # The SQuAD annotations are character based. We first project them to + # whitespace-tokenized words. But then after WordPiece tokenization, we can + # often find a "better match". For example: + # + # Question: What year was John Smith born? + # Context: The leader was John Smith (1895-1943). + # Answer: 1895 + # + # The original whitespace-tokenized answer will be "(1895-1943).". However + # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match + # the exact answer, 1895. + # + # However, this is not always possible. Consider the following: + # + # Question: What country is the top exporter of electornics? + # Context: The Japanese electronics industry is the lagest in the world. 
+ # Answer: Japan + # + # In this case, the annotator chose "Japan" as a character sub-span of + # the word "Japanese". Since our WordPiece tokenizer does not split + # "Japanese", we just use "Japanese" as the annotation. This is fairly rare + # in SQuAD, but does happen. + tok_answer_text = ' '.join(tokenizer(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = ' '.join(doc_tokens[new_start:(new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. 
+ best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + \ + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index From 7b1749abd03ec63dce83a61d5b4673450243a1be Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 6 Dec 2019 16:57:06 +0800 Subject: [PATCH 19/59] merge two truncate func --- src/gluonnlp/data/preprocessing_utils.py | 33 ++++++++++-------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/gluonnlp/data/preprocessing_utils.py b/src/gluonnlp/data/preprocessing_utils.py index 63b5a90319..c55ce3a4b9 100644 --- a/src/gluonnlp/data/preprocessing_utils.py +++ b/src/gluonnlp/data/preprocessing_utils.py @@ -1,7 +1,7 @@ """Utility classes and functions for data processing""" __all__ = [ - 'truncate_seqs_equal', 'truncate_equal_by_len', 'ConcatSeqTransform', 'TokenizeAndPositionAlign', + 'truncate_seqs_equal', 'ConcatSeqTransform', 'TokenizeAndPositionAlign', 'get_doc_spans', 'align_position2doc_spans', 'improve_answer_span', 'check_is_max_context' ] @@ -10,9 +10,19 @@ import numpy.ma as ma -def truncate_equal_by_len(lens, max_len): +def truncate_seqs_equal(seqs, max_len): + """truncate a list of seqs so that the total length equals max length. + + Trying to truncate the seqs to equal length. 
+ + Returns + ------- + list : list of truncated sequence keeping the origin order + """ + assert isinstance(seqs, list) + lens = list(map(len, seqs)) if sum(lens) <= max_len: - return lens + return seqs lens = ma.masked_array(lens, mask=[0] * len(lens)) while True: @@ -28,22 +38,7 @@ def truncate_equal_by_len(lens, max_len): for i in range(lens.count()) ] break - - return lens.data.tolist() - - -def truncate_seqs_equal(seqs, max_len): - """ - truncate a list of seqs so that the total length equals max length. - Trying to truncate the seqs to equal length. - - Returns - ------- - list : list of truncated sequence keeping the origin order - """ - assert isinstance(seqs, list) - lens = list(map(len, seqs)) - seqs = [seq[:length] for (seq, length) in zip(seqs, truncate_equal_by_len(lens, max_len))] + seqs = [seq[:length] for (seq, length) in zip(seqs, lens.data.tolist())] return seqs From 06701ce7fd519672dc7bc8154a5a9fed8e7be03c Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 6 Dec 2019 20:34:39 +0800 Subject: [PATCH 20/59] add more features --- .../data/data_preprocessing_transform.py | 48 ++++++-- src/gluonnlp/data/preprocessing_utils.py | 104 ++++++++++-------- 2 files changed, 93 insertions(+), 59 deletions(-) diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py index 52beb46ce9..b1750d6962 100644 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ b/src/gluonnlp/data/data_preprocessing_transform.py @@ -8,7 +8,8 @@ import collections import numpy as np from .preprocessing_utils import truncate_seqs_equal, improve_answer_span, \ - ConcatSeqTransform, TokenizeAndPositionAlign, get_doc_spans, align_position2doc_spans + ConcatSeqTransform, TokenizeAndPositionAlign, get_doc_spans, align_position2doc_spans, \ + check_is_max_context class BertStyleGlueTransform: """Convert from gluonnlp.data.Glue* record to inputs for BERT-style model.""" @@ -62,6 +63,11 @@ def __call__(self, line): 'start_position', 
'end_position', 'is_impossible' ]) +SquadBERTFeautre = collections.namedtuple('SquadBERTFeautre', ['example_id', 'qas_id', 'doc_tokens', + 'tokens', 'token_to_orig_map', 'token_is_max_context', 'input_ids', + 'p_mask', 'segment_ids', 'start_position', + 'end_position','is_impossible']) + class SQuADExampleTransform: """Convert from gluonnlp.data.SQuAD's record to SquadExample.""" @@ -100,8 +106,12 @@ def __call__(self, record): prev_is_whitespace = False char_to_word_offset.append(len(doc_tokens) - 1) - start_position = char_to_word_offset[answer_offset] if not is_impossible else -1 - end_position = char_to_word_offset[answer_offset + answer_length -1] if not is_impossible else -1 + if self.is_training: + start_position = -1 + end_position = -1 + else: + start_position = char_to_word_offset[answer_offset] if not is_impossible else -1 + end_position = char_to_word_offset[answer_offset + answer_length -1] if not is_impossible else -1 example = SquadExample(qas_id=qas_id, question_text=question_text, @@ -146,12 +156,13 @@ def __init__(self, self._doc_stride = doc_stride self._max_query_length = max_query_length self._get_example = SQuADExampleTransform(training=is_training) + self._is_training = is_training def __call__(self, line): example = self._get_example(line) - query_tokenized = [self._cls_token] + self._tokenizer(example.question_text)[:self._max_query_length] + query_tokenized = [self._cls_token] + self._tokenizer(example.question_text)[: self._max_query_length] #get the start/end position of the answer in tokenized paragraph - (tok_start_position, tok_end_position), all_doc_tokens = \ + (tok_start_position, tok_end_position), all_doc_tokens, _, tok_to_orig_index= \ TokenizeAndPositionAlign(example.doc_tokens, [example.start_position, example.end_position], @@ -164,17 +175,30 @@ def __call__(self, line): tok_start_position, tok_end_position = -1, -1 #get doc spans - doc_spans, doc_spans_indices = get_doc_spans(all_doc_tokens, self._max_seq_length - 
self._max_query_length - 3, + doc_spans, doc_spans_indices = get_doc_spans(all_doc_tokens, self._max_seq_length - len(query_tokenized) - 2, self._doc_stride) - #get sequence features: tokens, segment_ids, p_masks - seq_features = [ConcatSeqTransform([query_tokenized, doc_span], [[self._sep_token]] * 2) - for doc_span in doc_spans] #get the new start/end position positions = [align_position2doc_spans([tok_start_position, tok_end_position], doc_idx, offset=len(query_tokenized) + 1, default_value=0) for doc_idx in doc_spans_indices] - features = [[example.example_id] + [self._vocab[tokens], segment_id, p_mask] - + [start, end, example.is_impossible] - for (tokens, segment_id, p_mask), (start, end) in zip(seq_features, positions)] + token_is_max_context = [{len(query_tokenized) + p: check_is_max_context(doc_spans_indices, i, + p + doc_spans_indices[i][0]) + for p in range(len(doc_span))} + for (i, doc_span) in enumerate(doc_spans)] + token_to_orig_map = [{len(query_tokenized) + p: tok_to_orig_index[p + doc_spans_indices[i][0]] + for p in range(len(doc_span))} + for (i, doc_span) in enumerate(doc_spans)] + #get sequence features: tokens, segment_ids, p_masks + seq_features = [ConcatSeqTransform([query_tokenized, doc_span], [[self._sep_token]] * 2) + for doc_span in doc_spans] + + features = [SquadBERTFeautre(example_id=example.example_id, qas_id=example.qas_id, + doc_tokens=example.doc_tokens, tokens=tokens, token_to_orig_map=t2o, + token_is_max_context=is_max, input_ids=self._vocab[tokens], + p_mask=p_mask, segment_ids=segment_ids, start_position=start, end_position=end, + is_impossible=example.is_impossible) for (tokens, segment_ids, p_mask), (start, end), + is_max, t2o in zip(seq_features, positions, + token_is_max_context, + token_to_orig_map)] return features diff --git a/src/gluonnlp/data/preprocessing_utils.py b/src/gluonnlp/data/preprocessing_utils.py index c55ce3a4b9..7946d61506 100644 --- a/src/gluonnlp/data/preprocessing_utils.py +++ 
b/src/gluonnlp/data/preprocessing_utils.py @@ -91,13 +91,15 @@ def TokenizeAndPositionAlign(origin_text, positions, tokenizer): if not isinstance(positions, list): positions = [positions] orig_to_tok_index = [] + tok_to_orig_index = [] tokenized_text = [] for (i, token) in enumerate(origin_text): orig_to_tok_index.append(len(tokenized_text)) sub_tokens = tokenizer(token) tokenized_text += sub_tokens + tok_to_orig_index += [i] * len(sub_tokens) new_positions = [orig_to_tok_index[p] for p in positions] - return new_positions, tokenized_text + return new_positions, tokenized_text, orig_to_tok_index, tok_to_orig_index def get_doc_spans(full_doc, max_length, doc_stride): @@ -123,6 +125,8 @@ def get_doc_spans(full_doc, max_length, doc_stride): length = min(max_length, len(full_doc) - start_offset) end_offset = start_offset + length doc_spans.append((full_doc[start_offset: end_offset], (start_offset, end_offset))) + if start_offset + length == len(full_doc): + break start_offset += min(length, doc_stride) return list(zip(*doc_spans)) @@ -142,30 +146,31 @@ def align_position2doc_spans(positions, doc_spans_indices, def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? 
- # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. + """Returns tokenized answer spans that better match the annotated answer. + + The SQuAD annotations are character based. We first project them to + whitespace-tokenized words. But then after WordPiece tokenization, we can + often find a "better match". For example: + + Question: What year was John Smith born? + Context: The leader was John Smith (1895-1943). + Answer: 1895 + + The original whitespace-tokenized answer will be "(1895-1943).". However + after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match + the exact answer, 1895. + + However, this is not always possible. Consider the following: + + Question: What country is the top exporter of electornics? + Context: The Japanese electronics industry is the lagest in the world. + Answer: Japan + + In this case, the annotator chose "Japan" as a character sub-span of + the word "Japanese". Since our WordPiece tokenizer does not split + "Japanese", we just use "Japanese" as the annotation. This is fairly rare + in SQuAD, but does happen. + """ tok_answer_text = ' '.join(tokenizer(orig_answer_text)) for new_start in range(input_start, input_end + 1): @@ -178,36 +183,41 @@ def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, def check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. 
- # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. + """Check if this is the 'max context' doc span for the token. + + Because of the sliding window approach taken to scoring documents, a single + token can appear in multiple documents. E.g. + Doc: the man went to the store and bought a gallon of milk + Span A: the man went to the + Span B: to the store and bought + Span C: and bought a gallon of + ... + + Now the word 'bought' will have two scores from spans B and C. We only + want to consider the score with "maximum context", which we define as + the *minimum* of its left and right context (the *sum* of left and + right context will always be the same, of course). + + In the example the maximum context for 'bought' would be span C since + it has 1 left context and 3 right context, while span B has 4 left context + and 0 right context. + + Note that position is the absolute position in the origin text. 
+ """ best_score = None best_span_index = None for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: + start, end = doc_span + end -= 1 + length = end - start + 1 + if position < start: continue if position > end: continue - num_left_context = position - doc_span.start + num_left_context = position - start num_right_context = end - position score = min(num_left_context, num_right_context) + \ - 0.01 * doc_span.length + 0.01 * length if best_score is None or score > best_score: best_score = score best_span_index = span_index From ec868325b2dd02d133bedefa4f0e36aaf1121f61 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 11 Dec 2019 17:23:24 +0800 Subject: [PATCH 21/59] refactor --- scripts/bert/data/__init__.py | 2 +- .../bert}/data/preprocessing_utils.py | 73 ++- scripts/bert/data/qa.py | 520 ------------------ scripts/bert/finetune_classifier.py | 73 ++- scripts/bert/finetune_squad.py | 166 +++++- scripts/tests/test_preprocess_utils.py | 30 + src/gluonnlp/data/__init__.py | 8 +- .../data/data_preprocessing_transform.py | 204 ------- tests/unittest/test_dataset_transform.py | 172 ------ 9 files changed, 287 insertions(+), 961 deletions(-) rename {src/gluonnlp => scripts/bert}/data/preprocessing_utils.py (75%) delete mode 100644 scripts/bert/data/qa.py create mode 100644 scripts/tests/test_preprocess_utils.py delete mode 100644 src/gluonnlp/data/data_preprocessing_transform.py delete mode 100644 tests/unittest/test_dataset_transform.py diff --git a/scripts/bert/data/__init__.py b/scripts/bert/data/__init__.py index 67cf59d629..927ff5ed29 100644 --- a/scripts/bert/data/__init__.py +++ b/scripts/bert/data/__init__.py @@ -17,4 +17,4 @@ # pylint: disable=wildcard-import """BERT data.""" -from . import qa, classification, embedding, transform, ner, dataloader, pretrain +from . 
import classification, embedding, transform, ner, dataloader, pretrain diff --git a/src/gluonnlp/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py similarity index 75% rename from src/gluonnlp/data/preprocessing_utils.py rename to scripts/bert/data/preprocessing_utils.py index 7946d61506..bb6e0b172e 100644 --- a/src/gluonnlp/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -1,9 +1,9 @@ """Utility classes and functions for data processing""" __all__ = [ - 'truncate_seqs_equal', 'ConcatSeqTransform', 'TokenizeAndPositionAlign', - 'get_doc_spans', 'align_position2doc_spans', 'improve_answer_span', 'check_is_max_context' -] + 'truncate_seqs_equal', 'concat_sequences', 'tokenize_and_align_positions', + 'get_doc_spans', 'align_position2doc_spans', 'improve_answer_span', 'check_is_max_context', + 'convert_squad_examples'] import collections import itertools @@ -42,7 +42,7 @@ def truncate_seqs_equal(seqs, max_len): return seqs -def ConcatSeqTransform(seqs, separators, separator_mask=1): +def concat_sequences(seqs, separators, separator_mask=1): """ Insert special tokens for sequence list or a single sequence. 
For sequence pairs, the input is a list of 2 strings: @@ -86,10 +86,8 @@ def ConcatSeqTransform(seqs, separators, separator_mask=1): return concat, segment_ids, p_mask -def TokenizeAndPositionAlign(origin_text, positions, tokenizer): +def tokenize_and_align_positions(origin_text, start_position, end_position, tokenizer): """Tokenize the text and align the origin positions to the corresponding position""" - if not isinstance(positions, list): - positions = [positions] orig_to_tok_index = [] tok_to_orig_index = [] tokenized_text = [] @@ -98,8 +96,11 @@ def TokenizeAndPositionAlign(origin_text, positions, tokenizer): sub_tokens = tokenizer(token) tokenized_text += sub_tokens tok_to_orig_index += [i] * len(sub_tokens) - new_positions = [orig_to_tok_index[p] for p in positions] - return new_positions, tokenized_text, orig_to_tok_index, tok_to_orig_index + + start_position = orig_to_tok_index[start_position] + end_position = orig_to_tok_index[end_position + 1] - 1 if end_position < len(origin_text) - 1 \ + else len(tokenized_text) - 1 + return start_position, end_position, tokenized_text, orig_to_tok_index, tok_to_orig_index def get_doc_spans(full_doc, max_length, doc_stride): @@ -137,9 +138,9 @@ def align_position2doc_spans(positions, doc_spans_indices, if not isinstance(positions, list): positions = [positions] doc_start, doc_end = doc_spans_indices - if all_in_span and not all([p in range(doc_start, doc_end + 1) for p in positions]): + if all_in_span and not all([p in range(doc_start, doc_end) for p in positions]): return [default_value] * len(positions) - new_positions = [p - doc_start + offset if p in range(doc_start, doc_end + 1) + new_positions = [p - doc_start + offset if p in range(doc_start, doc_end) else default_value for p in positions] return new_positions @@ -223,3 +224,53 @@ def check_is_max_context(doc_spans, cur_span_index, position): best_span_index = span_index return cur_span_index == best_span_index + + +SquadExample = 
collections.namedtuple('SquadExample', [ + 'qas_id', 'question_text', 'doc_tokens', 'example_id', 'orig_answer_text', + 'start_position', 'end_position', 'is_impossible']) + + +def convert_squad_examples(record, is_training): + """read a single entry of gluonnlp.data.SQuAD and convert it to an example""" + example_id = record[0] + qas_id = record[1] + question_text = record[2] + paragraph_text = record[3] + orig_answer_text = record[4][0] if record[4] else '' + answer_offset = record[5][0] if record[5] else '' + is_impossible = record[6] if len(record) == 7 else False + + answer_length = len(orig_answer_text) + doc_tokens = [] + + char_to_word_offset = [] + prev_is_whitespace = True + + for c in paragraph_text: + if str.isspace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + if not is_training: + start_position = -1 + end_position = -1 + else: + start_position = char_to_word_offset[answer_offset] if not is_impossible else -1 + end_position = char_to_word_offset[answer_offset + answer_length - 1] if not is_impossible else -1 + + example = SquadExample(qas_id=qas_id, + question_text=question_text, + doc_tokens=doc_tokens, + example_id=example_id, + orig_answer_text=orig_answer_text, + start_position=start_position, + end_position=end_position, + is_impossible=is_impossible) + return example diff --git a/scripts/bert/data/qa.py b/scripts/bert/data/qa.py deleted file mode 100644 index e6ef2294b4..0000000000 --- a/scripts/bert/data/qa.py +++ /dev/null @@ -1,520 +0,0 @@ -# Copyright 2018 The Google AI Language Team Authors and DMLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""BERT for QA datasets.""" -import collections -import multiprocessing as mp -import time -from functools import partial - -from mxnet.gluon.data import SimpleDataset -from gluonnlp.data.utils import whitespace_splitter - -__all__ = ['SQuADTransform', 'preprocess_dataset'] - -class SquadExample: - """A single training/test example for SQuAD question. - - For examples without an answer, the start and end position are -1. - """ - - def __init__(self, - qas_id, - question_text, - doc_tokens, - example_id, - orig_answer_text=None, - start_position=None, - end_position=None, - is_impossible=False): - self.qas_id = qas_id - self.question_text = question_text - self.doc_tokens = doc_tokens - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - self.example_id = example_id - -def _worker_fn(example, transform): - """Function for processing data in worker process.""" - feature = transform(example) - return feature - - -def preprocess_dataset(dataset, transform, num_workers=8): - """Use multiprocessing to perform transform for dataset. - - Parameters - ---------- - dataset: dataset-like object - Source dataset. - transform: callable - Transformer function. - num_workers: int, default 8 - The number of multiprocessing workers to use for data preprocessing. 
- - """ - worker_fn = partial(_worker_fn, transform=transform) - start = time.time() - - pool = mp.Pool(num_workers) - dataset_transform = [] - dataset_len = [] - for data in pool.map(worker_fn, dataset): - if data: - for _data in data: - dataset_transform.append(_data[:-1]) - dataset_len.append(_data[-1]) - - dataset = SimpleDataset(dataset_transform).transform( - lambda x: (x[0], x[1], x[2], x[3], x[4], x[5])) - end = time.time() - pool.close() - print('Done! Transform dataset costs %.2f seconds.' % (end-start)) - return dataset, dataset_len - - -class SQuADFeature: - """Single feature of a single example transform of the SQuAD question. - - """ - - def __init__(self, - example_id, - qas_id, - doc_tokens, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - valid_length, - segment_ids, - start_position, - end_position, - is_impossible): - self.example_id = example_id - self.qas_id = qas_id - self.doc_tokens = doc_tokens - self.doc_span_index = doc_span_index - self.tokens = tokens - self.token_to_orig_map = token_to_orig_map - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.valid_length = valid_length - self.segment_ids = segment_ids - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - -class SQuADTransform: - """Dataset Transformation for BERT-style QA. - - The transformation is processed in the following steps: - - Convert from gluonnlp.data.SQuAD's record to SquadExample. - - Tokenize the question_text in the example. - - For examples where the document is too long, - use a sliding window to split into multiple features and - record whether each token is a maximum context. - - Tokenize the split document chunks. - - Combine the token of question_text with the token - of the document and insert [CLS] and [SEP]. - - Generate the start position and end position of the answer. - - Generate valid length. 
- - E.g: - - Inputs: - - question_text: 'When did BBC Japan begin broadcasting?' - doc_tokens: ['BBC','Japan','was','a','general','entertainment','channel,', - 'which','operated','between','December','2004','and','April', - '2006.','It','ceased','operations','after','its','Japanese', - 'distributor','folded.'] - start_position: 10 - end_position: 11 - orig_answer_text: 'December 2004' - - Processed: - - tokens: ['[CLS]','when','did','bbc','japan','begin','broadcasting','?', - '[SEP]','bbc','japan','was','a','general','entertainment','channel', - ',','which','operated','between','december','2004','and','april', - '2006','.','it','ceased','operations','after','its','japanese', - 'distributor','folded','.','[SEP]'] - segment_ids: [0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] - start_position: 20 - end_position: 21 - valid_length: 36 - - Because of the sliding window approach taken to scoring documents, a single - token can appear in multiple documents. - So you need to record whether each token is a maximum context. E.g. - Doc: the man went to the store and bought a gallon of milk - Span A: the man went to the - Span B: to the store and bought - Span C: and bought a gallon of - ... - - Now the word 'bought' will have two scores from spans B and C. We only - want to consider the score with "maximum context", which we define as - the *minimum* of its left and right context (the *sum* of left and - right context will always be the same, of course). - - In the example the maximum context for 'bought' would be span C since - it has 1 left context and 3 right context, while span B has 4 left context - and 0 right context. - - Parameters - ---------- - tokenizer : BERTTokenizer. - Tokenizer for the sentences. - labels : list of int. - List of all label ids for the classification task. - max_seq_length : int, default 384 - Maximum sequence length of the sentences. 
- doc_stride : int, default 128 - When splitting up a long document into chunks, - how much stride to take between chunks. - max_query_length : int, default 64 - The maximum length of the query tokens. - is_pad : bool, default True - Whether to pad the sentences to maximum length. - is_training : bool, default True - Whether to run training. - do_lookup : bool, default True - Whether to do vocabulary lookup for convert tokens to indices. - """ - - def __init__(self, - tokenizer, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - is_pad=True, - is_training=True, - do_lookup=True): - self.tokenizer = tokenizer - self.max_seq_length = max_seq_length - self.max_query_length = max_query_length - self.doc_stride = doc_stride - self.is_pad = is_pad - self.is_training = is_training - self.do_lookup = do_lookup - - def _is_whitespace(self, c): - if c == ' ' or c == '\t' or c == '\r' or c == '\n' or ord( - c) == 0x202F: - return True - return False - - def _toSquadExample(self, record): - example_id = record[0] - qas_id = record[1] - question_text = record[2] - paragraph_text = record[3] - orig_answer_text = record[4][0] if record[4] else '' - answer_offset = record[5][0] if record[5] else '' - is_impossible = record[6] if len(record) == 7 else False - - doc_tokens = [] - - char_to_word_offset = [] - prev_is_whitespace = True - for c in paragraph_text: - if self._is_whitespace(c): - prev_is_whitespace = True - else: - if prev_is_whitespace: - doc_tokens.append(c) - else: - doc_tokens[-1] += c - prev_is_whitespace = False - char_to_word_offset.append(len(doc_tokens) - 1) - - start_position = -1 - end_position = -1 - - if self.is_training: - if not is_impossible: - answer_length = len(orig_answer_text) - start_position = char_to_word_offset[answer_offset] - end_position = char_to_word_offset[ - answer_offset + answer_length - 1] - # Only add answers where the text can be exactly recovered from the - # document. 
If this CAN'T happen it's likely due to weird Unicode - # stuff so we will just skip the example. - # - # Note that this means for training mode, every example is NOT - # guaranteed to be preserved. - actual_text = ' '.join( - doc_tokens[start_position:(end_position + 1)]) - cleaned_answer_text = ' '.join( - whitespace_splitter(orig_answer_text.strip())) - if actual_text.find(cleaned_answer_text) == -1: - print('Could not find answer: %s vs. %s' % - (actual_text, cleaned_answer_text)) - return None - else: - start_position = -1 - end_position = -1 - orig_answer_text = '' - - example = SquadExample( - qas_id=qas_id, - question_text=question_text, - doc_tokens=doc_tokens, - example_id=example_id, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - is_impossible=is_impossible) - return example - - def _transform(self, *record): - example = self._toSquadExample(record) - if not example: - return None - - padding = self.tokenizer.vocab.padding_token - if self.do_lookup: - padding = self.tokenizer.vocab[padding] - features = [] - query_tokens = self.tokenizer(example.question_text) - - if len(query_tokens) > self.max_query_length: - query_tokens = query_tokens[0:self.max_query_length] - - tok_to_orig_index = [] - orig_to_tok_index = [] - all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = self.tokenizer(token) - for sub_token in sub_tokens: - tok_to_orig_index.append(i) - all_doc_tokens.append(sub_token) - - tok_start_position = None - tok_end_position = None - if self.is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if self.is_training and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + - 1] - 1 - else: - tok_end_position = 
len(all_doc_tokens) - 1 - (tok_start_position, tok_end_position) = _improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, - self.tokenizer, example.orig_answer_text) - - # The -3 accounts for [CLS], [SEP] and [SEP] - max_tokens_for_doc = self.max_seq_length - len(query_tokens) - 3 - - # We can have documents that are longer than the maximum sequence length. - # To deal with this we do a sliding window approach, where we take chunks - # of the up to our max length with a stride of `doc_stride`. - _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - 'DocSpan', ['start', 'length']) - doc_spans = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, self.doc_stride) - - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - token_to_orig_map = {} - token_is_max_context = {} - segment_ids = [] - tokens.append(self.tokenizer.vocab.cls_token) - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append(self.tokenizer.vocab.sep_token) - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len( - tokens)] = tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context( - doc_spans, doc_span_index, split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append(self.tokenizer.vocab.sep_token) - segment_ids.append(1) - - if self.do_lookup: - input_ids = self.tokenizer.convert_tokens_to_ids(tokens) - else: - input_ids = tokens - - # The mask has 1 for real tokens and 0 for padding tokens. 
Only real - # tokens are attended to. - valid_length = len(input_ids) - - # Zero-pad up to the sequence length. - if self.is_pad: - while len(input_ids) < self.max_seq_length: - input_ids.append(padding) - segment_ids.append(padding) - - assert len(input_ids) == self.max_seq_length - assert len(segment_ids) == self.max_seq_length - - start_position = 0 - end_position = 0 - if self.is_training and not example.is_impossible: - # For training, if our document chunk does not contain an annotation - # we throw it out, since there is nothing to predict. - doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start - and tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query_tokens) + 2 - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - - if self.is_training and example.is_impossible: - start_position = 0 - end_position = 0 - - features.append(SQuADFeature(example_id=example.example_id, - qas_id=example.qas_id, - doc_tokens=example.doc_tokens, - doc_span_index=doc_span_index, - tokens=tokens, - token_to_orig_map=token_to_orig_map, - token_is_max_context=token_is_max_context, - input_ids=input_ids, - valid_length=valid_length, - segment_ids=segment_ids, - start_position=start_position, - end_position=end_position, - is_impossible=example.is_impossible)) - return features - - def __call__(self, record): - examples = self._transform(*record) - if not examples: - return None - features = [] - - for _example in examples: - feature = [] - feature.append(_example.example_id) - feature.append(_example.input_ids) - feature.append(_example.segment_ids) - feature.append(_example.valid_length) - feature.append(_example.start_position) - feature.append(_example.end_position) - feature.append(len(_example.input_ids)) - features.append(feature) - - 
return features - - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. - tok_answer_text = ' '.join(tokenizer(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = ' '.join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... 
- # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + \ - 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index diff --git a/scripts/bert/finetune_classifier.py b/scripts/bert/finetune_classifier.py index c4f64f0804..ec04aaaf76 100644 --- a/scripts/bert/finetune_classifier.py +++ b/scripts/bert/finetune_classifier.py @@ -46,13 +46,13 @@ import gluonnlp as nlp from gluonnlp.data import BERTTokenizer from gluonnlp.model import BERTClassifier, RoBERTaClassifier - +from functools import partial from data.classification import MRPCTask, QQPTask, RTETask, STSBTask, SSTTask from data.classification import QNLITask, CoLATask, MNLITask, WNLITask, XNLITask from data.classification import LCQMCTask, ChnSentiCorpTask -from data.transform import BERTDatasetTransform +from data.preprocessing_utils import truncate_seqs_equal, concat_sequences -nlp.utils.check_version('0.8.1', warning_only=True) +#nlp.utils.check_version('0.8.1', warning_only=True) tasks = { 'MRPC': MRPCTask(), @@ -286,23 +286,60 @@ else: bert_tokenizer = BERTTokenizer(vocabulary, lower=do_lower_case) -def preprocess_data(tokenizer, task, batch_size, 
dev_batch_size, max_len, vocab, pad=False): + +def convert_examples_to_features(example, tokenizer=None, truncate_length=512, cls_token=None, sep_token=None, + class_labels=None, label_alias=None, vocab=None, is_test=False): + assert tokenizer is not None + vocab = tokenizer.vocab if vocab is None else vocab + if not is_test: + label_dtype = 'int32' if class_labels else 'float32' + # get the label + label = example[-1] + example = example[:-1] + #create label maps if classification task + if class_labels: + label_map = {} + for (i, l) in enumerate(class_labels): + label_map[l] = i + if label_alias: + for key in label_alias: + label_map[key] = label_map[label_alias[key]] + label = label_map[label] + label = np.array([label], dtype=label_dtype) + + # tokenize raw text + tokens_raw = [tokenizer(l) for l in example] + # truncate to the truncate_length, + tokens_trun = truncate_seqs_equal(tokens_raw, truncate_length) + # concate the sequences with special tokens + tokens_trun[0] = [cls_token] + tokens_trun[0] + tokens, segment_ids, _ = concat_sequences( + tokens_trun, [[sep_token]] * len(tokens_trun)) + # convert the token to ids + input_ids = vocab[tokens] + valid_length = len(input_ids) + if not is_test: + return input_ids, valid_length, segment_ids, label + else: + return input_ids, valid_length, segment_ids + +def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab): """Train/eval Data preparation function.""" # transformation for data train and dev - label_dtype = 'float32' if not task.class_labels else 'int32' - trans = BERTDatasetTransform(tokenizer, max_len, - vocab=vocab, - class_labels=task.class_labels, - label_alias=task.label_alias, - pad=pad, pair=task.is_pair, - has_label=True) + label_dtype = 'int32' if task.class_labels else 'float32' + truncate_length = max_len - 3 if task.is_pair else max_len - 2 + trans = partial(convert_examples_to_features, tokenizer=tokenizer, truncate_length=truncate_length, + cls_token=vocab.cls_token if 
not use_roberta else vocab.bos_token, + sep_token=vocab.sep_token if not use_roberta else vocab.eos_token, + class_labels=task.class_labels, + label_alias=task.label_alias) # data train # task.dataset_train returns (segment_name, dataset) train_tsv = task.dataset_train()[1] data_train = mx.gluon.data.SimpleDataset(list(map(trans, train_tsv))) data_train_len = data_train.transform( - lambda input_id, length, segment_id, label_id: length, lazy=False) + lambda _, valid_length, segment_ids, label: valid_length, lazy=False) # bucket sampler for training pad_val = vocabulary[vocabulary.padding_token] batchify_fn = nlp.data.batchify.Tuple( @@ -342,11 +379,11 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab, nlp.data.batchify.Pad(axis=0, pad_val=pad_val), nlp.data.batchify.Stack(), nlp.data.batchify.Pad(axis=0, pad_val=0)) # transform for data test - test_trans = BERTDatasetTransform(tokenizer, max_len, - vocab=vocab, - class_labels=None, - pad=pad, pair=task.is_pair, - has_label=False) + test_trans = partial(convert_examples_to_features, tokenizer=tokenizer, truncate_length=max_len, + cls_token=vocab.cls_token if not use_roberta else vocab.bos_token, + sep_token=vocab.sep_token if not use_roberta else vocab.eos_token, + class_labels=None, + is_test=True) # data test. For MNLI, more than one test set is available test_tsv = task.dataset_test() @@ -367,7 +404,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab, # Get the loader. 
logging.info('processing dataset...') train_data, dev_data_list, test_data_list, num_train_examples = preprocess_data( - bert_tokenizer, task, batch_size, dev_batch_size, args.max_len, vocabulary, args.pad) + bert_tokenizer, task, batch_size, dev_batch_size, args.max_len, vocabulary) def test(loader_test, segment): diff --git a/scripts/bert/finetune_squad.py b/scripts/bert/finetune_squad.py index 16c8615853..6ec6f9c9d2 100644 --- a/scripts/bert/finetune_squad.py +++ b/scripts/bert/finetune_squad.py @@ -37,19 +37,23 @@ import logging import os import io -import copy import random import time import warnings +import itertools import numpy as np import mxnet as mx +import multiprocessing as mp import gluonnlp as nlp from gluonnlp.data import SQuAD +from functools import partial from model.qa import BertForQALoss, BertForQA -from data.qa import SQuADTransform, preprocess_dataset from bert_qa_evaluate import get_F1_EM, predict, PredResult +from data.preprocessing_utils import truncate_seqs_equal, improve_answer_span, \ + concat_sequences, tokenize_and_align_positions, get_doc_spans, align_position2doc_spans, \ + check_is_max_context, convert_squad_examples np.random.seed(6) random.seed(6) @@ -313,25 +317,24 @@ def train(): """Training function.""" - segment = 'train' if not args.debug else 'dev' + segment = 'train' #if not args.debug else 'dev' log.info('Loading %s data...', segment) if version_2: train_data = SQuAD(segment, version='2.0') else: train_data = SQuAD(segment, version='1.1') if args.debug: - sampled_data = [train_data[i] for i in range(1000)] + sampled_data = [train_data[i] for i in range(0, 10000)] train_data = mx.gluon.data.SimpleDataset(sampled_data) log.info('Number of records in Train data:{}'.format(len(train_data))) - - train_data_transform, _ = preprocess_dataset( - train_data, SQuADTransform( - copy.copy(tokenizer), + train_data_transform = preprocess_dataset( + tokenizer, + train_data, max_seq_length=max_seq_length, doc_stride=doc_stride, 
max_query_length=max_query_length, - is_pad=True, - is_training=True)) + input_features=True) + log.info('The number of examples after preprocessing:{}'.format( len(train_data_transform))) @@ -442,35 +445,33 @@ def set_new_lr(step_num, batch_id): def evaluate(): - """Evaluate the model on validation dataset. - """ + """Evaluate the model on validation dataset.""" log.info('Loading dev data...') if version_2: dev_data = SQuAD('dev', version='2.0') else: dev_data = SQuAD('dev', version='1.1') if args.debug: - sampled_data = [dev_data[0], dev_data[1], dev_data[2]] + sampled_data = [dev_data[i] for i in range(100)] dev_data = mx.gluon.data.SimpleDataset(sampled_data) log.info('Number of records in dev data:{}'.format(len(dev_data))) - dev_dataset = dev_data.transform( - SQuADTransform( - copy.copy(tokenizer), - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length, - is_pad=False, - is_training=False)._transform, lazy=False) + dev_dataset = preprocess_dataset( + tokenizer, + dev_data, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length, + input_features=False) + + dev_data_transform = preprocess_dataset( + tokenizer, + dev_data, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length, + input_features=True) - dev_data_transform, _ = preprocess_dataset( - dev_data, SQuADTransform( - copy.copy(tokenizer), - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length, - is_pad=False, - is_training=False)) log.info('The number of examples after preprocessing:{}'.format( len(dev_data_transform))) @@ -536,6 +537,113 @@ def evaluate(): log.info(F1_EM) +SquadBERTFeautre = collections.namedtuple('SquadBERTFeautre', ['example_id', 'qas_id', 'doc_tokens', 'valid_length', + 'tokens', 'token_to_orig_map', 'token_is_max_context', 'input_ids', + 'p_mask', 'segment_ids', 'start_position', + 'end_position','is_impossible']) + + +def 
convert_examples_to_features(example, + tokenizer=None, + cls_token=None, + sep_token=None, + vocab=None, + max_seq_length=384, + doc_stride=128, + max_query_length=64, + cls_index=0): + """convert the examples to the BERT features""" + query_tokenized = [cls_token] + tokenizer(example.question_text)[: max_query_length] + #tokenize paragraph and get start/end position of the answer in tokenized paragraph + tok_start_position, tok_end_position, all_doc_tokens, _, tok_to_orig_index= \ + tokenize_and_align_positions(example.doc_tokens, + example.start_position, + example.end_position, + tokenizer) + # get doc spans using sliding window + doc_spans, doc_spans_indices = get_doc_spans(all_doc_tokens, max_seq_length - len(query_tokenized) - 2, + doc_stride) + + if not example.is_impossible: + (tok_start_position, tok_end_position) = improve_answer_span( + all_doc_tokens, tok_start_position, tok_end_position, + tokenizer, example.orig_answer_text) + # get the new start/end position + positions = [align_position2doc_spans([tok_start_position, tok_end_position], doc_idx, + offset=len(query_tokenized) + 1, + default_value=0) for doc_idx in doc_spans_indices] + else: + # if the question is impossible to answer(in squad2.0), set the start/end position to cls index + positions = [[cls_index, cls_index] for _ in doc_spans_indices] + + token_is_max_context = [{len(query_tokenized) + p: check_is_max_context(doc_spans_indices, i, + p + doc_spans_indices[i][0]) + for p in range(len(doc_span))} + for (i, doc_span) in enumerate(doc_spans)] + token_to_orig_map = [{len(query_tokenized) + p + 1: tok_to_orig_index[p + doc_spans_indices[i][0]] + for p in range(len(doc_span))} + for (i, doc_span) in enumerate(doc_spans)] + + #get sequence features: tokens, segment_ids, p_masks + seq_features = [concat_sequences([query_tokenized, doc_span], [[sep_token]] * 2) + for doc_span in doc_spans] + + features = [SquadBERTFeautre(example_id=example.example_id, qas_id=example.qas_id, + 
doc_tokens=example.doc_tokens, valid_length=len(tokens), tokens=tokens, + token_to_orig_map=t2o, token_is_max_context=is_max, input_ids=vocab[tokens], + p_mask=p_mask, segment_ids=segment_ids, start_position=start, end_position=end, + is_impossible=example.is_impossible) + for (tokens, segment_ids, p_mask), (start, end), is_max, t2o + in zip(seq_features, positions, token_is_max_context, token_to_orig_map)] + return features + + +def preprocess_dataset(tokenizer, + dataset, + vocab = None, + max_seq_length=384, + doc_stride=128, + max_query_length=64, + input_features=True, + num_workers=4): + + """Loads a dataset into features""" + vocab = tokenizer.vocab if vocab is None else vocab + trans = partial(convert_examples_to_features, + tokenizer=tokenizer, + cls_token=vocab.cls_token, + sep_token=vocab.sep_token, + vocab=vocab, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length) + pool = mp.Pool(num_workers) + start = time.time() + + example_trans = partial(convert_squad_examples, is_training=input_features) + # convert the raw dataset into raw features + examples = pool.map(example_trans, dataset) + + if input_features: + # convert the full features into the training features + # Note that we will need the full features to make evaluation + # Due to using sliding windows in data preprocessing, + # we will have multiple examples for a single entry after processed. + # Thus we need to flatten it for training. + data_feature = mx.gluon.data.SimpleDataset(list(itertools.chain.from_iterable(pool.map(trans, examples)))) + data_feature = data_feature.transform(lambda *example: (example[0], # example_id + example[7], # inputs_id + example[9], # segment_ids + example[3], # valid_length, + example[10], # start_position, + example[11])) # end_position + else: + data_feature = mx.gluon.data.SimpleDataset(list(pool.map(trans, examples))) + + end = time.time() + print('Done! Transform dataset costs %.2f seconds.' 
% (end - start)) + return data_feature + if __name__ == '__main__': if not only_predict: train() diff --git a/scripts/tests/test_preprocess_utils.py b/scripts/tests/test_preprocess_utils.py new file mode 100644 index 0000000000..1f8a5dd7a1 --- /dev/null +++ b/scripts/tests/test_preprocess_utils.py @@ -0,0 +1,30 @@ +"""test data preprocessing utils""" + +import numpy as np +from ..bert.data.preprocessing_utils import truncate_seqs_equal, concat_sequences + + +def test_truncate(): + seqs = [[j*i for j in range(i)] for i in range(1,10)] + res1 = [[0], [0, 2], [0, 3, 6], [0, 4, 8], [0, 5, 10], [0, 6], [0, 7], [0, 8], [0, 9]] + seq = [[i for i in range(20)]] + + truncated = truncate_seqs_equal(seqs, 20) + truncated2 = truncate_seqs_equal(seq, 20) + + assert all(truncated == np.array(res1)) + assert all(truncated2[0] == np.array(seq)[0]) + +def test_concat_sequence(): + seqs = [[3 * i + j for j in range(3)] for i in range(3)] + seperators = [['a'], ['b'], ['c']] + res = concat_sequences(seqs, seperators) + assert res[0] == [0, 1, 2, 'a', 3, 4, 5, 'b', 6, 7, 8, 'c'] + assert res[1] == [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] + assert res[2] == [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1] + + seperators = [['a'], [], ['b']] + res = concat_sequences(seqs, seperators) + assert res[0] == [0, 1, 2, 'a', 3, 4, 5, 6, 7, 8, 'b'] + assert res[1] == [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2] + assert res[2] == [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1] \ No newline at end of file diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index ba17254986..ac7d427dcc 100644 --- a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -21,8 +21,7 @@ from . 
import (batchify, candidate_sampler, conll, corpora, dataloader, dataset, question_answering, registry, sampler, sentiment, stream, transforms, translation, utils, - word_embedding_evaluation, intent_slot, glue, - data_preprocessing_transform, preprocessing_utils) + word_embedding_evaluation, intent_slot, glue) from .candidate_sampler import * from .conll import * from .glue import * @@ -39,13 +38,10 @@ from .utils import * from .word_embedding_evaluation import * from .intent_slot import * -from .data_preprocessing_transform import * -from .preprocessing_utils import * __all__ = (['batchify'] + utils.__all__ + transforms.__all__ + sampler.__all__ + dataset.__all__ + corpora.__all__ + sentiment.__all__ + word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + translation.__all__ + registry.__all__ + question_answering.__all__ + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ - + glue.__all__ + data_preprocessing_transform.__all__ - + preprocessing_utils.__all__)# pytype: disable=attribute-error + + glue.__all__)# pytype: disable=attribute-error diff --git a/src/gluonnlp/data/data_preprocessing_transform.py b/src/gluonnlp/data/data_preprocessing_transform.py deleted file mode 100644 index b1750d6962..0000000000 --- a/src/gluonnlp/data/data_preprocessing_transform.py +++ /dev/null @@ -1,204 +0,0 @@ -"""glue and squad transform.""" - -__all__ = [ - 'BertStyleGlueTransform','BertStyleSQuADTransform', - 'SQuADExampleTransform', 'SquadExample' -] - -import collections -import numpy as np -from .preprocessing_utils import truncate_seqs_equal, improve_answer_span, \ - ConcatSeqTransform, TokenizeAndPositionAlign, get_doc_spans, align_position2doc_spans, \ - check_is_max_context - -class BertStyleGlueTransform: - """Convert from gluonnlp.data.Glue* record to inputs for BERT-style model.""" - def __init__(self, - tokenizer, - truncate_length, - cls_token=None, - sep_token=None, - class_labels=None, - label_dtype='float32', - 
label_alias=None, - vocab=None): - self._vocab = tokenizer.vocab if vocab is None else vocab - self.class_labels = class_labels - self._label_dtype = label_dtype - self.label_alias = label_alias - if self.class_labels: - self._label_map = {} - for (i, label) in enumerate(self.class_labels): - self._label_map[label] = i - if self.label_alias: - for key in self.label_alias: - self._label_map[key] = self._label_map[ - self.label_alias[key]] - truncate_length += 3 if len(class_labels) > 1 else 2 - self._truncate_length = truncate_length - self._tokenizer = tokenizer - self._sep_token = sep_token - self._cls_token = cls_token - - def __call__(self, line): - #process the token pair - tokens_raw = [self._tokenizer(l) for l in line[:-1]] - tokens_trun = truncate_seqs_equal(tokens_raw, self._truncate_length) - tokens_trun[0] = [self._cls_token] + tokens_trun[0] - tokens, segment_ids, _ = ConcatSeqTransform( - tokens_trun, [[self._sep_token]] * len(tokens_trun)) - input_ids = self._vocab[tokens] - #get label - label = line[-1] - # map to int if class labels are available - if self.class_labels: - label = self._label_map[label] - label = np.array([label], dtype=self._label_dtype) - return input_ids, segment_ids, label - - - -SquadExample = collections.namedtuple('SquadExample', [ - 'qas_id', 'question_text', 'doc_tokens', 'example_id', 'orig_answer_text', - 'start_position', 'end_position', 'is_impossible' -]) - -SquadBERTFeautre = collections.namedtuple('SquadBERTFeautre', ['example_id', 'qas_id', 'doc_tokens', - 'tokens', 'token_to_orig_map', 'token_is_max_context', 'input_ids', - 'p_mask', 'segment_ids', 'start_position', - 'end_position','is_impossible']) - - -class SQuADExampleTransform: - """Convert from gluonnlp.data.SQuAD's record to SquadExample.""" - def __init__(self, training=True, version_2=False): - self.is_training = training - self._version_2 = version_2 - - def _is_whitespace(self, c): - if c == ' ' or c == '\t' or c == '\r' or c == '\n' or ord(c) == 0x202F: 
- return True - return False - - def __call__(self, record): - example_id = record[0] - qas_id = record[1] - question_text = record[2] - paragraph_text = record[3] - orig_answer_text = record[4][0] if record[4] else '' - answer_offset = record[5][0] if record[5] else '' - is_impossible = record[6] if len(record) == 7 else False - - answer_length = len(orig_answer_text) - doc_tokens = [] - - char_to_word_offset = [] - prev_is_whitespace = True - - for c in paragraph_text: - if self._is_whitespace(c): - prev_is_whitespace = True - else: - if prev_is_whitespace: - doc_tokens.append(c) - else: - doc_tokens[-1] += c - prev_is_whitespace = False - char_to_word_offset.append(len(doc_tokens) - 1) - - if self.is_training: - start_position = -1 - end_position = -1 - else: - start_position = char_to_word_offset[answer_offset] if not is_impossible else -1 - end_position = char_to_word_offset[answer_offset + answer_length -1] if not is_impossible else -1 - - example = SquadExample(qas_id=qas_id, - question_text=question_text, - doc_tokens=doc_tokens, - example_id=example_id, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - is_impossible=is_impossible) - return example - - -class BertStyleSQuADTransform: - """Dataset Transformation for BERT-style QA. - - The transformation is processed in the following steps: - - Convert from gluonnlp.data.SQuAD's record to SquadExample. - - Tokenize the question_text in the example. - - For examples where the document is too long, - use a sliding window to split into multiple features and - record whether each token is a maximum context. - - Tokenize the split document chunks. - - Combine the token of question_text with the token - of the document and insert [CLS] and [SEP]. - - Generate the start position and end position of the answer. - - Generate valid length. 
- """ - def __init__(self, - tokenizer, - cls_token, - sep_token, - vocab=None, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - is_training=True): - self._tokenizer = tokenizer - self._vocab = tokenizer.vocab if vocab is None else vocab - self._cls_token = cls_token - self._sep_token = sep_token - self._max_seq_length = max_seq_length - self._doc_stride = doc_stride - self._max_query_length = max_query_length - self._get_example = SQuADExampleTransform(training=is_training) - self._is_training = is_training - - def __call__(self, line): - example = self._get_example(line) - query_tokenized = [self._cls_token] + self._tokenizer(example.question_text)[: self._max_query_length] - #get the start/end position of the answer in tokenized paragraph - (tok_start_position, tok_end_position), all_doc_tokens, _, tok_to_orig_index= \ - TokenizeAndPositionAlign(example.doc_tokens, - [example.start_position, - example.end_position], - self._tokenizer) - if not example.is_impossible: - (tok_start_position, tok_end_position) = improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, - self._tokenizer, example.orig_answer_text) - else: - tok_start_position, tok_end_position = -1, -1 - - #get doc spans - doc_spans, doc_spans_indices = get_doc_spans(all_doc_tokens, self._max_seq_length - len(query_tokenized) - 2, - self._doc_stride) - #get the new start/end position - positions = [align_position2doc_spans([tok_start_position, tok_end_position], doc_idx, - offset=len(query_tokenized) + 1, - default_value=0) for doc_idx in doc_spans_indices] - - token_is_max_context = [{len(query_tokenized) + p: check_is_max_context(doc_spans_indices, i, - p + doc_spans_indices[i][0]) - for p in range(len(doc_span))} - for (i, doc_span) in enumerate(doc_spans)] - token_to_orig_map = [{len(query_tokenized) + p: tok_to_orig_index[p + doc_spans_indices[i][0]] - for p in range(len(doc_span))} - for (i, doc_span) in enumerate(doc_spans)] - #get sequence features: tokens, 
segment_ids, p_masks - seq_features = [ConcatSeqTransform([query_tokenized, doc_span], [[self._sep_token]] * 2) - for doc_span in doc_spans] - - features = [SquadBERTFeautre(example_id=example.example_id, qas_id=example.qas_id, - doc_tokens=example.doc_tokens, tokens=tokens, token_to_orig_map=t2o, - token_is_max_context=is_max, input_ids=self._vocab[tokens], - p_mask=p_mask, segment_ids=segment_ids, start_position=start, end_position=end, - is_impossible=example.is_impossible) for (tokens, segment_ids, p_mask), (start, end), - is_max, t2o in zip(seq_features, positions, - token_is_max_context, - token_to_orig_map)] - return features diff --git a/tests/unittest/test_dataset_transform.py b/tests/unittest/test_dataset_transform.py deleted file mode 100644 index 792272e88b..0000000000 --- a/tests/unittest/test_dataset_transform.py +++ /dev/null @@ -1,172 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test BERTStyleDataSetTransform.""" - -import numpy as np -from gluonnlp.vocab import BERTVocab -from gluonnlp.data import count_tokens, BERTTokenizer, \ - BertStyleGlueTransform, BertStyleSQuADTransform, truncate_seqs_equal, \ - ConcatSeqTransform - - -def test_bertstyle_glue_dataset_transform(): - text_a = u'is this jacksonville ?' - text_b = u'no it is not' - text_ab = u'is this jacksonville ? no it is not' - label_cls = 0 - vocab_tokens = ['is', 'this', 'jack', '##son', '##ville', '?', 'no', 'it', 'is', 'not'] - - bert_vocab = BERTVocab(count_tokens(vocab_tokens)) - tokenizer = BERTTokenizer(vocab=bert_vocab) - - # test Transform for classification task - bert_cls_dataset_t = BertStyleGlueTransform(tokenizer, 15, class_labels=[label_cls], - cls_token=bert_vocab.cls_token, - sep_token=bert_vocab.sep_token) - - token_ids, type_ids, label_ids = bert_cls_dataset_t((text_a, text_b, label_cls)) - text_a_tokens = ['is', 'this', 'jack', '##son', '##ville', '?'] - text_b_tokens = ['no', 'it', 'is', 'not'] - text_a_ids = bert_vocab[text_a_tokens] - text_b_ids = bert_vocab[text_b_tokens] - - cls_ids = bert_vocab[[bert_vocab.cls_token]] - sep_ids = bert_vocab[[bert_vocab.sep_token]] - - concated_ids = cls_ids + text_a_ids + sep_ids + text_b_ids + sep_ids - - valid_type_ids = np.zeros((13,), dtype=np.int32) - start = len(text_a_tokens) + 2 - end = len(text_a_tokens)+2+len(text_b_tokens)+1 - valid_type_ids[start:end] = 1 - assert all(np.array(token_ids) == np.array(concated_ids)) - assert all(type_ids == valid_type_ids) - assert all(label_ids == np.array([label_cls], dtype=np.int32)) - - #test Transform for regression task - label_reg = 0.2 - bert_reg_dataset_t = BertStyleGlueTransform(tokenizer, 15, - cls_token=bert_vocab.cls_token, - sep_token=bert_vocab.sep_token) - token_ids, type_ids, label_reg_val = bert_reg_dataset_t((text_a, text_b, label_reg)) - assert all(token_ids == np.array(concated_ids)) - assert all(type_ids == valid_type_ids) - assert all(label_reg_val 
== np.array([label_reg], dtype=np.float32)) - - #test Transform for single input sequence - label_reg = 0.2 - bert_reg_dataset_t = BertStyleGlueTransform(tokenizer, 15, - cls_token=bert_vocab.cls_token, - sep_token=bert_vocab.sep_token) - token_ids, type_ids, label_reg_val = bert_reg_dataset_t((text_ab, label_reg)) - concated_ids = cls_ids + text_a_ids + text_b_ids + sep_ids - - valid_type_ids = np.zeros((12,), dtype=np.int32) - assert all(token_ids == np.array(concated_ids)) - assert all(type_ids == valid_type_ids) - assert all(label_reg_val == np.array([label_reg], dtype=np.float32)) - -def test_bertstyle_squad_dataset_transform(): - data_without_impossible = (0, - '1', - 'what is my name?', - 'my name is jack', - ['jack'], - [11], - False) - - data_with_impossible = (0, - '1', - 'what is my name?', - 'my name is jack', - ['John'], - [0], - True) - - vocab_tokens = ['what', 'is', 'my', 'na', '##me', '?', 'my', 'na', '##me', 'is', 'jack'] - bert_vocab = BERTVocab(count_tokens(vocab_tokens)) - tokenizer = BERTTokenizer(vocab=bert_vocab) - trans = BertStyleSQuADTransform(tokenizer, bert_vocab.cls_token, - bert_vocab.sep_token, - max_seq_length=len(vocab_tokens) + 3, - doc_stride=10, max_query_length=6, - is_training=True) - example_id, inputs, token_types, p_mask, start_label, end_label, is_impossible = \ - trans(data_without_impossible)[0] - text_a_tokens = ['what', 'is', 'my', 'na','##me', '?'] - text_b_tokens = ['my', 'na', '##me', 'is', 'jack'] - text_a_ids = bert_vocab[text_a_tokens] - text_b_ids = bert_vocab[text_b_tokens] - - cls_ids = bert_vocab[[bert_vocab.cls_token]] - sep_ids = bert_vocab[[bert_vocab.sep_token]] - concated_ids = cls_ids + text_a_ids + sep_ids + text_b_ids + sep_ids - inputs = np.array(inputs) - concated_ids = np.array(concated_ids) - valid_token_type =np.ones((len(vocab_tokens) + 3,), dtype=np.int32) - start, end = 0, len(text_a_tokens) + 2 - valid_token_type[start:end] = 0 - - p_mask_valid = np.zeros((len(vocab_tokens) + 3,), 
dtype=np.int32) - p_mask_valid[len(text_a_tokens) + 1] = 1 - p_mask_valid[-1] = 1 - - assert all(inputs == concated_ids) - assert example_id == data_with_impossible[0] - assert all(token_types == valid_token_type) - assert all(p_mask == p_mask_valid) - assert start_label == 12 - assert end_label == 12 - assert is_impossible == False - - #squad2 with impossible - example_id, inputs, token_types, p_mask, start_label, end_label, is_impossible = \ - trans(data_with_impossible)[0] - assert all(inputs == concated_ids) - assert example_id == data_with_impossible[0] - assert all(token_types == valid_token_type) - assert all(p_mask == p_mask_valid) - assert start_label == 0 - assert end_label == 0 - assert is_impossible == True - -def test_truncate(): - seqs = [[j*i for j in range(i)] for i in range(1,10)] - res1 = [[0], [0, 2], [0, 3, 6], [0, 4, 8], [0, 5, 10], [0, 6], [0, 7], [0, 8], [0, 9]] - seq = [[i for i in range(20)]] - - truncated = truncate_seqs_equal(seqs, 20) - truncated2 = truncate_seqs_equal(seq, 20) - - assert all(truncated == np.array(res1)) - assert all(truncated2[0] == np.array(seq)[0]) - -def test_concat_sequence(): - seqs = [[3 * i + j for j in range(3)] for i in range(3)] - seperators = [['a'], ['b'], ['c']] - res = ConcatSeqTransform(seqs, seperators) - assert res[0] == [0, 1, 2, 'a', 3, 4, 5, 'b', 6, 7, 8, 'c'] - assert res[1] == [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] - assert res[2] == [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1] - - seperators = [['a'], [], ['b']] - res = ConcatSeqTransform(seqs, seperators) - assert res[0] == [0, 1, 2, 'a', 3, 4, 5, 6, 7, 8, 'b'] - assert res[1] == [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2] - assert res[2] == [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1] - From 36f0905123503488633f3276f8a4235454cdc44b Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 11 Dec 2019 17:32:00 +0800 Subject: [PATCH 22/59] fix lint --- scripts/bert/data/preprocessing_utils.py | 51 ++- scripts/bert/finetune_classifier.py | 308 ++++++++------ scripts/bert/finetune_squad.py 
| 520 +++++++++++++---------- 3 files changed, 496 insertions(+), 383 deletions(-) diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index bb6e0b172e..999bda972d 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -2,8 +2,9 @@ __all__ = [ 'truncate_seqs_equal', 'concat_sequences', 'tokenize_and_align_positions', - 'get_doc_spans', 'align_position2doc_spans', 'improve_answer_span', 'check_is_max_context', - 'convert_squad_examples'] + 'get_doc_spans', 'align_position2doc_spans', 'improve_answer_span', + 'check_is_max_context', 'convert_squad_examples' +] import collections import itertools @@ -77,16 +78,22 @@ def concat_sequences(seqs, separators, separator_mask=1): np.array: mask for special tokens """ assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 - concat = sum((seq + sep for sep, seq in - itertools.zip_longest(separators, seqs, fillvalue=[])), []) - segment_ids = sum(([i] * (len(seq) + len(sep)) for i, (sep, seq) in - enumerate(itertools.zip_longest(separators, seqs, fillvalue=[]))), []) - p_mask = sum(([0] * len(seq) + [separator_mask] * len(sep) for sep, seq in - itertools.zip_longest(separators, seqs, fillvalue=[])), []) + concat = sum(( + seq + sep + for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), + []) + segment_ids = sum( + ([i] * (len(seq) + len(sep)) for i, (sep, seq) in enumerate( + itertools.zip_longest(separators, seqs, fillvalue=[]))), []) + p_mask = sum(( + [0] * len(seq) + [separator_mask] * len(sep) + for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), + []) return concat, segment_ids, p_mask -def tokenize_and_align_positions(origin_text, start_position, end_position, tokenizer): +def tokenize_and_align_positions(origin_text, start_position, end_position, + tokenizer): """Tokenize the text and align the origin positions to the corresponding position""" orig_to_tok_index = [] 
tok_to_orig_index = [] @@ -125,23 +132,30 @@ def get_doc_spans(full_doc, max_length, doc_stride): while start_offset < len(full_doc): length = min(max_length, len(full_doc) - start_offset) end_offset = start_offset + length - doc_spans.append((full_doc[start_offset: end_offset], (start_offset, end_offset))) + doc_spans.append( + (full_doc[start_offset:end_offset], (start_offset, end_offset))) if start_offset + length == len(full_doc): break start_offset += min(length, doc_stride) return list(zip(*doc_spans)) -def align_position2doc_spans(positions, doc_spans_indices, - offset=0, default_value=-1, all_in_span=True): +def align_position2doc_spans(positions, + doc_spans_indices, + offset=0, + default_value=-1, + all_in_span=True): """Align the origin positions to the corresponding position in doc spans""" if not isinstance(positions, list): positions = [positions] doc_start, doc_end = doc_spans_indices if all_in_span and not all([p in range(doc_start, doc_end) for p in positions]): return [default_value] * len(positions) - new_positions = [p - doc_start + offset if p in range(doc_start, doc_end) - else default_value for p in positions] + new_positions = [ + p - doc_start + + offset if p in range(doc_start, doc_end) else default_value + for p in positions + ] return new_positions @@ -228,7 +242,8 @@ def check_is_max_context(doc_spans, cur_span_index, position): SquadExample = collections.namedtuple('SquadExample', [ 'qas_id', 'question_text', 'doc_tokens', 'example_id', 'orig_answer_text', - 'start_position', 'end_position', 'is_impossible']) + 'start_position', 'end_position', 'is_impossible' +]) def convert_squad_examples(record, is_training): @@ -262,8 +277,10 @@ def convert_squad_examples(record, is_training): start_position = -1 end_position = -1 else: - start_position = char_to_word_offset[answer_offset] if not is_impossible else -1 - end_position = char_to_word_offset[answer_offset + answer_length - 1] if not is_impossible else -1 + start_position = 
char_to_word_offset[ + answer_offset] if not is_impossible else -1 + end_position = char_to_word_offset[answer_offset + answer_length - + 1] if not is_impossible else -1 example = SquadExample(qas_id=qas_id, question_text=question_text, diff --git a/scripts/bert/finetune_classifier.py b/scripts/bert/finetune_classifier.py index ec04aaaf76..3c816fe793 100644 --- a/scripts/bert/finetune_classifier.py +++ b/scripts/bert/finetune_classifier.py @@ -39,6 +39,7 @@ import random import logging import warnings +from functools import partial import numpy as np import mxnet as mx from mxnet import gluon @@ -46,7 +47,6 @@ import gluonnlp as nlp from gluonnlp.data import BERTTokenizer from gluonnlp.model import BERTClassifier, RoBERTaClassifier -from functools import partial from data.classification import MRPCTask, QQPTask, RTETask, STSBTask, SSTTask from data.classification import QNLITask, CoLATask, MNLITask, WNLITask, XNLITask from data.classification import LCQMCTask, ChnSentiCorpTask @@ -72,84 +72,85 @@ parser = argparse.ArgumentParser( description='BERT fine-tune examples for classification/regression tasks.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument( - '--epochs', type=int, default=3, help='number of epochs.') +parser.add_argument('--epochs', type=int, default=3, help='number of epochs.') parser.add_argument( '--batch_size', type=int, default=32, help='Batch size. 
Number of examples per gpu in a minibatch.') -parser.add_argument( - '--dev_batch_size', - type=int, - default=8, - help='Batch size for dev set and test set') -parser.add_argument( - '--lr', - type=float, - default=5e-5, - help='Initial learning rate') -parser.add_argument( - '--epsilon', - type=float, - default=1e-6, - help='Small value to avoid division by 0' -) +parser.add_argument('--dev_batch_size', + type=int, + default=8, + help='Batch size for dev set and test set') +parser.add_argument('--lr', + type=float, + default=5e-5, + help='Initial learning rate') +parser.add_argument('--epsilon', + type=float, + default=1e-6, + help='Small value to avoid division by 0') parser.add_argument( '--warmup_ratio', type=float, default=0.1, help='ratio of warmup steps used in NOAM\'s stepsize schedule') -parser.add_argument( - '--log_interval', - type=int, - default=10, - help='report interval') -parser.add_argument( - '--max_len', - type=int, - default=128, - help='Maximum length of the sentence pairs') +parser.add_argument('--log_interval', + type=int, + default=10, + help='report interval') +parser.add_argument('--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') parser.add_argument( '--pad', action='store_true', - help='Whether to pad to maximum length when preparing data batches. Default is False.') -parser.add_argument( - '--seed', type=int, default=2, help='Random seed') + help= + 'Whether to pad to maximum length when preparing data batches. Default is False.' +) +parser.add_argument('--seed', type=int, default=2, help='Random seed') parser.add_argument( '--accumulate', type=int, default=None, - help='The number of batches for gradients accumulation to simulate large batch size. ' - 'Default is None') -parser.add_argument( - '--gpu', type=int, default=None, help='Which gpu for finetuning.') + help= + 'The number of batches for gradients accumulation to simulate large batch size. 
' + 'Default is None') +parser.add_argument('--gpu', + type=int, + default=None, + help='Which gpu for finetuning.') parser.add_argument( '--task_name', type=str, choices=tasks.keys(), help='The name of the task to fine-tune. Choices include MRPC, QQP, ' - 'QNLI, RTE, STS-B, CoLA, MNLI, WNLI, SST.') -parser.add_argument( - '--bert_model', - type=str, - default='bert_12_768_12', - choices=['bert_12_768_12', 'bert_24_1024_16', 'roberta_12_768_12', 'roberta_24_1024_16'], - help='The name of pre-trained BERT model to fine-tune') -parser.add_argument( - '--bert_dataset', - type=str, - default='book_corpus_wiki_en_uncased', - choices=['book_corpus_wiki_en_uncased', 'book_corpus_wiki_en_cased', - 'openwebtext_book_corpus_wiki_en_uncased', 'wiki_multilingual_uncased', - 'wiki_multilingual_cased', 'wiki_cn_cased', - 'openwebtext_ccnews_stories_books_cased'], - help='The dataset BERT pre-trained with.') -parser.add_argument( - '--pretrained_bert_parameters', - type=str, - default=None, - help='Pre-trained bert model parameter file.') + 'QNLI, RTE, STS-B, CoLA, MNLI, WNLI, SST.') +parser.add_argument('--bert_model', + type=str, + default='bert_12_768_12', + choices=[ + 'bert_12_768_12', 'bert_24_1024_16', + 'roberta_12_768_12', 'roberta_24_1024_16' + ], + help='The name of pre-trained BERT model to fine-tune') +parser.add_argument('--bert_dataset', + type=str, + default='book_corpus_wiki_en_uncased', + choices=[ + 'book_corpus_wiki_en_uncased', + 'book_corpus_wiki_en_cased', + 'openwebtext_book_corpus_wiki_en_uncased', + 'wiki_multilingual_uncased', 'wiki_multilingual_cased', + 'wiki_cn_cased', + 'openwebtext_ccnews_stories_books_cased' + ], + help='The dataset BERT pre-trained with.') +parser.add_argument('--pretrained_bert_parameters', + type=str, + default=None, + help='Pre-trained bert model parameter file.') parser.add_argument( '--model_parameters', type=str, @@ -165,19 +166,20 @@ parser.add_argument( '--only_inference', action='store_true', - help='If set, we skip 
training and only perform inference on dev and test data.') -parser.add_argument( - '--dtype', - type=str, - default='float32', - choices=['float32', 'float16'], - help='The data type for training.') + help= + 'If set, we skip training and only perform inference on dev and test data.' +) +parser.add_argument('--dtype', + type=str, + default='float32', + choices=['float32', 'float16'], + help='The data type for training.') parser.add_argument( '--early_stop', type=int, default=None, help='Whether to perform early stopping based on the metric on dev set. ' - 'The provided value is the patience. ') + 'The provided value is the patience. ') args = parser.parse_args() @@ -231,12 +233,12 @@ use_roberta = 'roberta' in model_name get_model_params = { - 'name' : model_name, - 'dataset_name' : dataset, - 'pretrained' : get_pretrained, - 'ctx' : ctx, - 'use_decoder' : False, - 'use_classifier' : False, + 'name': model_name, + 'dataset_name': dataset, + 'pretrained': get_pretrained, + 'ctx': ctx, + 'use_decoder': False, + 'use_classifier': False, } # RoBERTa does not contain parameters for sentence pair classification if not use_roberta: @@ -268,11 +270,17 @@ output_dir = args.output_dir if pretrained_bert_parameters: logging.info('loading bert params from %s', pretrained_bert_parameters) - nlp.utils.load_parameters(model.bert, pretrained_bert_parameters, ctx=ctx, - ignore_extra=True, cast_dtype=True) + nlp.utils.load_parameters(model.bert, + pretrained_bert_parameters, + ctx=ctx, + ignore_extra=True, + cast_dtype=True) if model_parameters: logging.info('loading model params from %s', model_parameters) - nlp.utils.load_parameters(model, model_parameters, ctx=ctx, cast_dtype=True) + nlp.utils.load_parameters(model, + model_parameters, + ctx=ctx, + cast_dtype=True) nlp.utils.mkdir(output_dir) logging.debug(model) @@ -287,8 +295,16 @@ bert_tokenizer = BERTTokenizer(vocabulary, lower=do_lower_case) -def convert_examples_to_features(example, tokenizer=None, truncate_length=512, 
cls_token=None, sep_token=None, - class_labels=None, label_alias=None, vocab=None, is_test=False): +def convert_examples_to_features(example, + tokenizer=None, + truncate_length=512, + cls_token=None, + sep_token=None, + class_labels=None, + label_alias=None, + vocab=None, + is_test=False): + """convert glue examples into necessary features""" assert tokenizer is not None vocab = tokenizer.vocab if vocab is None else vocab if not is_test: @@ -313,8 +329,8 @@ def convert_examples_to_features(example, tokenizer=None, truncate_length=512, c tokens_trun = truncate_seqs_equal(tokens_raw, truncate_length) # concate the sequences with special tokens tokens_trun[0] = [cls_token] + tokens_trun[0] - tokens, segment_ids, _ = concat_sequences( - tokens_trun, [[sep_token]] * len(tokens_trun)) + tokens, segment_ids, _ = concat_sequences(tokens_trun, + [[sep_token]] * len(tokens_trun)) # convert the token to ids input_ids = vocab[tokens] valid_length = len(input_ids) @@ -323,16 +339,20 @@ def convert_examples_to_features(example, tokenizer=None, truncate_length=512, c else: return input_ids, valid_length, segment_ids -def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab): + +def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, + vocab): """Train/eval Data preparation function.""" - # transformation for data train and dev label_dtype = 'int32' if task.class_labels else 'float32' truncate_length = max_len - 3 if task.is_pair else max_len - 2 - trans = partial(convert_examples_to_features, tokenizer=tokenizer, truncate_length=truncate_length, - cls_token=vocab.cls_token if not use_roberta else vocab.bos_token, - sep_token=vocab.sep_token if not use_roberta else vocab.eos_token, - class_labels=task.class_labels, - label_alias=task.label_alias) + trans = partial( + convert_examples_to_features, + tokenizer=tokenizer, + truncate_length=truncate_length, + cls_token=vocab.cls_token if not use_roberta else vocab.bos_token, + 
sep_token=vocab.sep_token if not use_roberta else vocab.eos_token, + class_labels=task.class_labels, + label_alias=task.label_alias) # data train # task.dataset_train returns (segment_name, dataset) @@ -343,22 +363,20 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab) # bucket sampler for training pad_val = vocabulary[vocabulary.padding_token] batchify_fn = nlp.data.batchify.Tuple( - nlp.data.batchify.Pad(axis=0, pad_val=pad_val), # input - nlp.data.batchify.Stack(), # length - nlp.data.batchify.Pad(axis=0, pad_val=0), # segment - nlp.data.batchify.Stack(label_dtype)) # label - batch_sampler = nlp.data.sampler.FixedBucketSampler( - data_train_len, - batch_size=batch_size, - num_buckets=10, - ratio=0, - shuffle=True) + nlp.data.batchify.Pad(axis=0, pad_val=pad_val), # input + nlp.data.batchify.Stack(), # length + nlp.data.batchify.Pad(axis=0, pad_val=0), # segment + nlp.data.batchify.Stack(label_dtype)) # label + batch_sampler = nlp.data.sampler.FixedBucketSampler(data_train_len, + batch_size=batch_size, + num_buckets=10, + ratio=0, + shuffle=True) # data loader for training - loader_train = gluon.data.DataLoader( - dataset=data_train, - num_workers=4, - batch_sampler=batch_sampler, - batchify_fn=batchify_fn) + loader_train = gluon.data.DataLoader(dataset=data_train, + num_workers=4, + batch_sampler=batch_sampler, + batchify_fn=batchify_fn) # data dev. 
For MNLI, more than one dev set is available dev_tsv = task.dataset_dev() @@ -366,24 +384,26 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab) loader_dev_list = [] for segment, data in dev_tsv_list: data_dev = mx.gluon.data.SimpleDataset(list(map(trans, data))) - loader_dev = mx.gluon.data.DataLoader( - data_dev, - batch_size=dev_batch_size, - num_workers=4, - shuffle=False, - batchify_fn=batchify_fn) + loader_dev = mx.gluon.data.DataLoader(data_dev, + batch_size=dev_batch_size, + num_workers=4, + shuffle=False, + batchify_fn=batchify_fn) loader_dev_list.append((segment, loader_dev)) # batchify for data test test_batchify_fn = nlp.data.batchify.Tuple( - nlp.data.batchify.Pad(axis=0, pad_val=pad_val), nlp.data.batchify.Stack(), - nlp.data.batchify.Pad(axis=0, pad_val=0)) + nlp.data.batchify.Pad(axis=0, pad_val=pad_val), + nlp.data.batchify.Stack(), nlp.data.batchify.Pad(axis=0, pad_val=0)) # transform for data test - test_trans = partial(convert_examples_to_features, tokenizer=tokenizer, truncate_length=max_len, - cls_token=vocab.cls_token if not use_roberta else vocab.bos_token, - sep_token=vocab.sep_token if not use_roberta else vocab.eos_token, - class_labels=None, - is_test=True) + test_trans = partial( + convert_examples_to_features, + tokenizer=tokenizer, + truncate_length=max_len, + cls_token=vocab.cls_token if not use_roberta else vocab.bos_token, + sep_token=vocab.sep_token if not use_roberta else vocab.eos_token, + class_labels=None, + is_test=True) # data test. 
For MNLI, more than one test set is available test_tsv = task.dataset_test() @@ -391,12 +411,11 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab) loader_test_list = [] for segment, data in test_tsv_list: data_test = mx.gluon.data.SimpleDataset(list(map(test_trans, data))) - loader_test = mx.gluon.data.DataLoader( - data_test, - batch_size=dev_batch_size, - num_workers=4, - shuffle=False, - batchify_fn=test_batchify_fn) + loader_test = mx.gluon.data.DataLoader(data_test, + batch_size=dev_batch_size, + num_workers=4, + shuffle=False, + batchify_fn=test_batchify_fn) loader_test_list.append((segment, loader_test)) return loader_train, loader_dev_list, loader_test_list, len(data_train) @@ -420,14 +439,16 @@ def test(loader_test, segment): if use_roberta: out = model(input_ids, valid_length) else: - out = model(input_ids, segment_ids.as_in_context(ctx), valid_length) + out = model(input_ids, segment_ids.as_in_context(ctx), + valid_length) if not task.class_labels: # regression task for result in out.asnumpy().reshape(-1).tolist(): results.append('{:.3f}'.format(result)) else: # classification task - indices = mx.nd.topk(out, k=1, ret_typ='indices', dtype='int32').asnumpy() + indices = mx.nd.topk(out, k=1, ret_typ='indices', + dtype='int32').asnumpy() for index in indices: results.append(task.class_labels[int(index)]) @@ -444,10 +465,11 @@ def test(loader_test, segment): with io.open(test_path, 'w', encoding='utf-8') as f: f.write(u'index\tprediction\n') for i, pred in enumerate(results): - f.write(u'%d\t%s\n'%(i, str(pred))) + f.write(u'%d\t%s\n' % (i, str(pred))) -def log_train(batch_id, batch_num, metric, step_loss, log_interval, epoch_id, learning_rate): +def log_train(batch_id, batch_num, metric, step_loss, log_interval, epoch_id, + learning_rate): """Generate and print out the log message for training. 
""" metric_nm, metric_val = metric.get() if not isinstance(metric_nm, list): @@ -467,19 +489,22 @@ def log_eval(batch_id, batch_num, metric, step_loss, log_interval): eval_str = '[Batch %d/%d] loss=%.4f, metrics:' + \ ','.join([i + ':%.4f' for i in metric_nm]) - logging.info(eval_str, batch_id + 1, batch_num, - step_loss / log_interval, *metric_val) + logging.info(eval_str, batch_id + 1, batch_num, step_loss / log_interval, + *metric_val) def train(metric): """Training function.""" if not only_inference: - logging.info('Now we are doing BERT classification training on %s!', ctx) + logging.info('Now we are doing BERT classification training on %s!', + ctx) all_model_params = model.collect_params() optimizer_params = {'learning_rate': lr, 'epsilon': epsilon, 'wd': 0.01} - trainer = gluon.Trainer(all_model_params, 'bertadam', - optimizer_params, update_on_kvstore=False) + trainer = gluon.Trainer(all_model_params, + 'bertadam', + optimizer_params, + update_on_kvstore=False) if args.dtype == 'float16': amp.init_trainer(trainer) @@ -521,7 +546,8 @@ def train(metric): new_lr = lr * step_num / num_warmup_steps else: non_warmup_steps = step_num - num_warmup_steps - offset = non_warmup_steps / (num_train_steps - num_warmup_steps) + offset = non_warmup_steps / (num_train_steps - + num_warmup_steps) new_lr = lr - offset * lr trainer.set_learning_rate(new_lr) @@ -529,12 +555,14 @@ def train(metric): with mx.autograd.record(): input_ids, valid_length, segment_ids, label = seqs input_ids = input_ids.as_in_context(ctx) - valid_length = valid_length.as_in_context(ctx).astype('float32') + valid_length = valid_length.as_in_context(ctx).astype( + 'float32') label = label.as_in_context(ctx) if use_roberta: out = model(input_ids, valid_length) else: - out = model(input_ids, segment_ids.as_in_context(ctx), valid_length) + out = model(input_ids, segment_ids.as_in_context(ctx), + valid_length) ls = loss_function(out, label).mean() if args.dtype == 'float16': with amp.scale_loss(ls, 
trainer) as scaled_loss: @@ -555,8 +583,9 @@ def train(metric): step_loss += ls.asscalar() metric.update([label], [out]) if (batch_id + 1) % (args.log_interval) == 0: - log_train(batch_id, len(train_data), metric, step_loss, args.log_interval, - epoch_id, trainer.learning_rate) + log_train(batch_id, len(train_data), metric, step_loss, + args.log_interval, epoch_id, + trainer.learning_rate) step_loss = 0 mx.nd.waitall() @@ -590,7 +619,8 @@ def train(metric): ckpt_name = 'model_bert_{0}_{1}.params'.format(task_name, epoch_id) params_saved = os.path.join(output_dir, ckpt_name) nlp.utils.load_parameters(model, params_saved) - metric_str = 'Best model at epoch {}. Validation metrics:'.format(epoch_id) + metric_str = 'Best model at epoch {}. Validation metrics:'.format( + epoch_id) metric_str += ','.join([i + ':%.4f' for i in metric_nm]) logging.info(metric_str, *metric_val) @@ -598,6 +628,7 @@ def train(metric): for segment, test_data in test_data_list: test(test_data, segment) + def evaluate(loader_dev, metric, segment): """Evaluate the model on validation dataset.""" logging.info('Now we are doing evaluation on %s with %s.', segment, ctx) @@ -614,7 +645,8 @@ def evaluate(loader_dev, metric, segment): if use_roberta: out = model(input_ids, valid_length) else: - out = model(input_ids, segment_ids.as_in_context(ctx), valid_length) + out = model(input_ids, segment_ids.as_in_context(ctx), + valid_length) label_list.append(label.as_in_context(mx.cpu(0))) out_list.append(out.as_in_context(mx.cpu(0))) ls = loss_function(out, label).mean() @@ -622,7 +654,8 @@ def evaluate(loader_dev, metric, segment): step_loss += ls.asscalar() if (batch_id + 1) % (args.log_interval) == 0: - log_eval(batch_id, len(loader_dev), metric, step_loss, args.log_interval) + log_eval(batch_id, len(loader_dev), metric, step_loss, + args.log_interval) step_loss = 0 label_list = mx.nd.concat(*label_list, dim=0) @@ -631,7 +664,8 @@ def evaluate(loader_dev, metric, segment): metric_nm, metric_val = 
metric.get() if not isinstance(metric_nm, list): metric_nm, metric_val = [metric_nm], [metric_val] - metric_str = 'validation metrics:' + ','.join([i + ':%.4f' for i in metric_nm]) + metric_str = 'validation metrics:' + ','.join( + [i + ':%.4f' for i in metric_nm]) logging.info(metric_str, *metric_val) mx.nd.waitall() diff --git a/scripts/bert/finetune_squad.py b/scripts/bert/finetune_squad.py index 6ec6f9c9d2..0ea5282ff3 100644 --- a/scripts/bert/finetune_squad.py +++ b/scripts/bert/finetune_squad.py @@ -41,17 +41,17 @@ import time import warnings import itertools +import multiprocessing as mp +from functools import partial import numpy as np import mxnet as mx -import multiprocessing as mp import gluonnlp as nlp from gluonnlp.data import SQuAD -from functools import partial from model.qa import BertForQALoss, BertForQA from bert_qa_evaluate import get_F1_EM, predict, PredResult -from data.preprocessing_utils import truncate_seqs_equal, improve_answer_span, \ +from data.preprocessing_utils import improve_answer_span, \ concat_sequences, tokenize_and_align_positions, get_doc_spans, align_position2doc_spans, \ check_is_max_context, convert_squad_examples @@ -64,9 +64,9 @@ formatter = logging.Formatter( fmt='%(levelname)s:%(name)s:%(asctime)s %(message)s', datefmt='%H:%M:%S') - -parser = argparse.ArgumentParser(description='BERT QA example.' - 'We fine-tune the BERT model on SQuAD dataset.') +parser = argparse.ArgumentParser( + description='BERT QA example.' + 'We fine-tune the BERT model on SQuAD dataset.') parser.add_argument('--only_predict', action='store_true', @@ -77,125 +77,145 @@ default=None, help='Model parameter file') -parser.add_argument('--bert_model', - type=str, - default='bert_12_768_12', - help='BERT model name. options are bert_12_768_12 and bert_24_1024_16.') - -parser.add_argument('--bert_dataset', - type=str, - default='book_corpus_wiki_en_uncased', - help='BERT dataset name.' 
- 'options are book_corpus_wiki_en_uncased and book_corpus_wiki_en_cased.') - -parser.add_argument('--pretrained_bert_parameters', - type=str, - default=None, - help='Pre-trained bert model parameter file. default is None') +parser.add_argument( + '--bert_model', + type=str, + default='bert_12_768_12', + help='BERT model name. options are bert_12_768_12 and bert_24_1024_16.') + +parser.add_argument( + '--bert_dataset', + type=str, + default='book_corpus_wiki_en_uncased', + help='BERT dataset name.' + 'options are book_corpus_wiki_en_uncased and book_corpus_wiki_en_cased.') + +parser.add_argument( + '--pretrained_bert_parameters', + type=str, + default=None, + help='Pre-trained bert model parameter file. default is None') parser.add_argument('--uncased', action='store_false', help='if not set, inputs are converted to lower case.') -parser.add_argument('--output_dir', - type=str, - default='./output_dir', - help='The output directory where the model params will be written.' - ' default is ./output_dir') +parser.add_argument( + '--output_dir', + type=str, + default='./output_dir', + help='The output directory where the model params will be written.' + ' default is ./output_dir') parser.add_argument('--epochs', type=int, default=3, help='number of epochs, default is 3') -parser.add_argument('--batch_size', - type=int, - default=32, - help='Batch size. Number of examples per gpu in a minibatch. default is 32') +parser.add_argument( + '--batch_size', + type=int, + default=32, + help='Batch size. Number of examples per gpu in a minibatch. default is 32' +) parser.add_argument('--test_batch_size', type=int, default=24, help='Test batch size. default is 24') -parser.add_argument('--optimizer', - type=str, - default='bertadam', - help='optimization algorithm. default is bertadam(mxnet >= 1.5.0.)') +parser.add_argument( + '--optimizer', + type=str, + default='bertadam', + help='optimization algorithm. 
default is bertadam(mxnet >= 1.5.0.)') -parser.add_argument('--accumulate', - type=int, - default=None, - help='The number of batches for ' - 'gradients accumulation to simulate large batch size. Default is None') +parser.add_argument( + '--accumulate', + type=int, + default=None, + help='The number of batches for ' + 'gradients accumulation to simulate large batch size. Default is None') parser.add_argument('--lr', type=float, default=5e-5, help='Initial learning rate. default is 5e-5') -parser.add_argument('--warmup_ratio', - type=float, - default=0.1, - help='ratio of warmup steps that linearly increase learning rate from ' - '0 to target learning rate. default is 0.1') +parser.add_argument( + '--warmup_ratio', + type=float, + default=0.1, + help='ratio of warmup steps that linearly increase learning rate from ' + '0 to target learning rate. default is 0.1') parser.add_argument('--log_interval', type=int, default=50, help='report interval. default is 50') -parser.add_argument('--max_seq_length', - type=int, - default=384, - help='The maximum total input sequence length after WordPiece tokenization.' - 'Sequences longer than this will be truncated, and sequences shorter ' - 'than this will be padded. default is 384') - -parser.add_argument('--doc_stride', - type=int, - default=128, - help='When splitting up a long document into chunks, how much stride to ' - 'take between chunks. default is 128') - -parser.add_argument('--max_query_length', - type=int, - default=64, - help='The maximum number of tokens for the question. Questions longer than ' - 'this will be truncated to this length. default is 64') - -parser.add_argument('--n_best_size', - type=int, - default=20, - help='The total number of n-best predictions to generate in the ' - 'nbest_predictions.json output file. default is 20') - -parser.add_argument('--max_answer_length', - type=int, - default=30, - help='The maximum length of an answer that can be generated. 
This is needed ' - 'because the start and end predictions are not conditioned on one another.' - ' default is 30') - -parser.add_argument('--version_2', - action='store_true', - help='SQuAD examples whether contain some that do not have an answer.') - -parser.add_argument('--null_score_diff_threshold', - type=float, - default=0.0, - help='If null_score - best_non_null is greater than the threshold predict null.' - 'Typical values are between -1.0 and -5.0. default is 0.0') - -parser.add_argument('--gpu', - type=int, - default=None, - help='which gpu to use for finetuning. CPU is used if not set.') - -parser.add_argument('--sentencepiece', - type=str, - default=None, - help='Path to the sentencepiece .model file for both tokenization and vocab.') +parser.add_argument( + '--max_seq_length', + type=int, + default=384, + help='The maximum total input sequence length after WordPiece tokenization.' + 'Sequences longer than this will be truncated, and sequences shorter ' + 'than this will be padded. default is 384') + +parser.add_argument( + '--doc_stride', + type=int, + default=128, + help='When splitting up a long document into chunks, how much stride to ' + 'take between chunks. default is 128') + +parser.add_argument( + '--max_query_length', + type=int, + default=64, + help='The maximum number of tokens for the question. Questions longer than ' + 'this will be truncated to this length. default is 64') + +parser.add_argument( + '--n_best_size', + type=int, + default=20, + help='The total number of n-best predictions to generate in the ' + 'nbest_predictions.json output file. default is 20') + +parser.add_argument( + '--max_answer_length', + type=int, + default=30, + help='The maximum length of an answer that can be generated. This is needed ' + 'because the start and end predictions are not conditioned on one another.' 
+ ' default is 30') + +parser.add_argument( + '--version_2', + action='store_true', + help='SQuAD examples whether contain some that do not have an answer.') + +parser.add_argument( + '--null_score_diff_threshold', + type=float, + default=0.0, + help= + 'If null_score - best_non_null is greater than the threshold predict null.' + 'Typical values are between -1.0 and -5.0. default is 0.0') + +parser.add_argument( + '--gpu', + type=int, + default=None, + help='which gpu to use for finetuning. CPU is used if not set.') + +parser.add_argument( + '--sentencepiece', + type=str, + default=None, + help= + 'Path to the sentencepiece .model file for both tokenization and vocab.') parser.add_argument('--debug', action='store_true', @@ -207,8 +227,8 @@ if not os.path.exists(output_dir): os.mkdir(output_dir) -fh = logging.FileHandler(os.path.join( - args.output_dir, 'finetune_squad.log'), mode='w') +fh = logging.FileHandler(os.path.join(args.output_dir, 'finetune_squad.log'), + mode='w') fh.setLevel(logging.INFO) fh.setFormatter(formatter) console = logging.StreamHandler() @@ -238,13 +258,12 @@ accumulate = args.accumulate log_interval = args.log_interval if accumulate: - log.info('Using gradient accumulation. Effective batch size = {}'. - format(accumulate*batch_size)) + log.info('Using gradient accumulation. 
Effective batch size = {}'.format( + accumulate * batch_size)) optimizer = args.optimizer warmup_ratio = args.warmup_ratio - version_2 = args.version_2 null_score_diff_threshold = args.null_score_diff_threshold @@ -255,33 +274,37 @@ max_answer_length = args.max_answer_length if max_seq_length <= max_query_length + 3: - raise ValueError('The max_seq_length (%d) must be greater than max_query_length ' - '(%d) + 3' % (max_seq_length, max_query_length)) + raise ValueError( + 'The max_seq_length (%d) must be greater than max_query_length ' + '(%d) + 3' % (max_seq_length, max_query_length)) # vocabulary and tokenizer if args.sentencepiece: - logging.info('loading vocab file from sentence piece model: %s', args.sentencepiece) + logging.info('loading vocab file from sentence piece model: %s', + args.sentencepiece) if dataset_name: - warnings.warn('Both --dataset_name and --sentencepiece are provided. ' - 'The vocabulary will be loaded based on --sentencepiece.') + warnings.warn( + 'Both --dataset_name and --sentencepiece are provided. 
' + 'The vocabulary will be loaded based on --sentencepiece.') vocab = nlp.vocab.BERTVocab.from_sentencepiece(args.sentencepiece) dataset_name = None else: vocab = None pretrained = not model_parameters and not pretrained_bert_parameters and not args.sentencepiece -bert, vocab = nlp.model.get_model( - name=model_name, - dataset_name=dataset_name, - vocab=vocab, - pretrained=pretrained, - ctx=ctx, - use_pooler=False, - use_decoder=False, - use_classifier=False) +bert, vocab = nlp.model.get_model(name=model_name, + dataset_name=dataset_name, + vocab=vocab, + pretrained=pretrained, + ctx=ctx, + use_pooler=False, + use_decoder=False, + use_classifier=False) if args.sentencepiece: - tokenizer = nlp.data.BERTSPTokenizer(args.sentencepiece, vocab, lower=lower) + tokenizer = nlp.data.BERTSPTokenizer(args.sentencepiece, + vocab, + lower=lower) else: tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower) @@ -289,8 +312,7 @@ nlp.data.batchify.Stack(), nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), - nlp.data.batchify.Stack('float32'), - nlp.data.batchify.Stack('float32'), + nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32')) net = BertForQA(bert=bert) @@ -299,8 +321,11 @@ nlp.utils.load_parameters(net, model_parameters, ctx=ctx, cast_dtype=True) elif pretrained_bert_parameters: # only load BertModel parameters - nlp.utils.load_parameters(bert, pretrained_bert_parameters, ctx=ctx, - ignore_extra=True, cast_dtype=True) + nlp.utils.load_parameters(bert, + pretrained_bert_parameters, + ctx=ctx, + ignore_extra=True, + cast_dtype=True) net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx) elif pretrained: # only load BertModel parameters @@ -317,7 +342,7 @@ def train(): """Training function.""" - segment = 'train' #if not args.debug else 'dev' + segment = 'train' #if not args.debug else 'dev' log.info('Loading %s data...', 
segment) if version_2: train_data = SQuAD(segment, version='2.0') @@ -328,32 +353,39 @@ def train(): train_data = mx.gluon.data.SimpleDataset(sampled_data) log.info('Number of records in Train data:{}'.format(len(train_data))) train_data_transform = preprocess_dataset( - tokenizer, - train_data, - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length, - input_features=True) + tokenizer, + train_data, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length, + input_features=True) log.info('The number of examples after preprocessing:{}'.format( len(train_data_transform))) - train_dataloader = mx.gluon.data.DataLoader( - train_data_transform, batchify_fn=batchify_fn, - batch_size=batch_size, num_workers=4, shuffle=True) + train_dataloader = mx.gluon.data.DataLoader(train_data_transform, + batchify_fn=batchify_fn, + batch_size=batch_size, + num_workers=4, + shuffle=True) log.info('Start Training') optimizer_params = {'learning_rate': lr} try: - trainer = mx.gluon.Trainer(net.collect_params(), optimizer, - optimizer_params, update_on_kvstore=False) + trainer = mx.gluon.Trainer(net.collect_params(), + optimizer, + optimizer_params, + update_on_kvstore=False) except ValueError as e: print(e) - warnings.warn('AdamW optimizer is not found. Please consider upgrading to ' - 'mxnet>=1.5.0. Now the original Adam optimizer is used instead.') - trainer = mx.gluon.Trainer(net.collect_params(), 'adam', - optimizer_params, update_on_kvstore=False) + warnings.warn( + 'AdamW optimizer is not found. Please consider upgrading to ' + 'mxnet>=1.5.0. 
Now the original Adam optimizer is used instead.') + trainer = mx.gluon.Trainer(net.collect_params(), + 'adam', + optimizer_params, + update_on_kvstore=False) num_train_examples = len(train_data_transform) step_size = batch_size * accumulate if accumulate else batch_size @@ -386,8 +418,7 @@ def set_new_lr(step_num, batch_id): for _, v in net.collect_params('.*beta|.*gamma|.*bias').items(): v.wd_mult = 0.0 # Collect differentiable parameters - params = [p for p in net.collect_params().values() - if p.grad_req != 'null'] + params = [p for p in net.collect_params().values() if p.grad_req != 'null'] # Set grad_req if gradient accumulation is required if accumulate: for p in params: @@ -409,13 +440,15 @@ def set_new_lr(step_num, batch_id): log_num += len(inputs) total_num += len(inputs) - out = net(inputs.astype('float32').as_in_context(ctx), - token_types.astype('float32').as_in_context(ctx), - valid_length.astype('float32').as_in_context(ctx)) + out = net( + inputs.astype('float32').as_in_context(ctx), + token_types.astype('float32').as_in_context(ctx), + valid_length.astype('float32').as_in_context(ctx)) ls = loss_function(out, [ start_label.astype('float32').as_in_context(ctx), - end_label.astype('float32').as_in_context(ctx)]).mean() + end_label.astype('float32').as_in_context(ctx) + ]).mean() if accumulate: ls = ls / accumulate @@ -428,18 +461,20 @@ def set_new_lr(step_num, batch_id): step_loss += ls.asscalar() - if (batch_id + 1) % (log_interval * (accumulate if accumulate else 1)) == 0: + if (batch_id + 1) % (log_interval * + (accumulate if accumulate else 1)) == 0: toc = time.time() - log.info('Epoch: {}, Batch: {}/{}, Loss={:.4f}, lr={:.7f} Time cost={:.1f} Thoughput={:.2f} samples/s' # pylint: disable=line-too-long - .format(epoch_id, batch_id, len(train_dataloader), - step_loss / log_interval, - trainer.learning_rate, toc - tic, log_num/(toc - tic))) + log.info( + 'Epoch: {}, Batch: {}/{}, Loss={:.4f}, lr={:.7f} Time cost={:.1f} Thoughput={:.2f} samples/s' # 
pylint: disable=line-too-long + .format(epoch_id, batch_id, len(train_dataloader), + step_loss / log_interval, trainer.learning_rate, + toc - tic, log_num / (toc - tic))) tic = time.time() step_loss = 0.0 log_num = 0 epoch_toc = time.time() log.info('Time cost={:.2f} s, Thoughput={:.2f} samples/s'.format( - epoch_toc - epoch_tic, total_num/(epoch_toc - epoch_tic))) + epoch_toc - epoch_tic, total_num / (epoch_toc - epoch_tic))) net.save_parameters(os.path.join(output_dir, 'net.params')) @@ -456,30 +491,29 @@ def evaluate(): dev_data = mx.gluon.data.SimpleDataset(sampled_data) log.info('Number of records in dev data:{}'.format(len(dev_data))) - dev_dataset = preprocess_dataset( - tokenizer, - dev_data, - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length, - input_features=False) + dev_dataset = preprocess_dataset(tokenizer, + dev_data, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length, + input_features=False) - dev_data_transform = preprocess_dataset( - tokenizer, - dev_data, - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length, - input_features=True) + dev_data_transform = preprocess_dataset(tokenizer, + dev_data, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length, + input_features=True) log.info('The number of examples after preprocessing:{}'.format( len(dev_data_transform))) - dev_dataloader = mx.gluon.data.DataLoader( - dev_data_transform, - batchify_fn=batchify_fn, - num_workers=4, batch_size=test_batch_size, - shuffle=False, last_batch='keep') + dev_dataloader = mx.gluon.data.DataLoader(dev_data_transform, + batchify_fn=batchify_fn, + num_workers=4, + batch_size=test_batch_size, + shuffle=False, + last_batch='keep') log.info('start prediction') @@ -490,9 +524,10 @@ def evaluate(): for data in dev_dataloader: example_ids, inputs, token_types, valid_length, _, _ = data total_num += len(inputs) - 
out = net(inputs.astype('float32').as_in_context(ctx), - token_types.astype('float32').as_in_context(ctx), - valid_length.astype('float32').as_in_context(ctx)) + out = net( + inputs.astype('float32').as_in_context(ctx), + token_types.astype('float32').as_in_context(ctx), + valid_length.astype('float32').as_in_context(ctx)) output = mx.nd.split(out, axis=2, num_outputs=2) example_ids = example_ids.asnumpy().tolist() @@ -504,7 +539,7 @@ def evaluate(): epoch_toc = time.time() log.info('Time cost={:.2f} s, Thoughput={:.2f} samples/s'.format( - epoch_toc - epoch_tic, total_num/(epoch_toc - epoch_tic))) + epoch_toc - epoch_tic, total_num / (epoch_toc - epoch_tic))) log.info('Get prediction results...') @@ -526,21 +561,25 @@ def evaluate(): all_predictions[example_qas_id] = prediction with io.open(os.path.join(output_dir, 'predictions.json'), - 'w', encoding='utf-8') as fout: + 'w', + encoding='utf-8') as fout: data = json.dumps(all_predictions, ensure_ascii=False) fout.write(data) if version_2: - log.info('Please run evaluate-v2.0.py to get evaluation results for SQuAD 2.0') + log.info( + 'Please run evaluate-v2.0.py to get evaluation results for SQuAD 2.0' + ) else: F1_EM = get_F1_EM(dev_data, all_predictions) log.info(F1_EM) -SquadBERTFeautre = collections.namedtuple('SquadBERTFeautre', ['example_id', 'qas_id', 'doc_tokens', 'valid_length', - 'tokens', 'token_to_orig_map', 'token_is_max_context', 'input_ids', - 'p_mask', 'segment_ids', 'start_position', - 'end_position','is_impossible']) +SquadBERTFeautre = collections.namedtuple('SquadBERTFeautre', [ + 'example_id', 'qas_id', 'doc_tokens', 'valid_length', 'tokens', + 'token_to_orig_map', 'token_is_max_context', 'input_ids', 'p_mask', + 'segment_ids', 'start_position', 'end_position', 'is_impossible' +]) def convert_examples_to_features(example, @@ -553,70 +592,89 @@ def convert_examples_to_features(example, max_query_length=64, cls_index=0): """convert the examples to the BERT features""" - query_tokenized = 
[cls_token] + tokenizer(example.question_text)[: max_query_length] + query_tokenized = [cls_token] + tokenizer( + example.question_text)[:max_query_length] #tokenize paragraph and get start/end position of the answer in tokenized paragraph - tok_start_position, tok_end_position, all_doc_tokens, _, tok_to_orig_index= \ + tok_start_position, tok_end_position, all_doc_tokens, _, tok_to_orig_index = \ tokenize_and_align_positions(example.doc_tokens, - example.start_position, - example.end_position, - tokenizer) + example.start_position, + example.end_position, + tokenizer) # get doc spans using sliding window - doc_spans, doc_spans_indices = get_doc_spans(all_doc_tokens, max_seq_length - len(query_tokenized) - 2, - doc_stride) + doc_spans, doc_spans_indices = get_doc_spans( + all_doc_tokens, max_seq_length - len(query_tokenized) - 2, doc_stride) if not example.is_impossible: (tok_start_position, tok_end_position) = improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, - tokenizer, example.orig_answer_text) + all_doc_tokens, tok_start_position, tok_end_position, tokenizer, + example.orig_answer_text) # get the new start/end position - positions = [align_position2doc_spans([tok_start_position, tok_end_position], doc_idx, - offset=len(query_tokenized) + 1, - default_value=0) for doc_idx in doc_spans_indices] + positions = [ + align_position2doc_spans([tok_start_position, tok_end_position], + doc_idx, + offset=len(query_tokenized) + 1, + default_value=0) + for doc_idx in doc_spans_indices + ] else: - # if the question is impossible to answer(in squad2.0), set the start/end position to cls index + # if the question is impossible to answer, set the start/end position to cls index positions = [[cls_index, cls_index] for _ in doc_spans_indices] - token_is_max_context = [{len(query_tokenized) + p: check_is_max_context(doc_spans_indices, i, - p + doc_spans_indices[i][0]) - for p in range(len(doc_span))} - for (i, doc_span) in enumerate(doc_spans)] - 
token_to_orig_map = [{len(query_tokenized) + p + 1: tok_to_orig_index[p + doc_spans_indices[i][0]] - for p in range(len(doc_span))} - for (i, doc_span) in enumerate(doc_spans)] + token_is_max_context = [{ + len(query_tokenized) + p: + check_is_max_context(doc_spans_indices, i, p + doc_spans_indices[i][0]) + for p in range(len(doc_span)) + } for (i, doc_span) in enumerate(doc_spans)] + token_to_orig_map = [{ + len(query_tokenized) + p + 1: + tok_to_orig_index[p + doc_spans_indices[i][0]] + for p in range(len(doc_span)) + } for (i, doc_span) in enumerate(doc_spans)] #get sequence features: tokens, segment_ids, p_masks - seq_features = [concat_sequences([query_tokenized, doc_span], [[sep_token]] * 2) - for doc_span in doc_spans] - - features = [SquadBERTFeautre(example_id=example.example_id, qas_id=example.qas_id, - doc_tokens=example.doc_tokens, valid_length=len(tokens), tokens=tokens, - token_to_orig_map=t2o, token_is_max_context=is_max, input_ids=vocab[tokens], - p_mask=p_mask, segment_ids=segment_ids, start_position=start, end_position=end, - is_impossible=example.is_impossible) - for (tokens, segment_ids, p_mask), (start, end), is_max, t2o - in zip(seq_features, positions, token_is_max_context, token_to_orig_map)] + seq_features = [ + concat_sequences([query_tokenized, doc_span], [[sep_token]] * 2) + for doc_span in doc_spans + ] + + features = [ + SquadBERTFeautre(example_id=example.example_id, + qas_id=example.qas_id, + doc_tokens=example.doc_tokens, + valid_length=len(tokens), + tokens=tokens, + token_to_orig_map=t2o, + token_is_max_context=is_max, + input_ids=vocab[tokens], + p_mask=p_mask, + segment_ids=segment_ids, + start_position=start, + end_position=end, + is_impossible=example.is_impossible) + for (tokens, segment_ids, p_mask), (start, end), is_max, t2o in zip( + seq_features, positions, token_is_max_context, token_to_orig_map) + ] return features def preprocess_dataset(tokenizer, - dataset, - vocab = None, - max_seq_length=384, - doc_stride=128, - 
max_query_length=64, - input_features=True, - num_workers=4): - + dataset, + vocab=None, + max_seq_length=384, + doc_stride=128, + max_query_length=64, + input_features=True, + num_workers=4): """Loads a dataset into features""" vocab = tokenizer.vocab if vocab is None else vocab trans = partial(convert_examples_to_features, - tokenizer=tokenizer, - cls_token=vocab.cls_token, - sep_token=vocab.sep_token, - vocab=vocab, - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length) + tokenizer=tokenizer, + cls_token=vocab.cls_token, + sep_token=vocab.sep_token, + vocab=vocab, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length) pool = mp.Pool(num_workers) start = time.time() @@ -630,20 +688,24 @@ def preprocess_dataset(tokenizer, # Due to using sliding windows in data preprocessing, # we will have multiple examples for a single entry after processed. # Thus we need to flatten it for training. - data_feature = mx.gluon.data.SimpleDataset(list(itertools.chain.from_iterable(pool.map(trans, examples)))) - data_feature = data_feature.transform(lambda *example: (example[0], # example_id - example[7], # inputs_id - example[9], # segment_ids - example[3], # valid_length, - example[10], # start_position, - example[11])) # end_position + data_feature = mx.gluon.data.SimpleDataset( + list(itertools.chain.from_iterable(pool.map(trans, examples)))) + data_feature = data_feature.transform(lambda *example: ( + example[0], # example_id + example[7], # inputs_id + example[9], # segment_ids + example[3], # valid_length, + example[10], # start_position, + example[11])) # end_position else: - data_feature = mx.gluon.data.SimpleDataset(list(pool.map(trans, examples))) + data_feature = mx.gluon.data.SimpleDataset( + list(pool.map(trans, examples))) end = time.time() print('Done! Transform dataset costs %.2f seconds.' 
% (end - start)) return data_feature + if __name__ == '__main__': if not only_predict: train() From b6d038d79d062338f1cda29e9e9433efb86ec559 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 16 Dec 2019 15:02:31 +0800 Subject: [PATCH 23/59] fix vocab & refactor XLNet script --- scripts/bert/finetune_classifier.py | 8 +- .../language_model/model/XLNet_classifier.py | 27 +-- scripts/language_model/run_glue.py | 198 ++++++++++++------ scripts/language_model/transformer/model.py | 13 +- 4 files changed, 155 insertions(+), 91 deletions(-) diff --git a/scripts/bert/finetune_classifier.py b/scripts/bert/finetune_classifier.py index 3c816fe793..4a892cf5d9 100644 --- a/scripts/bert/finetune_classifier.py +++ b/scripts/bert/finetune_classifier.py @@ -305,8 +305,6 @@ def convert_examples_to_features(example, vocab=None, is_test=False): """convert glue examples into necessary features""" - assert tokenizer is not None - vocab = tokenizer.vocab if vocab is None else vocab if not is_test: label_dtype = 'int32' if class_labels else 'float32' # get the label @@ -352,7 +350,8 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, cls_token=vocab.cls_token if not use_roberta else vocab.bos_token, sep_token=vocab.sep_token if not use_roberta else vocab.eos_token, class_labels=task.class_labels, - label_alias=task.label_alias) + label_alias=task.label_alias, + vocab=vocab) # data train # task.dataset_train returns (segment_name, dataset) @@ -403,7 +402,8 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, cls_token=vocab.cls_token if not use_roberta else vocab.bos_token, sep_token=vocab.sep_token if not use_roberta else vocab.eos_token, class_labels=None, - is_test=True) + is_test=True, + vocab=vocab) # data test. 
For MNLI, more than one test set is available test_tsv = task.dataset_test() diff --git a/scripts/language_model/model/XLNet_classifier.py b/scripts/language_model/model/XLNet_classifier.py index 69f527abbd..12ede15423 100644 --- a/scripts/language_model/model/XLNet_classifier.py +++ b/scripts/language_model/model/XLNet_classifier.py @@ -41,24 +41,26 @@ def __call__(self, inputs, token_types, valid_length=None, mems=None): """ return super(XLNetClassifier, self).__call__(inputs, token_types, valid_length, mems) - def _apply_pooling(self, sequence): + def _apply_pooling(self, sequence, valid_length): """Generate the representation given the inputs. This is used for pre-training or fine-tuning a XLNet model. """ # Note that we are using left pad so we always take the last hidden state - outputs = sequence.slice(begin=(0, -1, 0), end=(None, -2, None), step=(None, -1, None)) - outputs = outputs.reshape(shape=(-1, self._units)) - return self.pooler(outputs) + F = mx.ndarray + index = F.contrib.arange_like(sequence, axis=0, ctx=sequence.context).expand_dims(1) + valid_length_rs = valid_length.reshape((-1, 1)) - 1 + gather_index = F.concat(index, valid_length_rs).T + cls_states = F.gather_nd(sequence, gather_index) + return self.pooler(cls_states) - def _padding_mask(self, inputs, valid_length_start): - #we are using left pad + def _padding_mask(self, inputs, valid_length): F = mx.ndarray - valid_length_start = valid_length_start.astype('int32') - steps = F.contrib.arange_like(inputs, axis=1) + 1 + valid_length = valid_length.astype(inputs.dtype) + steps = F.contrib.arange_like(inputs, axis=1) ones = F.ones_like(steps) - mask = F.broadcast_greater(F.reshape(steps, shape=(1, -1)), - F.reshape(valid_length_start, shape=(-1, 1))) + mask = F.broadcast_lesser(F.reshape(steps, shape=(1, -1)), + F.reshape(valid_length, shape=(-1, 1))) mask = F.broadcast_mul(F.expand_dims(mask, axis=1), F.broadcast_mul(ones, F.reshape(ones, shape=(-1, 1)))) return mask @@ -82,9 +84,8 @@ def 
forward(self, inputs, token_types, valid_length=None, mems=None): outputs : NDArray Shape (batch_size, num_classes) """ - valid_length_start = inputs.shape[1] - valid_length - attention_mask = self._padding_mask(inputs, valid_length_start).astype('float32') + attention_mask = self._padding_mask(inputs, valid_length).astype('float32') output, _ = self.xlnet(inputs, token_types, mems, attention_mask) - output = self._apply_pooling(output) + output = self._apply_pooling(output, valid_length.astype('float32')) pooler_out = self.pooler(output) return self.classifier(pooler_out) diff --git a/scripts/language_model/run_glue.py b/scripts/language_model/run_glue.py index f5c3686032..738fb325d7 100644 --- a/scripts/language_model/run_glue.py +++ b/scripts/language_model/run_glue.py @@ -10,6 +10,7 @@ import warnings import sys import multiprocessing +from functools import partial import numpy as np import mxnet as mx from mxnet import gluon @@ -23,6 +24,7 @@ from classification import MRPCTask, QQPTask, RTETask, STSBTask, SSTTask, \ QNLITask, CoLATask, MNLITask, WNLITask, XNLITask, LCQMCTask, ChnSentiCorpTask from data.transform import XLNetDatasetTransform +from preprocessing_utils import truncate_seqs_equal, concat_sequences tasks = { 'MRPC': MRPCTask(), @@ -115,6 +117,132 @@ def split_and_load(arrs, ctx): return zip(*loaded_arrs) +def convert_examples_to_features(example, + tokenizer=None, + truncate_length=512, + cls_token=None, + sep_token=None, + class_labels=None, + label_alias=None, + vocab=None, + is_test=False): + """convert glue examples into necessary features""" + assert vocab + if not is_test: + label_dtype = 'int32' if class_labels else 'float32' + # get the label + label = example[-1] + example = example[:-1] + #create label maps if classification task + if class_labels: + label_map = {} + for (i, l) in enumerate(class_labels): + label_map[l] = i + if label_alias: + for key in label_alias: + label_map[key] = label_map[label_alias[key]] + label = 
label_map[label] + label = np.array([label], dtype=label_dtype) + + # tokenize raw text + tokens_raw = [tokenizer(l) for l in example] + # truncate to the truncate_length, + tokens_trun = truncate_seqs_equal(tokens_raw, truncate_length) + # concate the sequences with special tokens, cls_token is added to the end in XlNet + special_tokens = [[sep_token]] * len(tokens_trun) + special_tokens[-1].append(cls_token) + tokens, segment_ids, _ = concat_sequences(tokens_trun, + special_tokens) + # convert the token to ids + input_ids = vocab[tokens] + valid_length = len(input_ids) + if not is_test: + return input_ids, valid_length, segment_ids, label + else: + return input_ids, valid_length, segment_ids + + +def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, + vocab): + """Train/eval Data preparation function.""" + label_dtype = 'int32' if task.class_labels else 'float32' + truncate_length = max_len - 3 if task.is_pair else max_len - 2 + trans = partial( + convert_examples_to_features, + tokenizer=tokenizer, + truncate_length=truncate_length, + cls_token=vocab.cls_token, + sep_token=vocab.sep_token, + class_labels=task.class_labels, + label_alias=task.label_alias, + vocab=vocab) + + # data train + # task.dataset_train returns (segment_name, dataset) + train_tsv = task.dataset_train()[1] + data_train = mx.gluon.data.SimpleDataset(list(map(trans, train_tsv))) + data_train_len = data_train.transform( + lambda _, valid_length, segment_ids, label: valid_length, lazy=False) + # bucket sampler for training + pad_val = vocab[vocab.padding_token] + batchify_fn = nlp.data.batchify.Tuple( + nlp.data.batchify.Pad(axis=0, pad_val=pad_val), # input + nlp.data.batchify.Stack(), # length + nlp.data.batchify.Pad(axis=0, pad_val=0), # segment + nlp.data.batchify.Stack(label_dtype)) # label + batch_sampler = nlp.data.sampler.FixedBucketSampler(data_train_len, + batch_size=batch_size, + num_buckets=10, + ratio=0, + shuffle=True) + # data loader for training + loader_train 
= gluon.data.DataLoader(dataset=data_train, + num_workers=4, + batch_sampler=batch_sampler, + batchify_fn=batchify_fn) + + # data dev. For MNLI, more than one dev set is available + dev_tsv = task.dataset_dev() + dev_tsv_list = dev_tsv if isinstance(dev_tsv, list) else [dev_tsv] + loader_dev_list = [] + for segment, data in dev_tsv_list: + data_dev = mx.gluon.data.SimpleDataset(list(map(trans, data))) + loader_dev = mx.gluon.data.DataLoader(data_dev, + batch_size=dev_batch_size, + num_workers=4, + shuffle=False, + batchify_fn=batchify_fn) + loader_dev_list.append((segment, loader_dev)) + + # batchify for data test + test_batchify_fn = nlp.data.batchify.Tuple( + nlp.data.batchify.Pad(axis=0, pad_val=pad_val), + nlp.data.batchify.Stack(), nlp.data.batchify.Pad(axis=0, pad_val=0)) + # transform for data test + test_trans = partial( + convert_examples_to_features, + tokenizer=tokenizer, + truncate_length=max_len, + cls_token=vocab.cls_token, + sep_token=vocab.sep_token, + class_labels=None, + is_test=True, + vocab=vocab) + + # data test. 
For MNLI, more than one test set is available + test_tsv = task.dataset_test() + test_tsv_list = test_tsv if isinstance(test_tsv, list) else [test_tsv] + loader_test_list = [] + for segment, data in test_tsv_list: + data_test = mx.gluon.data.SimpleDataset(list(map(test_trans, data))) + loader_test = mx.gluon.data.DataLoader(data_test, + batch_size=dev_batch_size, + num_workers=4, + shuffle=False, + batchify_fn=test_batchify_fn) + loader_test_list.append((segment, loader_test)) + return loader_train, loader_dev_list, loader_test_list, len(data_train) + logger = logging.getLogger() logger.setLevel(logging.INFO) logging.captureWarnings(True) @@ -155,6 +283,7 @@ def split_and_load(arrs, ctx): get_model_params = { 'name': args.model_name, + 'do_lower_case': 'uncased' in args.dataset, 'dataset_name': args.dataset, 'pretrained': get_pretrained, 'ctx': ctxs, @@ -198,77 +327,10 @@ def split_and_load(arrs, ctx): model.hybridize(static_alloc=True) loss_function.hybridize(static_alloc=True) -# data processing -do_lower_case = 'uncased' in args.dataset - - -def preprocess_data(_tokenizer, _task, _batch_size, _dev_batch_size, max_len, _vocab, pad=False): - """Train/eval Data preparation function.""" - pool = multiprocessing.Pool() - - # transformation for data train and dev - label_dtype = 'float32' if not _task.class_labels else 'int32' - trans = XLNetDatasetTransform(_tokenizer, max_len, vocab=_vocab, - class_labels=_task.class_labels, label_alias=_task.label_alias, - pad=pad, pair=_task.is_pair, has_label=True) - - # data train - # _task.dataset_train returns (segment_name, dataset) - train_tsv = _task.dataset_train()[1] - data_train = mx.gluon.data.SimpleDataset(pool.map(trans, train_tsv)) - data_train_len = data_train.transform(lambda input_id, length, segment_id, label_id: length, - lazy=False) - # bucket sampler for training - pad_val = _vocab[_vocab.padding_token] - batchify_fn = nlp.data.batchify.Tuple( - nlp.data.batchify.Pad(axis=0, pad_val=pad_val), # input - 
nlp.data.batchify.Stack(), # length - nlp.data.batchify.Pad(axis=0, pad_val=0), # segment - nlp.data.batchify.Stack(label_dtype)) # label - - batch_sampler = nlp.data.sampler.FixedBucketSampler(data_train_len, batch_size=_batch_size, - num_buckets=10, ratio=0, shuffle=True) - # data loader for training - loader_train = gluon.data.DataLoader(dataset=data_train, num_workers=num_workers, - batch_sampler=batch_sampler, batchify_fn=batchify_fn) - - # data dev. For MNLI, more than one dev set is available - dev_tsv = _task.dataset_dev() - dev_tsv_list = dev_tsv if isinstance(dev_tsv, list) else [dev_tsv] - loader_dev_list = [] - for segment, data in dev_tsv_list: - data_dev = mx.gluon.data.SimpleDataset(pool.map(trans, data)) - loader_dev = mx.gluon.data.DataLoader(data_dev, batch_size=_dev_batch_size, - num_workers=num_workers, shuffle=False, - batchify_fn=batchify_fn) - loader_dev_list.append((segment, loader_dev)) - - # batchify for data test - test_batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(axis=0, pad_val=pad_val), - nlp.data.batchify.Stack(), - nlp.data.batchify.Pad(axis=0, pad_val=0)) - # transform for data test - test_trans = XLNetDatasetTransform(_tokenizer, max_len, vocab=_vocab, class_labels=None, - pad=pad, pair=_task.is_pair, has_label=False) - - # data test. For MNLI, more than one test set is available - test_tsv = _task.dataset_test() - test_tsv_list = test_tsv if isinstance(test_tsv, list) else [test_tsv] - loader_test_list = [] - for segment, data in test_tsv_list: - data_test = mx.gluon.data.SimpleDataset(pool.map(test_trans, data)) - loader_test = mx.gluon.data.DataLoader(data_test, batch_size=_dev_batch_size, - num_workers=num_workers, shuffle=False, - batchify_fn=test_batchify_fn) - loader_test_list.append((segment, loader_test)) - pool.close() - return loader_train, loader_dev_list, loader_test_list, len(data_train) - - # Get the loader. 
logging.info('processing dataset...') train_data, dev_data_list, test_data_list, num_train_examples = preprocess_data( - tokenizer, task, args.batch_size, args.dev_batch_size, args.max_len, vocab, args.pad) + tokenizer, task, args.batch_size, args.dev_batch_size, args.max_len, vocab) def test(loader_test, segment): diff --git a/scripts/language_model/transformer/model.py b/scripts/language_model/transformer/model.py index ea6812a7e4..f4e2836696 100644 --- a/scripts/language_model/transformer/model.py +++ b/scripts/language_model/transformer/model.py @@ -55,6 +55,7 @@ def get_model(name, **kwargs): 'xlnet_cased_l12_h768_a12': xlnet_cased_l12_h768_a12, 'xlnet_cased_l24_h1024_a16': xlnet_cased_l24_h1024_a16 } + print(name, kwargs) name = name.lower() if name not in models: raise ValueError('Model %s is not supported. Available options are\n\t%s' % @@ -141,7 +142,7 @@ def transformerxl(dataset_name: str, vocab: nlp.Vocab, **kwargs): def xlnet_cased_l12_h768_a12(dataset_name: Optional[str] = None, vocab: Optional[nlp.Vocab] = None, tokenizer: Optional[XLNetTokenizer] = None, pretrained: bool = True, ctx: mx.Context = mx.cpu(), - root=os.path.join(get_home_dir(), 'models'), **kwargs): + root=os.path.join(get_home_dir(), 'models'), do_lower_case=False, **kwargs): """XLNet model. 
References: @@ -191,14 +192,14 @@ def xlnet_cased_l12_h768_a12(dataset_name: Optional[str] = None, vocab: Optional dataset_name=dataset_name, root=root, ctx=ctx, ignore_extra=not kwargs.get('use_decoder', True)) if tokenizer is None or dataset_name is not None: - tokenizer = _get_xlnet_tokenizer(dataset_name, root) + tokenizer = _get_xlnet_tokenizer(dataset_name, root, do_lower_case) return net, vocab, tokenizer def xlnet_cased_l24_h1024_a16(dataset_name: Optional[str] = None, vocab: Optional[nlp.Vocab] = None, tokenizer: Optional[XLNetTokenizer] = None, pretrained: bool = True, ctx: mx.Context = mx.cpu(), - root=os.path.join(get_home_dir(), 'models'), **kwargs): + root=os.path.join(get_home_dir(), 'models'), do_lower_case=False, **kwargs): """XLNet model. References: @@ -248,11 +249,11 @@ def xlnet_cased_l24_h1024_a16(dataset_name: Optional[str] = None, vocab: Optiona dataset_name=dataset_name, root=root, ctx=ctx, ignore_extra=not kwargs.get('use_decoder', True)) if tokenizer is None or dataset_name is not None: - tokenizer = _get_xlnet_tokenizer(dataset_name, root) + tokenizer = _get_xlnet_tokenizer(dataset_name, root, do_lower_case) return net, vocab, tokenizer -def _get_xlnet_tokenizer(dataset_name, root): +def _get_xlnet_tokenizer(dataset_name, root, do_lower_case=False): assert dataset_name.lower() == '126gb' root = os.path.expanduser(root) file_path = os.path.join(root, 'xlnet_126gb-871f0b3c.spiece') @@ -294,5 +295,5 @@ def _get_xlnet_tokenizer(dataset_name, root): if not check_sha1(file_path, sha1_hash): raise ValueError('Downloaded file has different hash. 
Please try again.') - tokenizer = XLNetTokenizer(file_path) + tokenizer = XLNetTokenizer(file_path, lower=do_lower_case) return tokenizer From ea72d10c44249d056d3740f244a1028aa6b8c312 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 19 Dec 2019 22:26:54 +0800 Subject: [PATCH 24/59] fix pylint --- scripts/language_model/run_glue.py | 6 +++--- scripts/language_model/transformer/model.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/language_model/run_glue.py b/scripts/language_model/run_glue.py index 738fb325d7..1249aba24b 100644 --- a/scripts/language_model/run_glue.py +++ b/scripts/language_model/run_glue.py @@ -9,7 +9,6 @@ import logging import warnings import sys -import multiprocessing from functools import partial import numpy as np import mxnet as mx @@ -23,7 +22,6 @@ #pylint: disable=wrong-import-position from classification import MRPCTask, QQPTask, RTETask, STSBTask, SSTTask, \ QNLITask, CoLATask, MNLITask, WNLITask, XNLITask, LCQMCTask, ChnSentiCorpTask -from data.transform import XLNetDatasetTransform from preprocessing_utils import truncate_seqs_equal, concat_sequences tasks = { @@ -126,6 +124,7 @@ def convert_examples_to_features(example, label_alias=None, vocab=None, is_test=False): + #pylint: disable=redefined-outer-name """convert glue examples into necessary features""" assert vocab if not is_test: @@ -164,6 +163,7 @@ def convert_examples_to_features(example, def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab): + #pylint: disable=redefined-outer-name """Train/eval Data preparation function.""" label_dtype = 'int32' if task.class_labels else 'float32' truncate_length = max_len - 3 if task.is_pair else max_len - 2 @@ -283,7 +283,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, get_model_params = { 'name': args.model_name, - 'do_lower_case': 'uncased' in args.dataset, + 'do_lower_case': True, 'dataset_name': args.dataset, 'pretrained': get_pretrained, 'ctx': 
ctxs, diff --git a/scripts/language_model/transformer/model.py b/scripts/language_model/transformer/model.py index f4e2836696..5a3dc6fe26 100644 --- a/scripts/language_model/transformer/model.py +++ b/scripts/language_model/transformer/model.py @@ -142,7 +142,8 @@ def transformerxl(dataset_name: str, vocab: nlp.Vocab, **kwargs): def xlnet_cased_l12_h768_a12(dataset_name: Optional[str] = None, vocab: Optional[nlp.Vocab] = None, tokenizer: Optional[XLNetTokenizer] = None, pretrained: bool = True, ctx: mx.Context = mx.cpu(), - root=os.path.join(get_home_dir(), 'models'), do_lower_case=False, **kwargs): + root=os.path.join(get_home_dir(), 'models'), + do_lower_case=False, **kwargs): """XLNet model. References: @@ -199,7 +200,8 @@ def xlnet_cased_l12_h768_a12(dataset_name: Optional[str] = None, vocab: Optional def xlnet_cased_l24_h1024_a16(dataset_name: Optional[str] = None, vocab: Optional[nlp.Vocab] = None, tokenizer: Optional[XLNetTokenizer] = None, pretrained: bool = True, ctx: mx.Context = mx.cpu(), - root=os.path.join(get_home_dir(), 'models'), do_lower_case=False, **kwargs): + root=os.path.join(get_home_dir(), 'models'), + do_lower_case=False, **kwargs): """XLNet model. 
References: From cee158df6732c5f47f1d775496f33188b1d6ec43 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Dec 2019 16:31:08 +0800 Subject: [PATCH 25/59] make test faster --- scripts/tests/test_scripts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/tests/test_scripts.py b/scripts/tests/test_scripts.py index bce0b72162..a177eae0a1 100644 --- a/scripts/tests/test_scripts.py +++ b/scripts/tests/test_scripts.py @@ -330,8 +330,9 @@ def test_export(task): @pytest.mark.integration @pytest.mark.parametrize('sentencepiece', [False, True]) def test_finetune_squad(sentencepiece): - arguments = ['--optimizer', 'adam', '--batch_size', '12', - '--gpu', '0', '--epochs', '2', '--debug'] + arguments = ['--optimizer', 'adam', '--batch_size', '32', + '--gpu', '0', '--epochs', '1', '--debug', '--max_seq_length', '32', + '--max_query_length', '8', 'doc_stride', '384'] if sentencepiece: # the downloaded bpe vocab url = 'http://repo.mxnet.io/gluon/dataset/vocab/test-682b5d15.bpe' From 489ff5f827dcb9a5524209c4ad5f85940cde433a Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 3 Jan 2020 15:48:21 +0800 Subject: [PATCH 26/59] fix --- scripts/bert/finetune_squad.py | 3 +++ scripts/language_model/run_glue.py | 1 - scripts/tests/test_scripts.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/bert/finetune_squad.py b/scripts/bert/finetune_squad.py index 4baca2f820..1158de3421 100644 --- a/scripts/bert/finetune_squad.py +++ b/scripts/bert/finetune_squad.py @@ -307,6 +307,8 @@ else: tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower) +print(vocab) +print(tokenizer) batchify_fn = nlp.data.batchify.Tuple( nlp.data.batchify.Stack(), nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), @@ -689,6 +691,7 @@ def preprocess_dataset(tokenizer, list(pool.map(trans, examples))) end = time.time() + pool.close() print('Done! Transform dataset costs %.2f seconds.' 
% (end - start)) return data_feature diff --git a/scripts/language_model/run_glue.py b/scripts/language_model/run_glue.py index 1249aba24b..4cd444e00b 100644 --- a/scripts/language_model/run_glue.py +++ b/scripts/language_model/run_glue.py @@ -283,7 +283,6 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, get_model_params = { 'name': args.model_name, - 'do_lower_case': True, 'dataset_name': args.dataset, 'pretrained': get_pretrained, 'ctx': ctxs, diff --git a/scripts/tests/test_scripts.py b/scripts/tests/test_scripts.py index a177eae0a1..4a3f35f1d7 100644 --- a/scripts/tests/test_scripts.py +++ b/scripts/tests/test_scripts.py @@ -332,7 +332,7 @@ def test_export(task): def test_finetune_squad(sentencepiece): arguments = ['--optimizer', 'adam', '--batch_size', '32', '--gpu', '0', '--epochs', '1', '--debug', '--max_seq_length', '32', - '--max_query_length', '8', 'doc_stride', '384'] + '--max_query_length', '8', '--doc_stride', '384'] if sentencepiece: # the downloaded bpe vocab url = 'http://repo.mxnet.io/gluon/dataset/vocab/test-682b5d15.bpe' From 0fc5bb8c6f303dad97ae9e1e9500af7bd2a1e230 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 13 Jan 2020 17:48:15 +0800 Subject: [PATCH 27/59] fixed print --- scripts/bert/data/classification.py | 6 +- scripts/bert/data/preprocessing_utils.py | 130 ++++- scripts/bert/finetune_classifier.py | 9 +- scripts/bert/finetune_squad.py | 30 +- scripts/bert/test-682b5d15.bpe | Bin 0 -> 253253 bytes scripts/bert/test_squad.py | 18 + scripts/language_model/data/qa.py | 690 +++++++++++++++++++++++ scripts/language_model/model/qa.py | 293 ++++++++++ scripts/language_model/run_squad.py | 540 ++++++++++++++++++ 9 files changed, 1698 insertions(+), 18 deletions(-) create mode 100644 scripts/bert/test-682b5d15.bpe create mode 100644 scripts/bert/test_squad.py create mode 100644 scripts/language_model/data/qa.py create mode 100644 scripts/language_model/model/qa.py create mode 100644 scripts/language_model/run_squad.py 
diff --git a/scripts/bert/data/classification.py b/scripts/bert/data/classification.py index f246d7c371..bc0b0da10e 100644 --- a/scripts/bert/data/classification.py +++ b/scripts/bert/data/classification.py @@ -99,8 +99,8 @@ def __init__(self): is_pair = True class_labels = ['0', '1'] metric = CompositeEvalMetric() - metric.add(F1()) metric.add(Accuracy()) + metric.add(F1(average='micro')) super(MRPCTask, self).__init__(class_labels, metric, is_pair) def get_dataset(self, segment='train'): @@ -119,8 +119,8 @@ def __init__(self): is_pair = True class_labels = ['0', '1'] metric = CompositeEvalMetric() - metric.add(F1()) metric.add(Accuracy()) + metric.add(F1(average='micro')) super(QQPTask, self).__init__(class_labels, metric, is_pair) def get_dataset(self, segment='train'): @@ -175,7 +175,7 @@ class STSBTask(GlueTask): def __init__(self): is_pair = True class_labels = None - metric = PearsonCorrelation() + metric = PearsonCorrelation(average='micro') super(STSBTask, self).__init__(class_labels, metric, is_pair) def get_dataset(self, segment='train'): diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index 999bda972d..ef92ac552a 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -8,6 +8,8 @@ import collections import itertools +import unicodedata +import numpy as np import numpy.ma as ma @@ -43,7 +45,7 @@ def truncate_seqs_equal(seqs, max_len): return seqs -def concat_sequences(seqs, separators, separator_mask=1): +def concat_sequences(seqs, separators, separator_mask=[]): """ Insert special tokens for sequence list or a single sequence. For sequence pairs, the input is a list of 2 strings: @@ -91,6 +93,53 @@ def concat_sequences(seqs, separators, separator_mask=1): []) return concat, segment_ids, p_mask +def concat_sequences_2(seqs, separators, separator_mask=[]): + """ + Insert special tokens for sequence list or a single sequence. 
+ For sequence pairs, the input is a list of 2 strings: + text_a, text_b. + Inputs: + text_a: 'is this jacksonville ?' + text_b: 'no it is not' + separator: [[SEP], [SEP]] + + Processed: + tokens: 'is this jacksonville ? [SEP] no it is not . [SEP]' + segment_ids: 0 0 0 0 0 1 1 1 1 1 1 + p_mask: 0 0 0 0 1 0 0 0 0 0 1 + valid_length: 11 + + Parameters + ---------- + separator : list + The special tokens to be appended to each sequence. For example: + Given: + seqs: [[1, 2], [3, 4], [5, 6]] + separator: [[], 7] + it will be: + [1, 2, 3, 4, 7, 5, 6] + + seqs : list of sequences or a single sequence + + Returns + ------- + np.array: input token ids in 'int32', shape (batch_size, seq_length) + np.array: segment ids in 'int32', shape (batch_size, seq_length) + np.array: mask for special tokens + """ + assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 + concat = sum(( + seq + sep + for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), + []) + segment_ids = sum( + ([i] * (len(seq) + len(sep)) for i, (sep, seq) in enumerate( + itertools.zip_longest(separators, seqs, fillvalue=[]))), []) + p_mask = sum(( + [0] * len(seq) + mask + for sep, seq, mask in itertools.zip_longest(separators, seqs, separator_mask, fillvalue=[])), + []) + return concat, segment_ids, p_mask def tokenize_and_align_positions(origin_text, start_position, end_position, tokenizer): @@ -241,7 +290,7 @@ def check_is_max_context(doc_spans, cur_span_index, position): SquadExample = collections.namedtuple('SquadExample', [ - 'qas_id', 'question_text', 'doc_tokens', 'example_id', 'orig_answer_text', + 'qas_id', 'question_text', 'paragraph_text', 'doc_tokens', 'example_id', 'orig_answer_text', 'start_position', 'end_position', 'is_impossible' ]) @@ -284,6 +333,7 @@ def convert_squad_examples(record, is_training): example = SquadExample(qas_id=qas_id, question_text=question_text, + paragraph_text=paragraph_text, doc_tokens=doc_tokens, example_id=example_id, 
orig_answer_text=orig_answer_text, @@ -291,3 +341,79 @@ def convert_squad_examples(record, is_training): end_position=end_position, is_impossible=is_impossible) return example + + +def preprocess_text(inputs, lower=False, remove_space=True, keep_accents=False): + if remove_space: + outputs = ' '.join(inputs.strip().split()) + else: + outputs = inputs + outputs = outputs.replace("``", '"').replace("''", '"') + if not keep_accents: + outputs = unicodedata.normalize('NFKD', outputs) + outputs = ''.join([c for c in outputs if not unicodedata.combining(c)]) + if lower: + outputs = outputs.lower() + + return outputs + + +def _convert_index(index, pos, M=None, is_start=True): + if index[pos] is not None: + return index[pos] + N = len(index) + rear = pos + while rear < N - 1 and index[rear] is None: + rear += 1 + front = pos + while front > 0 and index[front] is None: + front -= 1 + assert index[front] is not None or index[rear] is not None + if index[front] is None: + if index[rear] >= 1: + if is_start: + return 0 + else: + return index[rear] - 1 + return index[rear] + if index[rear] is None: + if M is not None and index[front] < M - 1: + if is_start: + return index[front] + 1 + else: + return M - 1 + return index[front] + if is_start: + if index[rear] > index[front] + 1: + return index[front] + 1 + else: + return index[rear] + else: + if index[rear] > index[front] + 1: + return index[rear] - 1 + else: + return index[front] + + +def _lcs_match(max_dist, seq1, seq2, max_first_seq_len, max_second_seq_len, lower=False): + f = np.zeros((max(len(seq1), 1024), max(len(seq2), 1024)), dtype=np.float32) + g = {} + for i in range(max_first_seq_len): + for j in range(i - max_dist, i + max_dist): + if j >= max_second_seq_len or j < 0: continue + + if i > 0: + g[(i, j)] = 0 + f[i, j] = f[i - 1, j] + + if j > 0 and f[i, j - 1] > f[i, j]: + g[(i, j)] = 1 + f[i, j] = f[i, j - 1] + + f_prev = f[i - 1, j - 1] if i > 0 and j > 0 else 0 + if (preprocess_text(seq1[i], lower=lower, + 
remove_space=False) == seq2[j] + and f_prev + 1 > f[i, j]): + g[(i, j)] = 2 + f[i, j] = f_prev + 1 + return f, g diff --git a/scripts/bert/finetune_classifier.py b/scripts/bert/finetune_classifier.py index 0497b0a05f..42e9fd4fa1 100644 --- a/scripts/bert/finetune_classifier.py +++ b/scripts/bert/finetune_classifier.py @@ -574,6 +574,8 @@ def train(metric): all_model_params.zero_grad() step_loss += ls.asscalar() + if do_regression: + label = label.reshape((-1)) metric.update([label], [out]) if (batch_id + 1) % (args.log_interval) == 0: log_train(batch_id, len(train_data), metric, step_loss, @@ -645,15 +647,14 @@ def evaluate(loader_dev, metric, segment): ls = loss_function(out, label).mean() step_loss += ls.asscalar() - + if do_regression: + label = label.reshape((-1)) + metric.update([label], [out]) if (batch_id + 1) % (args.log_interval) == 0: log_eval(batch_id, len(loader_dev), metric, step_loss, args.log_interval) step_loss = 0 - label_list = mx.nd.concat(*label_list, dim=0) - out_list = mx.nd.concat(*out_list, dim=0) - metric.update([label_list], [out_list]) metric_nm, metric_val = metric.get() if not isinstance(metric_nm, list): metric_nm, metric_val = [metric_nm], [metric_val] diff --git a/scripts/bert/finetune_squad.py b/scripts/bert/finetune_squad.py index 1158de3421..afc57bb86d 100644 --- a/scripts/bert/finetune_squad.py +++ b/scripts/bert/finetune_squad.py @@ -41,6 +41,7 @@ import time import warnings import itertools +import pickle import multiprocessing as mp from functools import partial @@ -220,6 +221,7 @@ action='store_true', help='Run the example in test mode for sanity checks') +parser.add_argument('--load_feature_from_pickle', action='store_true', help='load features from file if set') args = parser.parse_args() output_dir = args.output_dir @@ -307,8 +309,6 @@ else: tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower) -print(vocab) -print(tokenizer) batchify_fn = nlp.data.batchify.Tuple( nlp.data.batchify.Stack(), 
nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), @@ -609,11 +609,13 @@ def convert_examples_to_features(example, # if the question is impossible to answer, set the start/end position to cls index positions = [[cls_index, cls_index] for _ in doc_spans_indices] + # record whether the tokens in a docspan have max context token_is_max_context = [{ len(query_tokenized) + p: check_is_max_context(doc_spans_indices, i, p + doc_spans_indices[i][0]) for p in range(len(doc_span)) } for (i, doc_span) in enumerate(doc_spans)] + token_to_orig_map = [{ len(query_tokenized) + p + 1: tok_to_orig_index[p + doc_spans_indices[i][0]] @@ -653,7 +655,9 @@ def preprocess_dataset(tokenizer, doc_stride=128, max_query_length=64, input_features=True, - num_workers=4): + num_workers=4, + load_from_pickle=False, + feature_file=None): """Loads a dataset into features""" vocab = tokenizer.vocab if vocab is None else vocab trans = partial(convert_examples_to_features, @@ -666,10 +670,18 @@ def preprocess_dataset(tokenizer, max_query_length=max_query_length) pool = mp.Pool(num_workers) start = time.time() - - example_trans = partial(convert_squad_examples, is_training=input_features) - # convert the raw dataset into raw features - examples = pool.map(example_trans, dataset) + if not load_from_pickle: + example_trans = partial(convert_squad_examples, is_training=input_features) + # convert the raw dataset into raw features + examples = pool.map(example_trans, dataset) + raw_features = pool.map(trans, examples) + if feature_file: + with open(feature_file, 'wb') as file: + pickle.dump(list(raw_features), file) + else: + assert feature_file, 'feature file should be provided.' + with open(feature_file, 'wb') as file: + raw_features = pickle.load(file) if input_features: # convert the full features into the training features @@ -678,7 +690,7 @@ def preprocess_dataset(tokenizer, # we will have multiple examples for a single entry after processed. 
# Thus we need to flatten it for training. data_feature = mx.gluon.data.SimpleDataset( - list(itertools.chain.from_iterable(pool.map(trans, examples)))) + list(itertools.chain.from_iterable(raw_features))) data_feature = data_feature.transform(lambda *example: ( example[0], # example_id example[7], # inputs_id @@ -688,7 +700,7 @@ def preprocess_dataset(tokenizer, example[11])) # end_position else: data_feature = mx.gluon.data.SimpleDataset( - list(pool.map(trans, examples))) + list(raw_features)) end = time.time() pool.close() diff --git a/scripts/bert/test-682b5d15.bpe b/scripts/bert/test-682b5d15.bpe new file mode 100644 index 0000000000000000000000000000000000000000..9677dd042fb01ea2a165c0ce5c49f69d1e65b946 GIT binary patch literal 253253 zcmZ6T3tZGy`v0F9Zc2ieI+o^rv$D;8*>7#L)Y8 zUP{Y0B@mH|$VEj!tt@S!&B|8WwV!0R+Gb{(ZMNC|-{*T~2KL{p&+ECIbDnd1&T}r` z!N};yu;0hd_}yG3{CAx_Qp*odnBU^*zxN%8>=7BJ%evLRi#Y*Hj?@G-v^>|!c9Xh!Kb*1YQ1`SOb-J1zvNBNGm98p> zt`11OA$&&(~V@IMw_)>kdTrjO<3P z3CXT_r+Vgt4S|ZMsd3X@3RK`qRmGc;x<}fb32OJYmjiWWI=pIP1tOhJP?dc)T6u20 zNqu@RtQn;&cZRZ6ZL;#Ds$X4v#mY1ECe`buEtYbcn)SWm9O}*oO9Bl~PjaNI52hny zJVnKCc_Uyk$>~wQ%qX=I?pC7{w*}fsYsTMHZrM-HQV~73TUk7+TOT+YKXRmMUbZ8c zJ6)aYR%K*NHo=Ot;;nPL{yZuMG9+*M>_|mzUQDOgfU2)qn$tnDjW+ zXRcjV?xAYskUC3w?@h`!rasVOuUkEk(P(8!R(<~lXIhr%_Nc)(?zR#}s@G@0VG!Gm zQ}+h)q`K49=jWRO4PE5)WT_{I?z76wQ-7EU*DJC+E|Tm@PfvEL_u|lCWsy@A{M;O{ z;?77{C&sn}tgte&)PH6pn<2#|Pe0h&ndgK!O3{;fh&P-Lej6-HA>NWOdBfM!+s$1nf z`<7K^ftq*A+kr9*Qr(&A_oIVyvTK1;&DFAhwyA8$(}-r_IO25Ib5;h^-KkFX_T7hq z>GK?{o~`3h_&q_pb(xmU3Qtp~{}2@7)&4Q>1X`HjR*Uu``O7(RgnIA02O@h%Mlu~N z0%vOIX38pYLDXQ)~vKfaIry6+?$#f^vospbC zWC|+cEk%n~U@2ZL8+#;BN0I{{_~ub$)1vewcgDP=bd`D-6`3DV-;H_C;wGr$)8I@M zGc%mdROLMSr@)9N5TK?VYzvr8chRJ>_3c)#yiWC7`}={mx)Rm=@knMS+=+>*DIu7M zudntYY1c_t$Ik`xd7VymEacBt3Tyk-2Vu=9xl#$k%N8M zKL-nBCeifOmrb5TH8S#pKv9n>c?*HcF-%OV`tDnkM6RqKVNJVkQ3FCg3XETRk}GwAx=kxg!6YXE=+J6pYG-Gt+G$uK z&bybp6w?m0R(%{;&$BZ;dOiQ|^=Qn@#5w0t??YN)EPLHd#n)BH##iRK)eUt>rd3{5 
zJ@8l{Ym7U7fl42btQ(Q4W_Vytm%WZG^+-OVagzr;S!qek!;K%JGNEOvBR!qSX=#k@ zo7E2=k?eQTER}i;$s{GJA(K7{*v`cEbI&50oN-xd$}%L=e0Cm>y5%IY>EFT(SGrRz z2>nYSLrMmdm8@mr!yNXh_j^%H6L%h=DVcJIfIlhgUYt z2x^ku?ggA9{+flx6rRT>^wx)nX5faY<3Dt!Inz~fx08YPC%Qady>X*3vphco`TI^e zMa$E>lTI69&y0m-en*bHNgav{7IUX^M0opcWaE~T@CB#Wt2`?{3s`ou8LCBErdf~a zxV>5nW*k$UIyitt+NE)xuzV^G{=#s1Hr-$6#nMu&7xQM z=hm3Ts=j4#CMCte#ysvrWK-E}HMqy=Kq5ZwR0mwhCUKtnayf$W$T%klBd2<|IG8=z z&DxAy6_hib9Flxng2j{6#n3M-Tj_cgKe+&bUOkkgo_pEonZ!7i{J@z&M`9cv#t-242=zXuxUap`^9?nHqnKBkV>!I^mTh+7T% z?O97dQSD8D^BZ9+z>{upqVZSh)@XYy1BL+sVESzmh5>#+;3Mnxla zY`)|?7f9w9%Dy&F#b=^26{PD)_}#O}rh)`wefmqmhAPe6U>lkt=7s7D9i|0?enVAt-&HG zPNH+(uAuBrC7_4a2W4j#QGNIUWG2z=R*%p6ht>6TT)NT?AEG8s|JDjKOPNk}>epXe9i6VSe*b~O6~XpwW%mQ$-`l(Mk`o1vvtV3`3wT{^;lAts@1${B!{XLMzlTryk(t^ zpS}f_jO;I}_uYn1CdQuAtLJIVb8`I47 z$YvHDI_$949@Q$-k;jtsAu&%YjHkylBy8j}Uka9*=S;V@0b6OXls8FV+3zYt;jc-D zlM&59Il)aV*nSY%A1#Ro-<=MMQ*r&XqrSH~z&do0;U|KGa1K@6nrvGz4?7;^}C2 ztuSqHb3n61|42EK7pN$mY~(}-aZz=a=ln-29_TEu&3)1->XX)$FP^NVR=Xa6Kyd}ES z;1QRtwLL}sbuk>h)7P1wml|F#jYTWqO>OavQGOGmzj(5G`V%DMip;dbvp@~)Fh%uE zVD8uOW`ezG+&k3$zy2wZ-I=OiEL5V_7+2u{YtA7#TIcT{joElRqNy?6r5tHUG*dsd zj$af^VR|kt4odSJYTv6tDZ!;?{9sDy>-(Tl|2AtqP1QaEi!lfH$f76U{r>D#3tvVu zO^=4koq$R5N`eUUf)D%472Gu;t~ zzrTAm3e(YKM*7jw!F26MS?cLKgXvzDYVAG2bca{p;?_RenaFU8QvWr)(LDA%={>QlNH}`~RU-5X< zdCi)!@(?3sVo>xl-Wwh;g^0K6!I)rDBFm#^ETU;rIx#}e<(@}TF_+BF6t(3j0(A_# zNsa#&*7PObt+rh@3Ac?<*OmRp_z6B--3HHYI7dD80~}@uk6B7Z%RzrP(~>zoym2kE z-}h7W234vR{%sAJ>D%6^;V9xnvs-J-SjDkBs(*LN1b&CQ@w#Aorh_ZNd@VCpBS#KX zR}XD_+QXv4GhsFU{@-j$xEe(eYr(JR1{hiGc&vBRl;{YEgEsZ>YH zgUOsgQ`LrAWYZy@ne@2+es5=iB!}9(57CTIs_Oj}0!`KL;(P73Kt*~=RB9h&({yeY zs^jWlG8f~$@1s?lSE@`6-nrn*zhW~jNzGi;W^bY!9qfxp!mOiLZ9-c0s^ zE*c(B^jof_2^wKZcgYgRp@TyAX*0$>?w_iXJ8Ly>ufJ%8akMn2hkf3wE{sD(rzg*v zquw76Z#p@9>SPsqFQSPMj?~3^KV+@ke_t?<*O94OCLl6^))}I5q7fYWrZXTWm@%2j zKL1cqOk{CfGaHeHI;?_&A4M{~Pxn~!^z<`mXqG-q{A-cPYxej*-(?TXOq_!ky=j@q zrfi&(OCqnuT4Mrsynde08)KJTiSA?_$}EjP4rF+&ZLUkEG^d+`i^*h$oRbm*+@%z^ 
zA!~oIRAwqCxp|d;P*)#tl`IvjMU%Y1nU<~`owCQxVJ^8-PIU9omD(vk;^sq!SIcG= zc!71LrfN5?>C7}wpQPXHloRzzKBr~V?gVQ#u3a2##XP-mzSOd@8t?R^^A&=*t@=YS zQ@oC?Nx8^oXgLY%_sD-0qA_+eliYfLcsW=^-}%&(6G55NOM>dz5tP}Qz3RH3kO^fO z@#=}6VVU9#UX<>;DkR{y-VD_v8p+h6FC1R=&%r3js-Hc6GYsAgFLx|;$Bl?Mpwq#F zO3pJTpEt>+zFP`!#@(xfdftZ4a%@~Db_T^c55=m;!-%HJcn*5pAKOsScUMXpR}&q{ zUeg+)>Jq~l*NJh=C!l8;l^H5WqDOyJwX&P~37l!_kGiJ6PE(0ks!w{_agbfhy?6kko`;W_#q^bvfZms}jRYyR&sbY}Ft*7@ehUxJyqC3D`z*=Bu)&Q$8r+w9=e z$oNLGez{%wcO&R`T=mjd2vj$NK+yXVyfMuO9Ad>)KcS#$-1c=$J@g+$6H~no_3MOC z%Yh%^fc+UPM)W7_m;M2--*EsTBc~6d$Hlo zU_R`c=j0q@rJ8=so;qQ+k^4sWM8~8}ljV zLyDy_9p)UsspqxLjGJ|n=#t5i%mam2UGaCE zrq4@eqqJoIJX$X#mclg8t6hm}m@eYalB|2sB@5x)%K-h-X_e>81ZAHv zb#Ik4V>Le`B~87gEg4_Iax$^y4XrUbcz{b*JG6XFWE4JYU2@FTs77l|b=D5yVVvLT zEIChK4322ocu^(~qVH=l;Oq`hnmXE{5(LZ4LtV;&-a)15*k-_xG%Tt*;c^Vl9;0;^7el}P*%{5P5`<+S3;Jx5C zKf#+0@*>W;iSZ2k8J%g5b}Tgi`VSgoJjG3n9os9Svy{GGlc!-YI)A^d*Oo7DMPUlX z>A*bUA!N4F`D*S=INX2sRQ1HuurxE><52y2bqf|?mei^d$fh0UcLb_eYEWZZtF}Fh z22(?asowdp#&kEQMJGJnwn* zC9zAMIG290vK0O!lslCtzYMzGB^PTY)2}d=oQny!#GWq3y*#t)wsleQ0|!?^pgvQO zuW&rfcuX!cj2ylaRGsdGnXz;?&yM=7($bkcZVnBqUgt3~7fa?w%f@>aa3^f;OeT^n zP;YjXc}72`WivAR9gojKTZ82jx!7E%MSqj@!EH0{Rk zR-!gBI*V)5E+jLuX&FnF=zH#lhM+3Z>9igf|M4y=Gf=z}sGmL!ihNpBJ;y`_Qu&Uf zuAhd8Z&}|GPWS~Hkf5?>lS0)F>&GuQB_NsM()Fkh7X@YgPO2VRYV3H@d9eJywf>@3 zAb9zYNUWLPspGH11wF%hYp&gb$_#6QJ0ngz6bj=SI)EnV&~E7huBeX-+Rua8_%P0e zh6{*mhS!(c`D2zKMaCvAv5;VbRd(_lFA?tW% zeX#%Z3nYEQ&_}tC)Kw;~ayfWsbPux?FdXtaJejO->TAmoD4$`ygn#lClBsOQL$j47 zn)W}Y&!d)X+RIj$l1d!0RQ}zO&6#Vvr)#zZXFRHe|5hbidxKRnFnNiIuJ{Z-9a&1# z0O#U(BbpsK(c0szt;ot`nl;bmNjABRrw~FD*ebi^NOf}jv1GsJaHaA$+Z6HYw}OfDXB(dx|ZS1U~!$+S&V`Qm-sAoRx6ld zr}c}li&`>+H$@*^E_KS&bq$tmikm9`-D!_^2*T6ge_+APQ5tI_MT^FuH;LEVnoil1 z!CdU`lyRC=b;H-DIYj#VlT?hBjIntcF7=cajWL!L*O5*+NxuVb{XUqUsCT8bOF@|* z#;{O-4$6F~_o!7NQI@akk98^}6v2#e9COZVT+3=S96j{=OySMZ7O%gr-taGYmWXxI 
zslTVEHGbxjm^>DVBKl_s#gpNUL30;Xr_+KOkNJU8>UE!%7#8Bh|$tZC6d4hYuF1Mq`}FxrkM!*L)Y#&c!)>sv%)s+-={ICnUWzjrKjPq#WeS>xret-NGnzCf&kmlh#?xH!){3xQYDA-t{Nw|;QtyMFRwJXSQqdR4BT9n;uUBAS!`jn0%yJo*B=ywkY0V$GSQGr6X68Q`%jPvv%D~sX!<`BqbC~Nt zb(Tob$FzTR%G8;q`fi{hW_5d6y&rapNo+&|ULsX@i-)!(m8&Jw=_KwPYHz2^AtqUU z?$4d*)Xzw^mxGo$(5PV>5m}WF5i?RWk69P)h_`5orR>oU9;Y^$Y_s`M(v?@>{S!zi zajKKAqA(5MD3hcPG$NbEr*rb_-m6c*FA|d-dee8Snvv*C7B*n@Kfgz$X7gtgu^uE- zfpy$ez7iCE=g!dYTk6|gXafGoOC~;3cNi5Wd9BtPXT26pj_K|=>z%|>nRY%lZTj@C zSsvq)f{V?}%3`XW}7p>f%7w+1dmT*N{L5OlzI<=6OvPGYD+y+}^utSn&&ZJLzKEm8zXLoq%B} zyBzVkPwI8vrAiK$)+4#4GF7CQr@Stic(hogUsWuX8KP8ITwHQjL;NDx8LZM6(w~so zytx6ft8T!ug(msmy53c=}4E7JOae?~z9jD&ecHns8h z4xF31WbkI-Oj{{I=Hkm*j*;)RE2-_zPWX?@ZSp=>Dul z)2t-@dzUkvGWVxU^;4(J*Mw9Rek9le{XmeVuI*H?d^wT-s#BqVV3=hTrV9F%s)p+f z{;}a)!ld5XS;XtmZvb~_h3SNML6(ZwqOp*utN2;V#2G&D)1&`%W?sPehbiv`&2uQ> zR-tTv3KnNaUZ8TdY^=v6JJe$zA(}zu>uh?my7SYZB3WNw)wQFpwqhuAkf7clqAbDs zqla5OeFH7iuT*OO0c6v+1ed;%sj94%#Cy28=;dYXah*%x&|zd_FUAqah1<$t7Uj$gM}l6udNEPT zRHBdJR**qsmT9~r&BZ5RlgR{06aB29MmA&KnH-1LS(Z#D_H<634!r_{)}3Cg{a1aw z3F*=4QjT8&XZpieuw0Ux$3cHN(|A`32RN5r9ad4()CDfKL@Q7STc4^we+F7_=GtwA z333=q)=%8{5trYR|0u@!LEpr1vM%=Z0@aV}|DVci13`ntt3>(~7)3K0d_+rg2P-kn z<&&1z6=;kZMLua+krQJ^K?ufEtqRQqc^!*_P4EX-0&uGJqlvEBSYo)(e`)kZ+GtEC z^;b1caXRK$`HVSErA(7Pd00x*RGxr2I{HK?ujLAkcc^`Qw=D+0GQ>Q)BHJ#sA_=}stna2|Gnha)?z52acpQ^1o zjL|s#lm5m|A51eJfh>)`6dRFs74MRZKUd%wteQI8I)Nibi(j0=gDQuw)=sPZ$&S8f z9n>0AF9$)M6FbX4!Y>Nm>P$}2yV!rp4DZ`I-SkZ#X`Skw&H|j1_zO!d`@;cmQv|g? zcjkCRe|G&_ryQq0^II|}!f7)eQ0H};nNa;ZNX|Lxf>s4aQy+4!bShZTxJLgYpykYRR4+dPd5LbHXpYCC-9%`gD(7W zyX`Bx`Sa=A5c6l#{V(p1j_j72lKymR;)3`-r}sxoy z1^hUXKWP4Iqkm5GV@2AVMPBJHzSqDOuzVne`-$X$aRbFS8pMFfz&8p#c!PY!&~cB4 z$k?eNa$ltQ#wUe{EuSXXY!b3c#9k~Cwq7K3jY!01k?@To-M5Q$+ahvRl}Kc%NYq}D z9xWn0K{R~tLn6Hnid@|;(&vbXIwo?>QITs;iu64p((ep^JKiC3{dtl8=R^ix5*ct& zWbhS{L6=1wcXbyh?To)KTH?O#CeE9CN`fz1<{j@TiQo1VS6UBAy1j=i__l}4Ki)%9 zrbbEffGBbI>LaPiQ8I;or1iL3yyaJm=j*E_+CpU;K181HE>cd}5$IRK{|~fyREVsBZ#ym$i_L7%(qDYj85^GyTT$2? 
zC$fBCh_v;xOFR9_9YOx|5GlzCmqXDRX)ZzU^#?v_E5)S=cIP-X4cO z1roqXo!5n(zlBO}Z=0M!o{#=K^a=>Op6){H7^Ak=L#3U3ZQX3rh+Ik=3nN^%LIS0e7@n{bXliZo~F(ZP(Lccd$3Pwjr z;fM&yn;^0p8{1fSfO^_F`?4k8ay&{N)Y&g>s*ee-%3Ii+_W!?rxK_(6+Or-)xulq?OR` zyfBf?@Sl@b1ubu2E$MR9+W>9*nfwQA(gJ@->+?gTH+{)D6C&SGrtn;d{7H{xDdS7} zPv|O;kLkP{S<5FwWMvC!H_)~rK(FW9S-Zs2LVUp&%h=8l`JQsi>EBBH;0$F?g7>kL zlTDt5tfwK^*vb0O4w0kiOL~XMdB!tmq)lqCh-@RKR80txGw4c)Nmrnkz#k#+ z$QAOmeGnmS#M!U`_(d`KriV*=DE^)XO^4nWA+L<=DQ)Eu(q0)M1u7zx6FZv=e;KT{@(Hs)oq=RtE~@SQC-xek5F720|fy1kup4yKI_G zoV0x>Z7P|=-mo2?1UVY?#RsYu+GGKFwrabp7)R=v3`!X9xzJeP1UV_>2eo1N9(9gJ zmrp;EDOXN?>)}^{s_u;APTI{{_JQT?tT*%xparxw6DQcGi-`wq1GF!5J~+gjvToYO zf+YA!c3B=n>?MEU3FdAjb<@Wj{3w^YJk)uNw03Y(+r%e}7>}Y3(?)&9Xv@RS!!{|I z93pvR*u%+pUg!NA^ETBkDYX9_W4{<%`S2Btf$rla@+?JHa+JA7p8?v6LM0~y+aO1$ zHDF&trG2(tu8_YFT^{RX6*LTgjsk5TL`&PP^hd&^{mC%-nz}B4wwI&X=P2KXFOMY; z=Z5xIp&$2@_QgimeiwPpFz;YF$ORu$HV>-L028oZT*@ADg7G^NA!Cqdf@!+m-`M05 zd3`5o>p7A8b-cVn+uM;Z>oJTF7x^5Zm_D}MY?Cbbg`k7>>+*kv`sR|?iQm|0g99Gl z^Gy>OMZW%w)eiFLv&pUScYrzQiJQKIoQoc!&fJ0ejKcoiFM?P|Jj0&~M-UI!M96ov zZ-onAIuR~K(BexGGL16Nvv;pN!G4atAIUQUd;f;6*@B%3*a3PSAEAz1>>P!j043`= zALK`fJ`=RPz`UOdliG8%opFxE?(cyE%mcq*uhbbK9h7U^6C&lY5poVbg|v%M8?m}O zxU6|}B~{!Tpeu)mON6E|BHL+KZVF?xku1tB1CgL8it%Kgx8d_^u>BmmyBN=s;Wil$-8qMJ^m-i~gjOk=Jdb`9_6pGD zlYc1u2(XR0@kLP|K2SJ{@tnq-^)q|(7}BFDw}$*xGa2`f@#7zDQa(Xs0%?KuoO3=z zULwzCu#&c&$8SpDa}KfoS?k;3bK$2^Chtgy^=yNYFnW;d6zs3$+N9g z2ZlJUev5USp{R3V59`^pqaM?ncGdj-*#u$D(C^=95`^e8(XfN_X zkkcLihg$j%>{3n)xQ2G=I&?g3dxm-w!sQ~i27sgFT{FTaSG4W+!EyrmU-0Lk9oo*@ z7+Rlb&;OkmRVrvXMG+<_?yXIfJEe!eIK(yd|+W#0-|4(!nyEx|VMzoxCfL4w9RZ z3mXQrH<7*(Fiy)`kn`b-2g^n5>2uaA^tqVy%gB094QDQBiq99t{9NIF4_|U(u#{vC zk`m^w2kBL#2g^3cAlXKJTPUAL-fhql^uMOwD(vSiL^pJ>)Ux5$Qs;WoJLpdz%J0_m zUC(^)93T&)ucbeaLSIGSkN)=tBfwDL+e`mJ?oh^&Gg}_xG75b`F>48*C}Is4pR>sc zY(IB}aXUktr#;=t^ZZH5Fy?EBAvxsrX>5T9m#OPNK#xIh`dGZ)Ca>g>)=oj7-&)$g zWnG-%je_)?^Q_xa9T!=jPjU|*$eRV_%P)%zCH*G&C+R~R7=f(MU!$NUtec^vjnOt{ zQvQ3&e_)f5==Aw%Ci?${p8&mA>nJw|ei|se$a($*(C7L!tXp*H*k5^>HP89Ja4&lJ 
zF_ddd;GKYVzmmKzUG^w%0c%J<87^xskzYck{cGZalm0MPzoX1T@-4yMWL^F`#&QgE zjxGzdJ&CR1BDXQ0djHjD{*o~vG79-OU<-M+&A~2j1VyZ+l9?g09o<}PEl-J(ZJR~P zs?bAgqsYhpc#O1d4d}rUT_^D)=U9ZirFqi-44uI6$3v@%MO;u{J9(JLwn=(_A0&B~ zxEtyGk0VcKK3Us2SD0sPCh4&uZz{B{+aOtak(j`FsGalBOzal326Do}Wg0y0>ze~y zU^(;P8%i4e_vtZ!AA`PFj|1(>VSjZ~XU-MuG431DuK~5dH?g1f^LD@p?)+f*(1p^uX_9E>|+&fN&S?j8=9_Leb=^(v`G0BOcEnp^P&!Kw^ zx@Js-{FZ!0ha#jDc?D;di=@3m`wP!R$U@qj!~X39catyoGH(gI2k0}fk2eJE3k4nF z@(0T1;fuxOU%{UL+(yn1S;(8irHZk8eh%k{lnA+u?KU5A<_5d$myjO8c{* zdD!u-V!W8+Hhio7Y^c~sTY zaxgwooRQ%V6V|Thd{N~h^GL>b9BXum3%is|1DcMXc@O2AiyCVgDVWq#1hy z_53ep{T}C?0DsVDt}kdy?iI@S6ZsYCh4{@7o$oiSi`zrx@1+0HrQ}9*E7@P#d4FkR z{nt*j$xxc1-y`%p(CZtOczLFqf-dh8{l1y3!G=tcopU)#4K5&<0z zj#5|cdXeFjt%L3Yqv7uY_knxBZ6JfZWwduV^kv#Va42s^u_1ClY5m|8a13EAl10`z zqvS=g%d5=u=2Dw%Dke^Ymp8Hwp>0FhcSC90eb6VN3|c#Hv2D!}lCLYWaVL%9ofvyf zl*tbhSqMJ|wC3X*BX|oK-cy{&xkGzO{s@tJ>d7Zoq@b(g9=t2Rr_>&?$xhx5c5^pw z#CIA%J;-qqgTQhJeyQ)(T$J!BrtYdNtPew}vx{Z8CXn`IAwO}0e?uGs_-6jX2Yp$c>RosdC%o^v~uW{r{*hjyGv1-8AMv}gpIrY(p9QxCCGwYdt z_}Y;#M_co>ocZ%*FbB2Ji_l{>*-m+mAwE6NweUXXT<`t4*&=I6U)Mp~7_W__aXj(q zwpJfr+}s*uNM9^c3%Z~kbZKFW1)J|=B5n829oIvvpEw8`3+ zM&O^{5=(a4WIAIum$W&cMDy5l0w1wEhq_YWGqg_S|6#7q!M7d9&expDwO-IuXFK1O+IdrGpN;(_>Q3f-NxFlzwr(S_<-k1%wCe=@ zrmgJ>XVF#R%OKy$9Bzi6i4LsmATDv%*Rf&kA@1>%KZDJpp~fH2>-w)_4vu55 zUhsR`%b8&#W0T(<9rO};u0T&h!$1^JpgXXED#olgv>#~O!+PQDp*K|h=A!+7AbM>p zA3v~>ce@@J_Wm3;INfi3FYb;$e-3Z09T6F17z| z*69O9rlBjV(MQznRzF#URz^GTSGo;{b6^bzx-tMTm0o?boq0P-$i^E zd)u{Mw;#lkHcqFfkj|KDUs+yilPKMO`m5{j4Ic)ElCSMBLm z@LaKcN5KiLrw_V69q_}o{X@vd82M1z$FZKsvvw|T#9Bwb_B8xFJxtcw#>yJfvOxRI_y}|r z(DSq&nhi!%&j<~zA8y*z_9Oj#n>EPV+eN!_*w=llkl$vESkL)0$u~`(TkzLg!Cm+q zd3~<&k^`n;Z!&1VYpk@*9xFLV$I9Bx)5@rp1%3cGY%Jdk16FFDstX-`g#L4X-5ol(dF@yfAL<4g`WoU58cbT zjPzdMd%X`Z&-g-nGy6Pm{cWf8eoxzzpnQwfclGwg=y~&R=k34kEc7tv-Kru>@pMEbAT^woaAS7FQC0689)8aZvg>A}osWLzOt@FFkzJ(cK%G$EaP%YY zm9=Xahis7>$-9pA-*%k2$a&H?&@UZJ*3#D_n?GhgIe!aCuz?aVgl*sbe?l! 
z>Y+ZJ{}A~uYCYrE4u9Ei=NNqL!7$m$e%b=R3)*lZOzPX&t7*HAA$!@Yx4^Fl`xyIn za4mhaVIvIs5pr9RU9N&}KQ$g-$LCo$Y3Nsj3FxzdkAAj)O!^n&WjS(NGyIgP$2os8u4C_&BKoKIAC7&#BF5t$XeQA9yB@0J+bht54%RUJ ze?X_vjxCz@;vUAF6^+2Jh(9q}2i5J|2A@BYJIU*`CzzkCC^H@4G1e4=eE zXRguM;%r>N`Hr8W_=<=L1<-Ai!Xys*+y^j+!kt==V;UOq%TB%z8~!*PNc)H;_hCA&6oL(z42Dw!q9Q{@U>01GalIY zkKc>hj=#Nin%8>lHNqEO;&%xj^RC5y$~DAyfo<<$4Q+q8wq z`VaU9iLUNch?G%g8*O;;xJUxJ)`KDopf5o^&@A8sdi}ID-z%*g*q=jgUfe@k;G4eh zA^T?ckUh{B(KmwKdwNI{w1FR@?0TIyir0I{7UbIdLZlkMuVIbv3FG_aRK63TuL0$g z7+1#NQ}}JrHyFdxB79|Zi0mF0A}`X;2I$7Q-1TF)^Rvb_xp)JBz6$=BZ4R|UF>gn$sgDAdm{X5GkIUV#G6a4O{y33&PKhhVBaj&hZD z35OQ^$eYU7A#$qTF44#}*sFelIT=fS<|`82OVoMlIB_H|L^fb!zx0sJDI%{r_^ll2 zwfIxr!~AxH{C%}e;^8UAr4GN>`|B`tfpq@WFkgQ#2h|8D^L}iX`tNCzZp-U-xy`TJNcr8Q@jfSaqPrWczz=u-hV{mHjz^vd>PVmB zr@x23%9w2iG4Qj2{)Qe0O#$iP58#i}*iWIwAQ8L(UIHh;pTSq)6nG6R=PZ@YNf?>FCDz(yoi>_|2BAVy-X2cL=_Y51d}tOZ5HYrDDzy*APb*6JsS* zrrgc>O16xh0ven1n!e2@yZ6|-ibi@^42Hdz9#+%B>d znhOd*-Eh`0bTxPp?B>4LIEMG5Q8w9tyb0{R%sHSp???UcY2>Y-3{(K$PWGN6`Ubz8 zeJXD@vGU1K*@c|P_i*1y_S2D!Cvm71-)chN3JO_E1+Rum9eeMa$nR)9{cRl(CXJj4 z8cu}Bd+7GD2RB^|kxh?(WXn6nyCG+kR?=Fq+YDOeIw@uCmPLig`{d6XaGiVz{REUy zu9AJc3RIJ)rXTVf*W0W9*56)1y*q|eetJJ?o!VbMC4Va~!1bs4+Z#R}Xm7lIko^L4 z_8Iz)*Lm-}tH1p;F{?>9S0)Uww|+mszF&IT*Mxp#d*O>8Y^NsK4h>`aQ{7pUAg>Ev0tZ%6oXpga~Q&)Vlh1p; zk9B@sh}=d!cYr&=-C#VJ2=?%fzc(X7nzAEgAG8^{g?EKkz76aLJGY1PeJ)&fWkpCG zsOMq10W{8yklnFR#= zK>rko1Bsw)4r73RKKu?FeH$4rsqh|9b3Q_WXf+lpW;7#xjcn@?`+T?xc>1V>_L+B^q zQ*dE+xO@iv9JJQ=moK4Tfp0)T(f}zWrhf?T?9k`ts-Yr7z#OB>#JeUZkfCs@0FbiCGIYJ(V zKB4&{e7e{!Pr=85R$qTfgf^mU0Q2GZIT>S+3hx2?Ib*aUw=j3jVE4FiX#@?RerUMV zjb^QYTKslr_i)*dFSpJNmlkLD#}%Y9XYIrjrjbUa4Cm|lV=fF4N49Tloz3;&@!+AzCc+2oUbZI43Zt7a?Bv9 z0@d&}!v{$n{3i0f2DXAS(8%Ld1E_%CcVdt<@%U8@zYFYl4VG5NU}=Fi9~>m*7YE69 zPyu#;%F);#M}BC{$U)LbnI_N*-URP}_rUw$L$HU7_+BpCP0)Q@jGMVYx2&NKuz%HH z*%{041Lh8rUC_EKgQcE)4dicx?&e}&k~LU9!Oo}PGw?YmYZ)x1;7j-&8;PCRDkx%4 zpH57m-_=8DqaKSKF45D71t510{Rw>J6v|D zaM>s;6X40bY1_mke>igfkMvi1y6=b-t+{Mcyq=ItQU>DH8 
z9dCsCp!$2o9@g_-(A1sz1;O|XzaQ*8#y8%RcG(53b20wJ;|9M}qyAh)2jR z#F1ubG`c>ZFX#^jfnnfAa3OD?+yd?3TzDJw^v|I@HZpDl&H3dHbmbR`MW7;z*c3*L z0#)6KS;WH{;^3X+xf@vPuUCkSho1VumT)-PfD|G)k&L1(v z(pX~ZTw*J<9=Tx@F&FFxCH$6Q26kqFN5K=IteD?FtcsAQ;CJx*#R}v&_(V{H-D>2j z9L^8W`RM98IA1_Lpz%z&EP^fp4Hs!w3+-Yomm=o^Yu{#VHm_$4iT~vT>HlzZM%e+a zq~BHaqnh^C(AEOVtN^RQi(muD-OHK0mUnmPp7Z!$KK{1~|ARJfX8l83k@rJ)Vy`xe z{xhz1XYfDfy#d+?b{}K?mxjqEY`zAzf(lT!B}_^i!lW90$B{6pxEv-0{HU&Q0DJOK z*2Wmt$SBsz(J!}n!{$UDgIf!0wWvVRrXU?wOe6W9luK?`UF`!DsBo&4^gc5<}r;?cMcT0fWHEYQD3up5-mr2aY7PreV78UqC5*#bo*w!|yJp>Ng3IYUG;H^fQ)zqA#Dz`g7o;{H@;( z{IoI!->k()K@G^`eb#;-=ME4FqCp?f7xV{v7P9_;wa%f<$5?+SS%1*|?X16}?0;Yv zr~~ygS%07r>;{9dQFqQR!=SCcP~PY4awGgLz=|Ke?XnNthI|KTU4vcFY~wd4;7)XR zgA(q*ef6o>=!K`QWoJ^94nV&d;+*4KLGzn1w2ty$bZz)t+3_6+B*4$fbfiNEKF zzb9Fr$B4fz?ET-ewy<;hnrK-94csf2!sl|X*KbILU-Nf%$OT{pSPd>b6D==7H-Jqb z|H*854Z0PaPT+SD(2jcUe$WfA zzAfxw?62>`e+c$-XK1Zr?*h#Y{H`AVyTI?;0{++fWQcr1zJed|w`AU5;XebPgD=5X z;Pj+W`3BmtlKXpUxO@ly1NaI20?N>pV%t7}d1Eis_myz?NKivtsz`FE#WdAI}<@(9`B?t=-;%^f5kfpx*gC8P|%I>d={U~rfmypV-{_-h06@` z)K%f9&__YzcJ=`1Q=kETQwn}b9Ew9u1g-ev{?YiU6F-gR{K49{)<1Lys03BSfa=Yx zk@ehRDU%93U=dgXmV#VR0QQh)FL|4`uuehqLe@WM1^dCy^Q`|%tbb58rTXdhSR=rv~MKs8%X;U?aL;gP6O2-@4jdG z!3z74|9rU%UF+A+N)xnTYBq0jvkB@7b_~C_Mt2AJf&Jl5XvfZFw1xa1KPz`5-w4Kof<5@#$HY?jDd0gc z1Iz-Cf(t{Ql_#Ke#o6){G!7(!`Ji!CHur~YNrm4xd8sr35BwsqAHEf|#4hD5vXsA7 z$(AMPmx5eS0Lo&rr4+1y-;uvmD!^*^7eNhtHKX13)F%7OG~BU z@=|Fe&u&mcdz!G*3f=_ofU<_AQtDhL@4@d_PydUT%KPvif*RUYO@FHxgG%rTI^F+I zp`U@qLpJ#w`Xy*MieDbVFTq#H-+)&9b3gl2GrAT~elnZ>XVd>|-e0n%lC@Q3!~Z#_ z>a*&1l=%Vt1bzY5JKp`o!QQMhu%|cczaQ&=DC-|IA7fuWNgRXjXC3XFrhnti`k%x4 z2lX+m|52=eup6}UzErX>+xiBf_y4kdyF_9?8kDYKT^6%0K>^=&3wy9UC`Q{eHiG!^{=4bYOo75g0c+uzsc;W@H=X` ze{YG9=Z~aFD{{?6?%!*;e{baejeh;f3|V(DU0!NWlMN%ivSx0QybzltYfq%hi!mjh4Ny*Y+2i~L_Q?#!VBGH17q{jOnzTC zH%r!c&yr0G!{n8gF!_Y^Pr+xPgWp5E${WyW{#}kO`K(uTCHz=&E5H9PwRPj~cKQ33 z?cJrUrMqk|RZ`BknjN6xiju0my`{3Yx76@mr+P~-spSt_cb*w2b?5rYF57j|V7o!; zv-?TjHOw=x{(fRT{zIJnih2qj=5MqUM81Ll4m2HOUQRMk2bm|(Ix1B5gJ#CL1?;L~ 
zUK^NaXl*I;x|w;s#5}_{tY>aq5xkpmjsn|3HK+lVK>vNk57hA!_yyP}v4??35Dof( z^4?*xeIRcPW5Q$ys2mw4Rg7Qtv@ofe5GH-e(;o~1!@!N;7H}K51MHbg{BaR~xZCdo z&D`}{fOSu3;QY~@zcB^7I*31@K7;s^P5goGPC-{2DtSU2;*S#U#NOSY@o1>;5t5q=8TzZcuJ)G?a6z=P;YE``br=qymSK2)B7J_SlULWy6Y#4lo3d6X2i z^^odIylY(HI|pZ@DxmNG9lwc?)A#W$;_(Q6PsMsa$$TB-doVT9ne`9L>ntM0=zBTLxybH95Mm&jYN#m+n-LTg zMwo^X%F3zAs?7SCSwAyV2qA8f?dTcmCKvzVSTg z-D*2aI^HKSyr|9X!^6&mvjxG%M zr1Jnbqq0KAisc`D^x8LEoBZ~lt?Z!wP1hzavC*||E)0Xk6=E1hU=)fl7UPlkSBASk zY~qQ+lQ9+Lf8!d}4?Q1yG0YV1{^gKR9`wmKcvnBBpH`GU|IzZ+tFnpl`w!M1Am`=<@z&y#Lwvw7=cYg;Kwjp#rRL$A^TC|5kC$3P~4xN=@THEkVXfxh`Y90s3wO|4uCJ%AAawuCJ0jfkv3}tJ`3QaJ zKgT`yo(&`iqY%T8eysh+D0<|RbfEl~1))edchqywct_EN>H*sK(NBEL+DT7*>O`^j zeVX=t!oU!7o-+0Q_8aOZjF84yRJ^2IHMqay_`fH$)o1l{lU?kB44DT!fVC?bxk-V=pWbUWV*OcE?tB$ad`;S?xM%Tvyz6)uN7`KytJEvqM(+&q}Pu z8mz+xY{C|_Y46*w%D?OCbUj@+47jg&INo{st$Yt{}3)l0Sg zL;UWy$hVJtkop0B7nl9o=y2WtZ{im6^SYz{=7Yvph1N zF^%*2AZX^BG)>a>k7EPO(Dn~y1JJ9L*BaLuN9_&ge(V~CE9;}(>mmKZJ?D6YKJ@>e z)WH~xLJUJ2JG*_5`|S8m|LdCKz9R4c-&g-@6Q241o=wYaWph6NpKSFkDtZ-&-~EhHlXZ@Z=KO9)H((RCpcG}Oz%Eo*y&P&1#@NYP zWA1fiLO4m*4}UrAam+p(z#$yLF~kr@+YsabBaQ!$G5(J(eof@tWOdZ!j_ZG2Z;t>p zp?S9P|JRNG&o};GZ2W(kICBh=(rH2(9g!55LYCf{GJgRl=%whNe zJgWqm6s{+;KlF}xK9~Ie3MxLToPV=lxJJK@wyjbB6CV$q=t4%`vv=hm4Trsdv-LlD z=bFiu<=zc4E!;}pl*TRG!96@c?<@U7@B2mJ5xx62qeIUx`-h&-6ou~cexXlX|1T&9 z7>s6R===J_3hBcz0#Uh&-oH`wPUWk^J6c2^i=1%wvOWUkt&1ElZXzb5@;&b|S@n?r zO`lGmi4w#wDxd0uy5Udihx%KzCXWh!&k zb5p)@%2~r!_kXebzuf&NTjsm}#qR$!_uv1zKVB4#&1~!WY*E-n?!i7Bz#$yLF=W3| z5Mt!^joJzH{_^F#PZt-LMBe^*i=RV}`T#q~o*#M-$P+k)-j)!~kmqm#myjA@?j4zq zpkE=cq1m|bbuu$jo2Ko`y`fDbZ;HEx&OzF|G1{-k1Hv8Qdw76Hh%MKCp^sj>P+PKD zU;bS04D#*QTtgNv za_v0XC7jX5XSMCQ_59Y8{MNJlShD#lzm-f0r^(jM;@gXs`xnoq+cDzD zi_7P;pZ@o6DR=sFa`K%O&XCc4ndq3x#7Dwpaw=j~=8%yyQ6)Y;TfXM&;}@QVq;NE@ zU$ULz2L&II~ zADZra|GbwiC%u1-@`zS6->{}cI;*h;>yYjqApZlx270G*+<{H>Ey!(De)}oEgOuMf z{X?m^O7jKE$O^=~H@nC^s7mnb*DF`pC!GBhyM4B{eTs6tN;#gd{9aUkBY(GBAC+UG z`9DX7D$74xFN& zL5`k923@FrUHNw%an}{im8qNSy0)@Cm#vOk7|bN+V$mr(xo7s3_t8m^;Fd2Uy( 
zI}pwF>r&n`!`=U(?mrq%>;FWPx~>^5^c2#LYu%{){|(>&2mG)${QzH9zNC2zcW@66 z@CbeAKQC;1U;cZ)j9)7o!cp5B)q#~e%-JWa-mxA){4m5){F2X^ckq)JLaqEoc}$=V z^@yHD^=|&}-VglW8a^feG{HYjlJ$J=fzla)Q7FP#jK@SwM%zZ??^}((lb!Yx=sL;2 zKFhx*bC3D|(rn~QH=%h4|CisJ;-{z4ie~=rROw8|Oq3u!$uXEk?_95MVQpcUL!XD- zerYc?{=LxnH$QlRxJuzg4x?PCkbl|IWz& zdB>r4g8F}k`X5PpJwEI|ukrtN*nsRel?n3M{G08bKe~2!{<}SYWh?4uZ&<4Qq3L#1 z)|9`;${+c3{>>I?L}PZ*oZnJ<^IT*4WW{IIH@}d^6luAR%ou5tozjlx`W)~+R5-43 zWMSAv?!i7BKetnqxSdn=Gh3}!96^{BlMy7ll?>g z|5@9B!6?KqjKC;F?Qan|7UR+LzPX5G_h;F_Wb@+Z!c=lPW}*b?(aJw&(K`nz|1+Ko zbLjK%Z2sRvw$f1Lf4lOp%)}2X|BcGVMP=lwvO(6LRX?4zK4g&m=?`Dv|BJ8;*>4O8 zE66s_wjCXBdN$~CPcz8UbLxWzwngJ^<$s6rzt;2j+q3z9j;Z*B`}4dySX!&G2J5f^ zQ8|zD-)%A9CgC3Udka~LGE|^)u`yI~5BA{z^7A)G8$YGj4wAPS)*8`|;o1DZho1jX z<^N6X527}@7IpLll83cZ`v-<4QF@3Yi6*4cfh^kivF#7p0=xOYJG6=Zmm#yrrTD+M z`M-{7ddycLTZB{R`M+pI^Ig6zyLo#`J%$re`tLcWTN*#tXER0G6UDWsR{|5G=#t=0dhFR}xj`UktjM}7DCd59CN zOLD#=IEFa7zhd3pZ=VZ2pLY)7CiJ`-#r>Di{d@Jca0jwDfr_ub7IuEkTK`wZnE%br z5chO`;TiflTtEjqqZ3`oAd4I>`Rxj>;W}>O7Ve<;cl?j9y&Ae-HAhA`nt%9!e1tyq zpYPcZe=ZCp2cr9y*xII>?Y2}QzDot8v2$Dt0@qf0`~xRMZ`R1#{* zy5f?MAd|xNWXyTW?pxz;uYheBA&s%f&r5pO{dHXXl#dt}B;uZu!cP|G4$D;mE2P;qcwhhIbdf68>T7v~XzQv~ck5 zXTlGLObLI#YDze8{4-(y(n;a_^CyMxb$>eSyF4N6T{i%>bbDRF_mn*W21}1Q433dxc=4fqm$<9zFrVw#tr|e zdr+9}c^$pac79La&Q;@S)_f&S4h_l6#tdhCG*qv8KGYazjN1&YcF>TpYs%#CcSrwo zD3x{@_O$=!@SUO4!?#CI4ZG)0HSRn<{B6yD^8TnJzETh>j{arXIec;`A2~7X*gG*C zdZQ%#gXd83Q|;IJ>q*mUJ?$>|F7Zi+g~$g z`(MNUW&ucLoYVISsb#zE-;R(UBe+9!7;=TNBh#^ z(6+GH*h_KfSXFH7rI=k<9CGeamTcT!92%tEEWYWTVr_46XjxwzT7}bO<(OAPQaVjY zqXV%?uZAjQ>9w!Fs%?BVoS>gVeY@iuU)7KKs=Daaa7NrYG;e(_Tp*)$kC(_Rh}J<~ zBd??Lq&f(hc6Crn|Dsf!acwDyi^h% zkx~C&UljfXyZ`=kq5p!gZKwNG@%4f*kRDsA{4abi45rueapEYX4@3PpKA8F;p?;_< zR^LnK0Y><36pAnw<1rD{+M1fE9oelN*`eLg-X+MV>jN7eYu{hzU!xf?E7+tNf|&vNM7;D$-hqw)5=2%j~~y_8+?Tvj4PO+5POlceMX( zzQ&F0|E=tQvPC$>Hcz7!&D!hL(%FDb*n;#{?Z5a^dS|1wYn-26foSetRycFixyW7O zD*Fu#d&qr=v1JaBhfpOx->-O|{V#kB`F_RqY`@!2`Wd@F#Rm}I^F{4M^gEi6MhCJu 
zfm1kxXzqKo2DSHH{^sTo&WXE#@}IJAz28^pDKd>G_CNpg^VaXzXn(7``x#~6sNay= z?_C_~K6}2=T*gFny;Yurxa!$6Le2Ua;hOZW<0fw54({Or9wB?%doaNav{!@Hu7 zr}Mb?ZW_DnqW94KXdS}t@}8yWO;gxh#q83B-dj|&jcX9!&b~k$qIpHn=0AUZNa%~s z^OYf?|Np{H!(bF*7@qn6%RT?ap2>XA#&gcE^B^0Jvi}q8|0?&Vo&R6M|3A$C-){_p zFVOuZe*e$)0ep5~=wTa;kme{9VJymjWlaD%5tA_$)6x5N&)`?)&&=%~W(t>J7Um%) zt_ln2wG-rRocHz%p2II*aIfWUl02eLKTLl9J6}J}=dUH}_`?Y@DO^u(yJ4Tf+b@PP za*p&CVF{LD1==q1|M~qLWaoSQ|A+j4Wa&9HT;>0p6VNo9ufP1o&_br>^Z&_K={Aq! zn@eXk)?gjd;!=F}4fM_((&poCqHjU&9cgcO4zg?Si=k9prG9`ivH~&sE^-g5^c}>P z%h&N2!#?2yNS=I=U&sGD+1#8)ZH z=!X(+b4JR;INoh2px!pVSWABDCofhsu zvrsNd}-*~6E=Mdm&=Jlr9B^>^GOAK(%C(Ep3d1A4yc z`xpF&?pMr55H7?pj6nW=Y15+qfl5@)VJtZj)gwPx|2y0GKHI!bTb0n3CO5M0 z58MCfN5ez*E$!6v?2DJIL-(Jln2wofyC3awWc-8d+|8%o?*1c-92#y||9ji|U$U9c z-$GBFw}$Vc^}lEBGa-#xn1kly)(epN{<*u#4}B4q;E&dCdJfrfo`Gi(tFcDn3-(uW zOf+9{1zCBpe^^PbMsln7=yms6UqHe$i*NLviCg2hb=ZJS*n(2@{>r@~E3gYaA1w@f z$nL*3-=B=;^5*+n+|z7>IgipiB!1_|tkKf`AE8HiOF7>$dgllCCPt4Vchvv9yP2!* z6{@v0H7WIOM!iecCDg;(=cM+zo;=_`Ni-pi4rFlxr_iQNZ%4;z^)9+DtA~+2t6n~- z|D6rcxS4%0S6h!3$EL=qm)QiZs8mm%kz$L`?TYsm%zCy3f=tEC^BwV9kN4@qt z$!v>v&Z6%!HK7?T_m%(0 z$~f8Tf6dE{AxLL3reZpxzWLNfa~|l?nvxEb&}Shx!@D=dId-TAP&LW;jre(p%KZXz z5h@3;gUE#It3$ruPd%_q+<5n+_N;diD}+~~{3qJu@Y}GO{^9$#MtB`IAj>!2LYATo z6%oI*?%%wj-WNU+b_wsnJ{-Uy9KkWf5J&fq*;ZuFr@a$o8Xd^u1Ww@$&fx;8_2btd zj#|_qfh6kDpx$UyXEc$`>W>!s)A>iO>XtS>e)}}GcX8C0#Qx~V{!mv$Yk+g=f@baG zCFi+9S#cS3HQGmPzV{5h@AA=>Td(sQ_v3Fy zvwcQ~dyapAN9aTUMe=WKZXns?UJfP;F$^Ox3Pl);@#tmem%nfP{~O*B;hrz*^CYKY zI%XmoGc6%!VGiaYT02oCegXaI`t3#ZZ1qdw->%=To?`tu;;2R4a^upojfu}UPEFo) z-DPL(=^t%g_t-xA(piT5{GZ>szbA`AyYcJ}vh%7jaWW&EC38!yzc04_et`A&L#)3i zTSgmOH-8{aZ+$GTy(p}Z-b$>-8l?Rw}`8h zPAOT2*mmQov91+k1b*d;d`1)p_NB ze$#KaP%e$^x7h&N!aKq}!5o6$zZ~w-yI(B|y}vMr;43eO2f~lghyMSoeT@dNL6xIg zGLFO$b}3mujh%|}uMY?V9XA+-7={rTg(9@S=N;@;wjV0vWJb7)%&7~qWa9+$f5x%B zi`n2~lxy~H3){E#G9P8Ab%)XzkBO+DJy*|2$UzNH>Z4i~RMu z+M31MY*eZfvOg%$AHy!dEdQ(c<$$mg`SpeB?z*AufC1Wu8gUD$(tIDkW_?q^;%;v3EL=6ly^ixb-9WDWZO4chufK3LO3 
z?fZT0`$6qH((h>BxAViuc66*|pRHnZY-M|pS>YTRjje1uXwIc`MRQb-kuk)PMD-B! z!?(JB^UV=Es(r4~KC@Bs;}hEIh8_I4z5KcwejOX8#qTMljW zVi-oD2zh&Rn)kcdesIDS-{nXC+Poe5L_~9LCzDe#9W#;Ne?xo;y>pUsiCOeH$cEjGOR%Cu>LLaE9tAT2J5f^oA6=# zWs7iW6o+zcZpF`B|7Y}53hzNyKmR`R01lyBeRqWHsU8@Pk>&6Eb{*OKGuKHb(S$TQ z5RK_&$rCt*Ge|4rk^Oy+-dU{tP4nG2`X%HZJ08&-fi7GT*Zq6@Dv>>}ek5EcZ{im2 z;2yGHe<3^~`_O-}XNAG&{h0Ba<;o8#Z|EaZrlNhH3jH<=v4eca>K8(VK7?BJKs=-F zIjjys{YG^Vs`=J6i1WFlaq>Fj?umu`f7GMGIUitz^hTiwV=*2R(RSPX|N9@z|KDv) ze!sE#!{#lMIpKyW{Qnuo|N9yLN6SOLJ<|FGS}z)Z-|YN|?|xA`%`ZR9FX!JU>Gka| zhRMz`71J>jC76Xdn1?of0PP3$5ggV3fG%?iGWr>^bM-%j`vuZjge6#pwChX_FAOW_okQhqkUk1}`49Pg8|6(n1PSHCTvK!|14`=A-khdFzqcN~9veL&) zE{Ko%53Z2c5PN+{s2VdQT&LIWWf#1|&Y<5yy*4{3oM4+oYXG0_!&5fjnkK(@e|1>s z`75@@^Z!GL*5BO|*ZV44-L&xJ?yKIF&$rS(Vsf5OQfY>?5usj@_Q z!(bF*7)D?e%G(BnHut)Ht1?b@4pOGo<@x>D=(&ELk9wd{9nnNKJFewT&u6*kvzY%k zzh5Yl&RC4cL`=q1^uEHU_;F!~#{Rqi>P7oYnj`dXVVLf>vZ&qMs@!9ya0#+M(f-c1 zUYDLC)ARLZ&?`PCzuzrjySYAfMAu{2M`pWS-$nbJ?teZU8Ss2Ke04~8xA=wd4^v(U zhpYuUc>AN_2gM%^e}8dkIM6se>>u#4@ckRZ!}n4n!@emW4|`{PJbZV^OW`|jyc90E zkInq&1<|>+jdRFFNNbmukjv1yUptD-VtoY2A&V7$TZumzU)cU+{Jp%|_cHyy29@Im zgmq*U8JlE&8hsPCpcG}Oz%J~;J{-Uy9KkVE)(i;8&KHEEmkYuVSDCjqW_0-5Y4$tw zTO1YpKOS~o{h0BskzogR4H+K(uJL0b>9;2AF_!in-$42H*^l}*^U$#GjUnNC8;973 zQXA~|11ASri&zl8zxxH_TZ61&xBd?mKmFIR(=pMvShsDq2kV%Ez=sY;PkI*qSK06P z;n-#V{P4%&N2$l*pKRJ0>-Rfl|97E!|3miieSNg|L!$AnzVhFAH-Dp#_HJnGz8xC+ zy{~Tlb!dA0zIW?AeGKn~6Vg0|GdPC}xP&XXhU>VA>fHsQW`9A5qqeFb)MW}n!o5j8 zEC}^?3)tVk54Rk52lwy*kI;wy|C{=E^8o8V23Y?wAas&l#vC(bRycRt-sA@hLL-{y z>Z?MFx&A4n>8+?7T@bSW{QEFaT7yxD*wTVfHLV~Fqt`BXZ5Tlxh5EzR*dT#A6p3rr zpFWnHh_w1=u$>9Wc5|~pQ~**URJx7&7t2h zv7ZgX#;9*+-|sg5qr8+UFVm$t6D631IcRfiyJI`Z&IFq##kR??ambwWHn3qDFY2E_ zv)^09Kb;@YDm+gb3s8BUokLdLe=aN`mmxO#xv+vhw{VR_{{QToki@Ljy zjf*`FNz`L}^q-;L+mYVe^Zy$52=7DrH{}s+8~fCMed@nH&%e*}@ALfo`2T$&cfT(* z?0w8We;k_7jF!cH-qAkwU!VG~&zkVZ;ehli>a>r1$V2q>`o|%)`EfWx?_B&ibl@00 zhTK7E?{$vZk3B=zEv|C3>m{3_W2QY0X|e-V#eN?uU&sodK=NW=sCSKZ{p1tXXa6f6m|>F5nWb;2N%@ZMXcpo{s(UpOF8QYioCnHS&*!?mlI-Pye&L 
zKI}6;MgCnw+BLK~u6gOdhMUq!D^It`JCT0WG5&jx-Z@U%ctC%I+)!yxat?G&>C4Yc zs~r8WKF09m8O5BZ&vE_#TK;eMh4>qNp%&#|RnHhNPQEE$i{*2pe9{LyuIH7$P)K&a z_4vv9zvrI|!-Pj*6pB#(N#p;YEC^%iy3|~Dg9*A z>7Cl>j*KxXdI@q%y*J8QhV0U|*Nj#Mhx7l(JsGEp=JwPnH}%@Dq_Uj-sWoUos9lWdT%~uA=%8C1@jmBXffuIsWgc;4I~;b?q5>W^C? zou1!Vze29Yhy8~&!t1aBTTqHJ^nRM{L{`4Xmmr_6|Jy^)`zHtOF;0)xEXLIb2k3{8 z?|+l8d_MQuqfp#2#1KakO-Q4c9a;Xv0%L^#8ajk~$~{B!1Ww@$nzb3{$Q}PYGF%`p zAsSmwJ>*-^JFD0xj=4s^j@)5sAS1pj!TvYqS~G;7I`ql7Yu%Kn|C@h3LHj$-UUvRd zcG~*C;~()63jOWEw~+7u`@MUAn}2;@+pgX3B)fz&3!k*@(YS2Ga{l=kKK2a$xpZ4b z@~x4+%0Ito{lG!{o=EQ=9^esTjn)seTR%XL`sAbifBG+FhoRp8qP+(a;-kF?28(NU zU4`T@r1d$BAV(qUhsdx0TV;;F@L1&61qgR-HBPb7`2QsNpXMCJ^6&cc^YUjq2cq?V zu@Kv~VjK&0U-$oq1S* zMTni0e|a1j<=?%F*8eRLzYO)l$r(?cQ62e5`;X4{E;}Z_zK6b&Uhz5g>Q~t&6WD*_ zJo94DevEej(f*^W9kT}c^?%`o(4+n6{iS{BKB^rN-}_zT((LCA^zNVQQzEyZ6lJKu zE>s@xANG({_}aRO&>4i|6A}Cb!pzjE%a*Jv!AyAp!I)ugnPd0o7`m7|MwsY zzu=uxe?Fq`{Ji}I{>oehdiu0;T;;ngQ~s6xj`PMG=!22V$WxOn8ELCp^z$v;OOJ%MBm{~QgM`PaAk*AMy9_xacR`PY2+G+I+HhBkhE z`y1XrbfOCxWRXMV@*#g%|2oUgM8n;)(Ft2qy919_bwm!AzyfV zVVEf{I&a0V?Ni*PPIoOO;$|UrL%+ag{Q~pNT@ap!>@@uX1uG7Di_KjziU@87=`Hlyl-QW6+h5#|113n*5*WePjvsz zdNc9Q_A{OoLXmK^rgAJf9uqMcQQu2c2T!GUD(BJMf$8*_$cawS%y{I~Wh_?36DMqeJHKE1?#@6kc;5!N_n9b)Q&4df<7 zYeLE%8*?YOk=yTUE5&a?DaufRU6E{4p1$k(4AK8TQvd&2{r}p{I_+kHOroCu-Y`l3 z|4IG-?1Mj9U*{ZcersnxbX+z+08jVr&mPtXu-||DzQ_6Y;Q$UHTAP=;XulqM=a?sR z3y#ra$W2fuip!8)(_ELhN^O3UY>JMdr^yaf?e+WaC)b@7K7sso%S+vQ`5>!@D}USF zH`JmIiFefhjqWQ>`Og`g!v$Qz6h<9|{6zd(2qqV|6YxeWik_P<(PP@|5CPg76K zRWB@6FCa;;XTRsak2pwu$^Tp-ot0RFXm81imtJCjy8n0G|NHL$Zuft``@h}&M=s<3 z4~l#Mw!h<=yFb|fpG@C)vj6`&X>GtJG^?++kg2Qs&)5T{^fFYSLtdh_%w5{=%nbL< zea#`Z+xi>7??KfLwhtMN$sZsOA-Udrgu1oft2d(Z$=!UVX^rzc;twH!p>36#l|T!U^&e&fpxLzDF16(VFOr&zX<#lb6C3 z;cK{#o4AELs5VAiGr;*#CAx_;K*53>G#hV}Qj+dgG ze}CTkdwcJ9J+%M+ZTs&ZwEsRD##w(q$@+U~HCu1rLQgqY+PPZICurXBawwF}FpR(` zq~CE2is(_A9o;X7vGnoCotCz^jJPi6nJBLE_{(83ITf+9FNf*mOjOb1Rq~a1Ig|*` zLXuv;TRv;#({I(r?`sf$X#5{_^aPT{o||X=;rYMiU+Ev_NMjy$>R+jN*Zz0s*?*Un 
zv2OM?{$&2|eC2i_e?6y1z1O~nqaO3xd}0I1-6i-sCA9;^IHXW zVGs7<01n{@jvz6~c?mp_<+>U5mO2=|zKzKC3cWmth|{_je1HP&Dqp3eW>K(C!7Z~9g?(YK&}IKK;tG5oLz`d;|H zHGJRrV*c+c{_j@4C7E9*_l7pXe@an?3gqoqc4M^v?JnWG|H}vNc=&<;&+p9~{*eFM z&HqOenjiCj`MfEt%6Q&0UwyJ{-UyM8Bo(4-7}>o#Uj93U=5|eS|sv64^1% zL3X+JV~&aX5MpE;v7@e?Y(kZOgs1xoq=h?>T*a?nE1&D-lg#`7#g2PN{!u5b{P&OO z^~n0q37ofKCX}8WAs<_9emuLoc=cgqtNp+dw-DK))$N=$D{Ytj{DWP zFp(bZ#hqCAq_3}jmG`lq@;JylyVUs>vnk#m5+*xtDyAd*yCGpFS%O)}e@B45kbOg+ zAKGr~kLOoB-ES=V7HaAuzfk^K$p-#%zCT{LSvcC?YmRiH?{Lf`%fDn#eR2_&;OKk( z!w-k+r!n8)nEdwYLtQ3*1y*7;)?mjg<3qF;!8&^Cp!3l;&?|myzs2wSuHlZTkCOc$ zPnqTVHOZXt7RQvLEIJmuum|6{`rksd|4;UpFNW@K4-EUn^=$LJ*g4zTE@e0%?%>*& z!w*ir9RA_-UxY)OM}@;1M}>D!{zdr5hwNwX(cU*lgzq-~xA48Se;)Ro{qwN@{GWyI zzd6hvmBYe;vws%;cJZ%5+3UXwo1}FJJ9m!`6?;D(b{O9)-#;?^-O!hOqwl3~#Bay& zogKr&o`c4|tR?&Q{?Xxk3qKb2&Hh-3`7MsWf5RS_8=nvReWdUE4~HB3GM<&;@!KxxhUy8QRW_}|>Sx_m zzt4oa(XWR337;`W^BHx^E5^TGQI0-k{Oc9{yVlg5|5P|7%`-TM3%GC^5Uucd1 zI?=Vt+<>*l&B@$o^B+d*AF=+R3C(CZsek0G{t>cuvpEqLUkrWH>Hifr6$T@H*D-!C zq<0R`hoO&U7<~kC`d+gA^j!?shk;SzD)+t^ipa5urCto<$%&}iE?vi)egr_1o zTfXG6ZlQdldb|A}5l1cRkU+B0w6VrcYq6D)r2W9Q-sQ-KwaT0KylJ-fPdn7&*wkq4-*9vAh8Uxk-U2lD8xj_gDRLQEuFgqY zpP2o0L0BQ&seO*-;btB>2m3ickKnGh&u^=-29>jigmq*U8Jj;OY@k>CQhWWQf)Kx< zJ~^q)yQ;o%ZTau}xZY~lR5ML|@YiUoo zwkr8&<9BV;{csBBz zS^T6fs+=|;ED?VSmv9BKz1EbD8xXG1YctO8TQJw@H&Or4`0IV+ultR^Vw-wqJIeUk z&AaVo?wDxqT=Wg0JM?>afR6e6|Ji)@<$U+o`S8f$k>C1oGb#gbC<7RbLJULRKG8859S||(RTn^-qUx0^iuN=^-r|%^V`vZPDFDLGOi(-dk~FJ zR36s{;yj~Jgt3U}f2cZd{~vnoAm?`eiS)^+*XAaXK%F-F!~362^?UXQe0EHy&qRea z@jHKF{yu#c=3pKcU=gad*EMUMLw^3f{)Z)wS%wu@iPczxb=ZKm#q7`J>`$`uH2?Q9 z{})+$?l}Lqk^j-o|9FT0ftL0929Vy!{#+~082u*cY(Xi?kkq>UMkrt$tYbih9-K&SKQw(sVnFm$f8Go-3hY$NBVEb-Y+W) z7Uf!eFux&6C(DpDzx> z=%Y}Cw73+;(mPKpf6D!M`b6Y}v%(p&>%4x&X#bxdo7-dkU$p04(L-|7oI?;s;vdE#~to&b; ze?)s7v>cWH8u@oGT2bkKr=`<@EKVRcsKow@CE*mkcBpF;K0`l;`n9gjbtcxk)_x`7 zg1Ae#f@`>ro4AELc-CJ%AcT9uS$1wY9?|>oN8?}0T-`L~PZ>;-^}_xCPwzYiqY%R| z0;5obw)fcE=y->Hi!Qcr23dLz4U6@^Emu~_=J$-fKV*+1O>gZMw_G`qPQ|CR)BkKg 
zetLfYuFP{`BE3^u9ed3&rcXum?XWB|=-O`I|M0O;InrDMzs*EU+9l*HRMF!X^($Vr z|BvuIL~~~9QFl^4$(}EjgzjIwW{=$x_52H`?po!Trkh;H}dhW`A;7;SIsdSunAjGiZWDS7xtj-o#N1rj)V3v*lX{C zc6%5gd)^)fXc*xA8RGpRn@4+p$dquJY;|1qVefCkJ5uHS&3Grsgm99q-)jGWx`O;3 z!uy=-01n{@jv?RgF26SO~w#+0MIW%J|z!mAwDd?MGC z_y1jIRPHNps5f6{lOpCE*BpBtRrj^4m*oo;Ur-Nx-ME^zwtl&MZk11ZwKVek55D;! z`(m^F3)kbObZ+4e?%@F*p%4APD*vmL|Fz0L*|}BuCo{rXGC%$_lK+pU?edQnq>x4{ zqHhNdl+IujVi?j79dp;7KlIM4@{dvUBINk`S!5RT|1nlv^bMu)7)BKn3> zGzkhRHJB`{&e_H-a{AU)Ty$|M*3$O@Fund1x{&y&ohn3Ck z$|!QF|G4~bTdV(}O8H5szZY9SE6r4+`T;BH<;EdalO59S+%Nsz(w`!IyBH`=$KNJp#r@P`X+v2P7i$# zdfw8v!58Unvku38Z2QC=z#$yLF~m@A9YOZ5hlG6p*fjYmHbyHxi6-Q~|MzA60Q4+Q z;1tf_94_DzuHYK3qnE#5UN^-0AMc@X5BuO2c?b9K0MVT9M`R!RuTobd+FLl%;}B5WI}oFN)p z7%SYi)c8K~sfQZCrT^E zUmj0RMrFd7KN)?CAo?ajeS%MpIj(2A_*!xC$bW9Pf4y*i|LYgj{q$&W!PL?h!z_B| zM%RWp^m)kff3wIqrfU`dcfI^0j#||1HMams)MJ7FEW#2j!wRg#YOFz<`_#UhKSp*Q z=Ks?(QJzx#|ILyAC;wYyMhJ<6HJq+{j+i=fgwEkGX@4-Hl8$-&X;&Z+W^DA==#2vvg#1Kak)$j0s_bUJU zmH&2j`9*g5N&YYD@AF$7lST)!5pUU7lRSkpXj6{bm7@;izrR6dPAi{xl}|M6=Ks#t z{?FIelP$}&|72RYm5k=EoRiK4T*4KkXS1K#1=r}E+oe6%H*V=SkrU1ybdJ4GY>ivu zDwkSwNZv!NpYc-i5vtS;@geec^109_+<&z^UsVSP*O7^f&-uQN{{7p=L6n!?Hv1Pi zrklT3{sUtgKew;3aQ6LzFpTVdK7Ue`XLOWDlK_iV`AL;e4Ln~s?%!7R+dJoNt3b$q=bbpPCYDBSaseJ;r*ScVm- zoT3keEdQi=z~malCRqPFrhiyRf4a_l1APnf>wZof2Rd#Xh^&53`;X`wueGS7ClJjs zi}var=s#s=pUf4A<_v5SE=9%1pUe+vJFoxyvNoz)`;V@@+NQ(WsQu=@y<`5{T{Qd$%`0wI6(IqZ}r~CVsxt0T-Z3Vdtd$2Ex z!y#1PHGhFWALr-SUN!E2`a|m%k2>cB;|z{Fh8W^VFW3H#VFS=RXUiL!qIl#o@`ene zzWxqzm21V3ClDLSo*~bmYSojzgW7)fDHOhdaa?Z&@%8~@&6{QDsP{~dLEMjfBx|FcOOiuwQD=Kr7> zEl9m*{F{H-ip~Mb*JZxGb$~lRt;`NlzQjG9-xGZ^=azHbLAl@Wk?9-Cr@G+*J-=_} z>(ZDki!to52~rL2g6ZoA6;+UbtkS`1Bv>( zY_-So@#&Ys2*-{>5yoOXCSo$$x9WpLwD)!gqP@4fkfTT6I&0kT9=IpX^W6h_YPoxW zR$=>ng{jh*j+rRIEX=_?EWn5Tr$xfcumU@O=bpVanq5#3vOj&v_j?A0mEu=p4c1`; zHem}&QHBcaLbMO!9&#TJ;7}B2EaC`x46!II%{UpY4@{EL{(ntTSiPQ(!e1R2whjHD zJ)R(skKF_DPv@6p=_gSB_0irB_0K`~<{kHFp?iZIDyI~MC5}0TGdPFXQv3feED9Is 
z(KqSibFC9MCp?;SQGd87L~H01sGD6BE;*)o(97Wpc@1g$b@C=s^o{}M1q^vP+!DTn ztZ+_z*KlL_;u~(5A8_0J0J7P9fEF_Kp7{YI%@07^1oI2b8|WyO4%s!-|3;ewAdQ@~ z?n(0jZhH3~k$vd@HFgjNquRWL8sz5zWd6y2Fr$F()l9#f_C zGUUG-x`Q2YkR5>vag~nWMeadNdi%%&sG4REv9Tp z{Jnh@zCU+V*gvu;9CB^{pbm@sJ&7iCe_~j8_pbK+6C=Vuj(Is8F@NOn!M_OaPWlVq zg)@$p`SWma#-E2DH2!(``*;2<9H{xTupi&wI5~W8#$Q_h_LpIA<6nmFPWw;cJBjh3 z=WkvP7mN#Zf7zb=%2?Vtw%uh{AX}~N63*fTqUYcH>6gPP`WbZps3`OpROorj_Z7O# z~v&!Gx^`WufTcK&tTcLT(*3i=LZ$fJL--PMXiD8)kWIx5ud4d1@ zZQoZE9)%)|#dsW5pIRFlDz}F4qv#uU!>yUV|3dhue$R*MG4`80`Mf^#Z-&}I-_&RI z&5#(qF(fyCLqE(nLiG*y$6fXZYA3Njrm;VY*&k%R@I>iP##BtlOq5_2=Ai8{`(w9u zcnA9ITswNbm_xMs3tA=_rT{4bXO`Qq*t*rQW<#339( zdaE^hI7aVmw6<@l^_KKFatHNo?YGA7sI`7Io|CxBR6%GW(}>M*JefrmJwDoVy+0tF z5I%+EdC&E#=X~39j-GwHF*tTW?P+C{OsEs;@O1x=GyZc97jOwza1GaS6Z!f7eEg{I zF~9%E;U|3%c|Sp1<6>iwi24?q5cNH_c>i19COHgIA4JsmP>T`5qmciuz*c!Y z&OX96cH4ILTN&B=Gkp#0f}YV;6`|6|cz-Y<+NdwyYD zikyt{9}fx5r}-1~l=dT912COF6D8=-mUJex|Lxj;Wa+so{we>zaW?<-b^a+@=&AYq z)AjsQv~B0#q63}iLIzpn@W4HqCH*;=hYH~ZKF5W zckpc|xd+2Te*Wt+|A~pK+Hd|ny|$bGYYzTO`fAj_=fC0-_oan3;+l7u|Nk}h(@UNy zeVw=sNbTkO?>7J6T>nkNTacZiUjk3(|5x*)^ZOsnb$zp?xm?<>I|u4F^1=P53>Dag zJ=li>IE1#N?hiVKxW5D3-_h=GKlc~;_1{C>-_!2zW%u`@``hULB3J&ov^qHi-M`nguWb%@L21WqAuk4*DEsrSzapTh-|duOt! z_zXMbcOhI6cMa9Y+2H4uJ=C)6^WVRkrjJays3)&G<|c084({Or9-$9y-tBhfFDiST z=-T_lH_v~6litA3Z(Pj(U#_f?`Te=>J}G;xWM`xQ)+k5+w|tcIKZf%^grhw|yU1p? 
z!$7|cMj?hFy68N$Y}omJaPdRVF{L@;w$VH`}eP)M|**+BvV_J zzm3*b(br%dI@sEs=z8e=IPX~?hYfzKR1P-U?17@u|hkzu|wW^AsmRpUmP3`kw=hKhxC4CNH|81q5Ku$I`dOzXdjU8 zpRUn9Xgi;MpZ=)*bKcXwpyRam<)rrIs`ll$_650O-`$hehyByIa1u?povmHOlQ0i@G`3VnppH^s8_6F7x4Xj^N1 ze*hb+pZ)=DbC-5GYMZkYjPIjiyZ!|0i+PrrwY)qWJ;M)A;7nxqwT!g1rAf zj?G1X>i=J--$Z0nWZk1o;}ie?mbl9G<~oq~5c&TP$VaGp;}8CSpK$-LyEl*d|J@(> z|M%JdaE6d$<8o}AJv!dR4V(}(?%?`zO2e)pE~IAhy&%8T$+Ovg-=U>4>e zntw8nT!4Mvy+veN`HSYCETMO9RlYa+{xy9Cax*-`Y0CFv3X#8kO1S4U&xJGOIb6UcG{0$`EqM*saTDn`_*mj^(evvs z#+WxozlUe*e?0f7|NViuN9aRT5Ab;Ot63KiR2w`+*s_I}*(esp_3-s2yt zJL)mm@!kA{@`ATR&nMXrKOYzhqqxr$gkfaw_sz}mE=1o*i{`3~5H|`%(Qn2T+IE+Q z_Wh-7oYK%)RjN;`RQ*({ekxTzm4=2@rJ-?cX=vJ98k*OahL)wJ`qoQB+HtL^(ons) zG}OFNY7R$fs6`zTNTMENo#%kOjgO!Y#6(QSR8&i|265D)ZtvUP|F`wOzpelM?J(Uj zGf{$Bn1gv(fJJEA@izPaZTA1$`rqHy|NgfAuebGoy&ZDP-}e5$t^fUP{qJwJW^7qNH)%2Q$j-%IZ74Lp0-0$Q=&-bo+ zUzytZiZ;Q2%TR$`*n@reqy1<234b#GXFvb{F#n%yp3nbZ?A=_heIld%XPU2W4F{xi z2uE-X>DyaF3Nd=;QvCx9*$4C_^7~)zW&a-3Kkz!6T3qG9tszZzAXc+AWXTh#qQ{4` zD@ObNt?(HnFS6~gvNHy-^~rPMF5nWb;Mx4l86jK~zK&=QqFdx0+`|JrLLZ_vNB!67 zkJCR@@qR%VNbmiYdCX)XqW<%qSIkMKcYn#6C~_2vFc#JNK5F#W$E#dt#&wYi;b?7g z)XqlV-yQF`iI|M3n2wn!LE8}HzeDxA;o17?i~e_2`%X5f2O80|*}Y!x{x5a^k)GlH zPZKxEo;}i;gLznhr}Kvv(IcC^1Iy?u@Nd`efB5^?(VpNd9n<^%+o8A1`%CZssCwb& z_?J@t#M@y_6!%x9VIA4?$cYaDey~BPxj;kU! zky{X>N8j%$rI(=syYTe;J$vY#8MXuV(GMUR6Uc7Yev)1LwSN=zeKZA4)dxE@x zXdlA-x)Ak$e*do*ychIn-B`5tE`w+JUn>86^Ex&|G%q2UQvPd<{}=VA zTeyS&aQ@L{@841HADYpE{2t?by??0WkKdEd13W?>Vtn%|V?h1?ANgOUY$(42>4Q

KtTCz>4ByvqIG>i&~yv@W*)zqwf5U*z+*M16oCc{2ZR5dTg6cf>i4A&w-Pa7TNP zCVR==uPUEkasTY^SE)|!5MQ;K4M(0pY`uDyJcDR0MI4ETY>2zwt@qdvWXx~VaSj)7 z30H6p*HOKgf4r0rdX#^h;d3VVp8NTr5BbOU`44;fw`f8$p82ZaU8`+`&Dhr}2>o=v$z7ZuDO~qW2-USlz?AeQT`*gB9cR0H|oa72db;&PoCmrZM*oZ;*ZN8qP+&T-G0)K zTjpMkkY;}UeT((9^dhvK_WTj;vC?_|1OLuF&K~sq_j~?3+#k>W+5T^ydrCO}{2kXk zsW6O{&Uj43WTao0e|elr?^FkL2v4WaL{2!X3}?u$M)QirRlaHcI5`Kg0fk{6xd2u2 z85gdVheg6mki6m93)kIeN0NV7f6tHBpH+95ZA~UopJKQB&kC%>YOKLJY``Y8v3dT# z_IGGF%m1(9|MTaY(Lzrply5eEEAsZbF|25hsV&m${jIV|mLaP@r{`nJp!U3bn{S5Uq{>_l|GQLCoa9jQGFe*dp2YUSt^NRg;4%yES2<3k@AY7nd!WCS@b=<@) z+`)(E&)gI4{VPnqUWj2Bf!?Exd)dI1QSh1{E}*f*LfSf(F!} z!Y$mwE!;rBW1I<7XrX}?8kmCXGxo91@y~N?AKS+T6*NHw4QN0E8c>Y~RCo(6sGtif z^nTVM@TQ&j-udIJ=h^G*z0O{H@3o%wte@kNo9r7Ko(L1@ouhp}^hxw7=;`GfK2(OH z3-|ojG~qK)hDyh?$k~{Knw{)3`<6P$K3!#>9)JpZ!*i?A5Uwbs8`XkR&coq9l=o4A}__^EG_ zUCwkr?!&Jqx6W7Ze#7_UJ8WU^w`%~&ZubA{jfZeeWq48f=ow7(|Kt4sO!Wh@fY_E_ zvs?O*LLKU*C5;R_-hfr&Sc7%gfFonr%&Cd+Q*GOahn9q=^FP(8pYH#n9C*^c(^&2K zs0TEY#r28l#rYAQX`KHV*AD*v!Hr>)xVNAjaSxyB$;x(mobwRpFz%r5Lc<&GO<3Hc zG0tH;^zO&QKOP+z-v8jaa1aL$4-P-+eqP^|H5@wzhkb|Fh9AAQHoW)R`mncYZP;^o zy)_)yhaX(s7ol(UCP?;uNV@(*Rd|_q?b?M6#fqT{C)tH zYd3^<_HPIk&ut3ZS8oUhw7>j>T{+~pBS@mxJk|H#(7*QaQDM(>FN7bi7!*>%`@cBC z+^}cDkMQ2+L1FLIKMS=xM}(T~BSKx`6UyolA-#53sLu`y4IPt0Ch@s&c(c6b+N#5) z;Yh81*``t9XLnx;|8(^)Ly}(cOFsHnUkuI8$svydj^PBF%nNJ0Fe$X?V{E=YA!P54 z53PyMgoU^f5{%e^8a=1FOK6J|H|LI80OJ)o_$t*W&u6UIm}}beKC4Ee47^@w?TH@ z_5Hm(I-Ga^v;DRd%aPnaI#ll-9ahro%m++i6@3jFHtPS@ubLiO8tSK-Yc8y1^-Ey` zxe2)yFNG~+IkLj@`d2$q>39bU)9;$BA0IV`_0J=PI@BYL3>vV@HG8lR2XF{S zkVFdYY2yR3{zu<_C)wq=d!g|G^Zn0U{hQ)wLNi*>T5Wtmt?>o={@eEI|9{!K`Rb-| zOrwt_WMbT7d}qR&>2)hgLkh+Df4^rtS4;mg>0dAX$T_EFe`&~*1>}?i$H)`NHVJ=S zxqy?7PopqXy+9e#HQhTkDQTh5}Xj}bas9EOyA%!~BU-15r`M~=-`ck;% z9`E2D9^erM{Gaj)24M(>A^P}L^GAmf^y-bHLvrtcFp6G^v3NQ^|2F&kP?>O-{q^ln z@LTWi*f+B8tweDzj&1AZm(9lZ2rI5X@UxOIjXncqn1$Ks|DL|b*VX@#TEf07$LcqV zADM3PXVpD^Ixx&}?mR3&p{pb;A{S#R+9!Jc-ecZ-?IdHHUiUfA|B~l_%JV;^{(nyW 
z|B^mPvej|4&AHK@KiONM|NmPrg+5~kdcSUce{rtFDy+deY``XL!Sz`0|DF95zUsSm z+_G0WMee{Z>_M)>e>9ngMSrsXa&dn~Z5chg-JA42S$}!FbE@VK42Q^lNXCBOC{ev$ zyZui6xB7cijx!O*4eW6}ndbj*eb?APl<#N%+0SO@Jn28k(eo%oIhXxaC-~F(e>JbN zzjGhk`2V#2z=~(WG1s2JNu0)6oX166M*AE5f8^g3Ke{%u?Laa^NvC=$)^zA6P(Ngr0-y$mm8Fw!dW#6I-*`Z%dKftUaGxiR$(8RYJbPD#vS( zxgcMm9%;l6dK=>ShIM`~Xb;##zEh)q-}QXhLN7-pc3>CwU>^?P5RM>;6#Bm5*^$xy zHj}X}Fh}N5z%iV_Nt{N_oAN(W8{H@B>1jO4|AqSJ!&&E@$3;ACpRCnB>9~Eq=l_Q1 zzfAr|*GTyvPyPS@UH)Gy|Dz3kzli;H`ue}C?Y_%?)8ef7ls1QUe*Kr?{O1?K4Pm!% z2XXArJ@P@+zhjL|a=?1!DdPC!LFAM1$3y7DFaq%%b}sSG$Zqw40vX32Z*YC}X7i=j z4-VJpqnulcv8Y;YKKNeq!4H}bPBy%3e7^7rNI9-0IEf@zq6oUrWOA)$=k zd6@rxfqzP$jh<@$J3qU-iT_U44CjB3&W_<{BJVjxPdJChac8|=D2IUHbyM=-nth)X#s{{DUR@Z}BvXFYYziD*o&k{qOX)k^0}OUkHoEQ~6oz z>V4Jvne?Qv>g)?)IlZpe9Qyq)gq8GFXt-(~!hLh+Z<|9;t`XL9-Cl*{2IO|zqn+G> z>{a(Aot@HI?zj?9<^^``cb}+nAGPkSxc|a*>38qxh0^c7*oOl+gd<3z z{jTp%{?4NlU5IkUVGoeSSoqUiZ-N zcYTNSV`vyBZyfA`3nBme7^Vld~4%jawGMXIlsdEozrA-|1E8LUBbIj zxS+2eHQTNKwO1b>>a-2i>+g%>@-t`ndMNt8UvTbE|64dKp7Xef%eaE}wZkoj3Ij zV=;XxdOVkc^0r$$Ko?oGKm5z$zN_8rIzP3zzYf_j^4T!WHOsLQtFQ*^umPLU&d=^( zZ}WrM-+}D!X!e&Zpl7P~cQ&>O&3y6}=eF_}v&O&W(1xms`VYiYj!Nu6Qdr#AZWlf7 zIh4X4`aU%5<^R|6|Fr|ulOHV~4*2~Lj-cXezOld2A4gB2^4IL}SAAFXX5^4Z0evMU z;TYN5Ixw6dPvSJr;yfv2Aa9}NrnPykmDr{Q=GLddqS4U%fO-#yz6i$jYC7Da3WxOT|@XOy5|tI&Hk4 zF?!?a6Hxf9Ir`cd>JY!#^l0BO*)W)Gebx8?=S)GYb50{$4jVs1mZ9*PygOIETrb~~ zd2Mfv?DeuE1E9J-(<^8 z`TMT?{ZRfU+Z?wmV~XRGoZG2TrO8Q*_^JcP=xd+vWdGaR8O zkwQgXO#+YI9EY9O1a=X1_w*4}_v)i|U zEA(sVSuYMe**~(TduFILpExzLEX29y^^?m&dVX2Rpkc0g_pbTq_?{b%Z{ZH^q20Xp zj!R`BPj48V;zr2!M*iZ3!rPY`&{u#m>_HtreGTSzG0mOvNy~OWn};FXNFniY|OztEWjcx zMwRFGwEbYMdE&y7W8DAC=84nmq%|d-6`%3k(QwrLdtT{Y_kX=C)JSVBQmDHsOnCe) zBi;rS`~NFKSS7ACSceVRge_>_?D;1=e{`Y?-PQ{zOfECdu}t~v`LFlb*=n~9NG}){~Z#~5hM}!0m?1pe@r#L&$rO2PM%-u z{`{6foKqb4)_AghO|!5#|1U@8k(4(J%>zxQyaeHlgQjwZlK%yjPxTa}Yf%HPwjKZ}BWe=d@j(SBX|i@Z8l=XPcKL3wMNca5H``q%6HuZ4V( 
zCCXp&PuG9!|D-+M#8aIA@b3e|HG0L@*%z|^7iHlV*-PFb`#x`t0%gd(7^dFvfXr!&4KR`3JJAt<&(EOTF%+}AnxdBJ@u_lNmy1gebhKJWTb^iqsP(zxzw zwqQIx?!}%OY@9KD5*o5>6MGW(YOg1!2%Cl(D8onlH)c65d`q7$S?vGTpEHL(4-2pe zH66zPA*KFMr|wYy9NVtmkeO`!-^CK>ri+7x=$t)M@Uk z+n}&t{SS>x)IY|kf1qWi`iD5P$R*T2(6Zeg2jba;EhtA$8si-ON_yv2W$%9N@AO^h zxoP|Yy006*aM!p7Vf%0Zhj0W*q|pC^l8_;L|DF9M`+lRny0%K)}nkrOP)tuSMnlx8CiO9 zT_E4y701_5@N9ZeoQF!*$Ro8epP&x)S>M0>T3jD1t^Mma&xaeXyM;TrhX;6s0h>I3 zeQzDe>v!wK)A~I<_Qf@xlm9Qt|76RX?DNJC{R4X2GUMjOF$hCYbyr=Qte&j@4I}8I zP>MJPp>DYQQ|CxGeQ5v5C*!V)^ULDRGRX1&vf06*ncjJyA9&jy zZ}dERM)IxoD|b7m3k6|S7p&n&oS%e@NMgzmqrb} zb{YQ{b*RVF{q5)%U2_>%a1A$b3wLl2?Q^X^FyA(`$ zmg_Hs2jVGgO@v3}KID3xBYeRBE&T)e@EAmY20iowy0u$$p}7Cq?PH)Xvhuga z*Z-@&ALkCk2vmGqA1zsm-e2kOC;PtQxsz3Iz7QsmlQ0F-kX)wyLHG=M-FD9gW%OBS zpvSct;uws2%of%%S)V^S4>{l50&)?u>V|PnY}^B8vE!vE(0kIp(xdw53d~yCfd$P`P977(*{=_jr^~#Dg{_*hh;n2oG;ozHt!uw}F9)8j^ zEgV=sE$lz@h4ACvDd9(}r-Xegrs(T?CA_z+Ec|e7S=cl6e}vs@XM`VYoDsI}RURHx z9v;^J??3wgN`3h|FN7`hz8CF#=UJB1dw=Eo{Tt<_{B*?gK746N_-Ti>oLX%Jey{j> zNyr_h|JHc?|0q$eTf;+m_gZC=vZjZhQlRe=mQ!}cwt~E}sQi-ohl&4TZUmq8=gRz{ zuY`9-zGD92{|MWc%m_QwoBmoAIYme(gOT4I!RsZSk*!S}1 zq5jODYh(S3kQw;r;qYx^6r5M}!L#AWQSB0mPlcbYcsBggVg0A~|I+v~4s*j$;KTNy@#?_P zbkqDhvgM)u-{@JsW$v8)-@5JpM!zhcE2uEX@EV!Cul{j+V7NiAbAPEZ)>Vq@|7r_w z80p@GrBOe5V7TR+mSuc!@*Z+ytcgoLLY589Ydh%N`D_^Qmi&tXy=Sd<3hfBp^n!cs z!IS&W(>u|Q4m{bXu648fUN0RRr2~z_rAt_5iRTob+iUKLo~j-c>d2bzL7_H1C=BwS zLof^@Fbbs@i}5J#ub|$OnyCKsy86#t^&jsi)5QPB1m{e`6imYmlwlTTqkW`x7i=`pfSz=->Lqgp3`#B_(yuyc{z25HuZ?A>XJVi|2RjS$*U!y`glp0 zN3VNbn%>l|L|=@CtTYQtqh7yJEK{zahJCLURveEvU;U%rI=ZgkfK7<&-)|wyQHd`3IOg$~x8t5)f7%|{x?6n% zPsSTH(woqX7VHqmF6_ZR96($*@DSO@CKSH;VmLxiqW|C2#mEerkwewH%8-eV>vXYi zATKPr)BFV#=ye071;^+o(Lm4ON&8>rr#-)4j1H&$b{6Mx5tq?#-tQH%*Ls83$iDA9 z8*Y%da0mC$qHK9UKEi;_zH#K5)G<2rInm>ol-LJ7ggy*U*Z+Bq4M5d=`^CF-#vKdjG6d;jl8hDpwuf@zq6%Ad=( z-eDPi7G|T@nhtZwzF+D;PV4_ZgZTa7XFweOQun3E> z6w9#^?Pv7=U()}7Q~&=}{r|oC|4~2>8qL>lny)V(Eoi;1{~tNDp+5C z12XM6)8YTKrD2u-Sc7%gfKAwfa#W(-H63aF{MDuUH$78zyKb_$zKc3x;~D>tX0)JH 
zoY`*U2FNzowG1w0V|;_ygMG*e%i;jNbGW#1h<*e;-b-O^sW#Tq(1mi(zv^}GVWIb+ z4L(UPystmexz+U4eCfhFZ>gJn)x4sq(&gFK$2AO4Bk$F+=cyIy*PGe(H)H+UH;M*S zXzR>~s~I`uQNS^rz@O%~u5;(lP#3NUtKzR;BX1zN-daH99aLBUuX5y`;|F-suTcN0 zbfV^h^pEkpP=|V?kwF6o$CMcU>>oa)z$6&tCxF5p@(d| z%KyK~|G&@wzs>)@Aa5ckf3{_{{eQ|j6XNOn>`P%Rxve_5@esz-%gJK@hrNC#IG%(l zn1=pul!h5(8D?SoZ-1AVP0m5z-|7<}V>z~f?AKO;eFp_N64+xy^U{MKHR-KF6RHQ zD`)Auum}6l|9nX}Kvs$W5P1Yiq@whH)xtCMxF%uB{WQ~aXc#RV>2ca0Cb~Caaok~n zJQmN{{A@Tuo@7w$b`&hSEoE_rElktD& zU2_qaaRt{o1AXKRPGXFP+BZugO8fB$#+{}TJ_Sv5PaMY>uM=LO`nDYW^&s=?ND z6Xz}5L0ot0$@ssRkL#8__WvtCt{l}~5Z5G(dl;nI@3t4Nbi>a(SQ-I8HG}e#du7>Buv2{uYaws zyL)AlbVaV$1p228)tjq~wx<7e+ zf!`KkF)EB5D7HU6=lh$kEmqiatil=;e)W7?)B$A9lv{`)4H=F0zM3)|32 zW*x^h`r01K|K|7a6IUNwe}FuMBS<2JiW+_IWHWLoH1c)HST`t;$BIbf^u5`!=V!!QD)P>TLW|E|r^ zxmjZX@@KRQpjeLndn}i=3#`yi!1wRoX#GF^`L#Fo@$1X4Lp@(QjSLzvM?CYe0E@5~ zOR*d)(asO;==F@yiLR@j&2`U6-+mAOym7wq2WUndC(!zk&+44qP2&&9Ds2?2#IpwL zumQ;?^Z)s_o9K1oNMQ@T91UaK+jRFhQ~I?7R0@mhKI|ZOA$Q*zmE=BTkNPfa<$vYF z0mp|>T=!wG{GaxnR?Giat#Z!+jpBCw@s*OW+%GN8DDZq8$8(~Up2nW{`6#YuG~x_ayD{ZX0dwnf^12!Q!m;Ha;xBzDZqG9)k{r|OmW&H*p?f>85_g&b7xPL?e`{)Ny?0@*4IaTx{NFs%rgCC}^ zTKc4=URsOu8_r4JDd}`hGjhnIfMYm;lW13_cf@>#PQ*P`yXD8H{&##nJ3sWlS1A8C zdpBepqtQmTOw@)ip0hZQi^$DaM;K@Q0eYwBkZ0$w(66EAu(&(?M^m)n>JGxH)Cq2p zcaR)z{Q>d;it7)|l&-{^1GV})2BaN*c)+*o&muruUeL0nX{rg00 zFDQIL9&tZ4E7;%l?C)mzi>zNQe`mGtck8Dkhd5^#Mqm_5F&5)70qyfY?0?jk(21_0 z^7&}-50~$sQ~%(De00B`;(kDD|5yLxBymhZEHkE&*_$5=GsrUJW~z^nv(Y(HS(Q+> zJ*SK#d%Bfb7p{{Q7aZ!&XH5->Y4| zSO&bO55VthunwD0xz+Rf^~1y#dU-s?4)m)V_Ul)wnyO9H@gD5M0VMBg=No6<6TR+? zXRA(gB!(l_nKF*kM<3U9HV8{0gJ$&q>|r8D=21Z3cONE>k-cC3ec}Xp5~pz%=h1TY zQR1WR9~Xt?gk|qON?fLQF8f0wf8kN$3ccb#ypuQokSGZ2UjK(g*Tz32Y92mH)Q)?U zNKJf{s3YrVK1!s?jN=Bf@HuUQt}Twg*A{StehYVS5AECjkmx{u{~r>a=z3S&S^v@W zheS{M4~fPlj}lE+|Bz_D`G-VH?;jGa^ejDh;SY&6R8{{W@jyI}FyQZ$-$)9p7RMlZ z-5B>K-XZj1Xn5JZ2}@6PuhSnTMhI)U{wOhuEJbebqr_NpJhH;`O^*_t9gh+d98W@V zUl?UT*X~D&Zq$sHerZifYn`;$lj-^X0}YtsnrWDUGR(qk%)vagUr_!ae^dF>D_(S? 
zfF3kXmVWoyoOqOIL90BURR-k9w(D{H-^0WL@f5zQpOsvSHG>_~ZywDbq%g)#vQDMvg_2KAxO_ zI{7_y+k0!0?=PskWW7i9{=0lF{=UC13H^UxV!svR`J?~DPnhJoIBsPMISn&VhTg9_ z_dl$8`kfLs-##S5iu1UCrQS`CbL+GG(>e4`_C5ccIezp7=wVk2=uT)q5dL(2;a+x% z4X$S|)5qCeG~BiCy=xX@DVAd;R$&d+p?y97d$a!_J5Q-!oMX4xvjW-UxRLE`5_j`K z{x?}%|Bl^^Hnt5_qo38E^P9v5ac#mDBwsWBf2uhR^t$xrhO(V?+=bZB{ZWN3cX+?vE^LhId;VVD2fgMBEkmB;jL z#j$2b^gC1y91!B3bcegmK|HQ*@0==r+!1!Y+M0_$yEHib)0yW&QomGf;*;h$e>~J3 z|71v=8m?|LG}P-WYDl~oGV6yLhtQw+yO;YDhs1FNmA!Wp?+m?{sF;2uv3=)_#Ey;k z693S9I}z8POZk1*{fCL~D^tEV^G;&tvO9@AuRlus@Zcjh{rOP&E$dD-JxKiM{=o3w z8xInDabVdq;U`DEPuDghhdkbobtJMT9oGQV$8bvj(9;|bYq}HqpU^W?Kh0YGG-yT(TIcJ>K@M$mg}qi1u88Lv zZr~Pjn@d7g_#Jv@wnW=bNw`OUfS$c2+8;~!PU=W_BrNV-H(k?eXy*+>=~SG{JfPH7n7 zoH>|>HpB#PM2CQ-II&8ouJe_~hZA~1|>AC0f`ztQ{#{~N9JEOO3kyZb_@TJ7G&6W7MuLGD6Q-+Q(2 zJ@mSr+6GXpe|$^bnVvx!_1Nc}7WaREJcKy@{|K2xwpY7AR=T@mA3hs{0^1+wG?37kYbyWhc|&!f1$W0w72FK@;F zAseSkzqW$r=cFI4Y=7JvB8Rqf?0@axa9TWPaUK^@9RFWEI9#UJ#kK+C1Fq1opL)YcpA;SImv!X3oD<162iSAJ6x9yor40hRIy24M(>p*SzYJr>uY9Ou1A zf7?O(B$8Rj`EKvkb8fHpp6P{&-s^Dh**oqY_d*yUj!`JZSd7O6Ou`hTsy$clpys&e ze93b?=(&2n^}^#Aiei7`aJCV3bJ@l>qzxH*!)y9KtV=viJTp*+iuRH)i=2%)Xn&~v z<5kZeo#?_JkH6Ob!DnbbrTyc$_K!>I24v208`=A>{QplF`)>`u-si3JFU|#6gvD5j z`TgxGwJz$4L|%_c-p#ssoTUSNY#JwJ(r$57~e6Y5T$w;jYP`89C%p zz%iV_)AfHI@?-C^|2Nrx_Pa2b{U;m8vHug2 zK<}C8JDKb|L1S9`@NwI*9N2a)~1dnU{fUWQpHn8Q`^3qCZxrI-G5 z^&kHHJjV-=W2Y98i}7TbKtLUz2_I`K$c4{&emunz}t2uF}a`LXi`D^1t>9Ozg1bGstaTak5blm&oJpIZ1h03pZF8bRWy#Lq+#JvmkW2Aq({_oxT ziBXr3>Hao#;Y03g|&&xBhSc(Y#Y% zH`#hn-!_?Z+(x#HEDiU>^8k-9;P1Wnh0d8-8lItdzN-IwV`&&fAA%mog*Wtn&)5G= z4ii?jq%@2mM>e=0c^aSTIBJaq$&Blwy6j`pX_Xz6lXPo{_K|CZ)jX-=W;1K}5h zBSUXETKY%p|BM%BrL`^I`Hr)d8agJZtZtc3l7NBaqeH_T@k)>fVxfJnP%=S!|)9a4v^;Q<~YCoH?rx`6L`eq$cjdka5RFL&JYvhP4x_f+-Z>H7bs%l}C2l>fKO|LkX) z%pm&6#r2==8iR--uCFwv`<;@3VHkY`+P&WnWqh9OT;iR){-OPs$9qtm|6k4ir{({{ z>_5FYKiB`XZ8k1c9HkhG@tA-~n1cSlH~(AxsrPH8VVdK zU=fnSs+DVt>2>mW>UI07(U+s)WoZ+Zo+_=sGT5+M0{|>(U!mn78Jd zl~{!}_-OynI>+r-`QO*s`TOiVy6ACid>kXt!{2Tk&Hg&4`6l~IwmxKk^&#ZYhL#J? 
z{TJ;+;@X5Q$T=_We^*ZL93yV$6vzL4SGhmV`P2Q!WdB80I;YBi?I3p{jvd%T?n80^ zJHN#-^amUtLT0mcEt5_%_Mz9@H2*+7B_+N()bAFK3>t96HA$r6wP;2Tc@)sTUHW&r zM>6hJ*hP}H!v~(!P#d88DaT>Xy&KdbkI7{zjv-3Dl zzlgY|e*xXj=^Ce>Cy!ml9t`ZLA4_*Qwbs@L-yZa*DiF|BdW!wf3EU@n?G65n+S zg=NG%B{xJe^J}oadk;NSy!uPxO{{M&U=LZuZ#Q3^4tz} zdRBow*^ zhbiPV%s|&%d0AdBAkH^xzpgzH`6cdISnCJM0^}}e??dBA_886d7L;F%o7x))X`~Ty;PS*!4 zJdXLvvOm-4agJFTS@~V>#~hHJ>Ar)>Y|=RI8=dsHCh0YKc9!#IV-6}xv<1Cu%@6vM z_3swYdpf0Hl&28q{~hulN034W&B!5- z0*dSZ%=AoedbU?R(cd{8kF^)X{h7|v&*LI4BX`_?T;g-mJDc2Ft@lm8 zf#Ui|1-X#fS!ZKUuwT^TB0A@c`f7gThG*QP3F)xT$r@X6VC!H z!eYcWjJW^bQhMip_II!K2k9%(!>$(Yv%k05UvibOs`bV&kn50SQ#X*CP~89TjOTjE zSO&-C$aH(I)t)n%&c^l+>;K(_SKrl6 zDV}%geJlE=+KuPvxFG$;dBm|EUAv9#knX}>>3>)H=lcE@`u>*q{>aDkPqh=>w*Eg_ z)(;4~#J30gZ~(cie6qs4Tzcp3$8AVQ=t=ZUm9LZ&-P^Pwkttzu56TQ#VcbjdwtPm< zp;|smY3HgtCl1F2WE?l>tEwl{$Njd`zJI7iT#K}!+Zbh}x7!CW{-(XF4#!-30w-}A zXK@~#o24C3_5tj<jQjIj-xsE7_v8CFDi^<`a+mzs?N}nG2t=Zo|0ncN52j9pc!9 z%A=nP?+l$7Dn`E?whwjNM^-TDCN zdfEB_!>tc6)cOE}%feQ5)omyrXq`d-SI}4A+c+~^r1yQdt`P?niR`KzYa7)-7+(YiUnZ}>VEA-C0_O{0(`hZ=&|4a6^NB32G-QyWygHUx! 
z_%BLA_0gGOh~r^M(npY^P&ad?ww#$EJ#l8JN5gPyWgz#a{%*gIMV3CEY$2->kp-sOIhO&y0sIL4HvX;C<~tSZ_IFh8D`-f zYsFN)UlQ7teI3Y8)y}R=`=jxXAEiWFx$1eum}5a0Ech{NfiD6ZEPfp{_neN zBMOK4zi1R^(_sGZK)&ux{_kD3wf95+|B|p;e(J=>&v@%-<-Is_!s8hE488LMakI0{ z^a|q(ivB;F-94TCWn1Ik;Z@>2AHO^2B=khqhwDjN}{Q-JbNb^SN-z@!jB&=$o zy4d&C#Sqs{9z+g7Y#WGUC+jfG@d#vQO8*$|dAf9>hHb9(J)~Z?{ub(|`~H!kH(-=& zN--7_FbPvoXxHCNw%_D$Z+Rn*e z?D96m`M(8i4Ku_U=RlN^vrrsY@;l>7=yNa+3$O?k{Lk2Lu$aCS%drx<`~KrLzntF5 zKhI+geI0t1xwnn(Q5__X71|(d6SkmgqV-P5YIW+Q{9H-jfn7++t9AI(^%44hVU6-n zSijF&J#n7i*0s`uSf1{2UFFZU3w&*GI6yyye)rrT%*CZAkwON|$RUpcj-mf&as9`^ z);_oXgX6wWSVxvTjk7q9mb?0!$ji8bYsjhd#rnq$dgpxKCjA!u4xajhuliPnKlOiS ze(3*hgkki$q5NNZWt{(F3|7NN z_qNtOCiuU?M>uB`O3{*iA&e!n?jh$M#=GC~jwc{*tbZrE^zA>rKlfPkCLa_jan>Qu zDM%xO226786imYmlwlTTV-DIqr;eS{Pj-41UG#40D@fPl_CM*rE&XUY&i_9u{m7vW zRrmGzie~{9VKI_JrGKQc6!f~O?hVW7E735}y&>(K`f)@4$Ud#|`x>l6g*smWm0vb4 z&6?<&gl$1NDzO8*P_tA1-z)#4j{T}v9z+`v$A~sOv{$cl_Tc~yp(lJY93hiPp?$Ob ze^eerCyI9V19?pT>p|mo_U|?Mf3Cbrwl0x3$(-XhGOquV5l=I6$Va(^jYNSS%alC3 zd5nGn#eF^Z%fGw*7kN@xT>s}Zc@{}+2j|I)h<#wicEsZ!3zr>VK}K27;JBWQ>kQry z*7r-kRf)cM?SET8;Ez9i+}>aKg!$!uFMPHn+#&Db0pc2*kH`VLJ5_!W2xy43uFOW@8Rw```AGVaDkBMjXdA0*dR$ zu#>TTSs<)svUYy5;+N+1k;{>L+4JLjt)#~_N#Yt1tLSS`T>ncNyN}BMWX*NsuaLTL z{1xiyX=K=j23(WQMXu8a6ypAA>*yP>30u(426tRwqk7qYbnRxF4zf|~YuxLn@pbkO z%}dxicB*wQ+jg7%LmR5r8fPG$O6P~4YDyy|Sa&HsGouaxiNO{O2ue>_dEyX)Sr>kFWtN5duehBV?_jEllrhU)_$uOK(mJ0ou( z>p$}s)Jt&7@f|!Jf3(>*u-^MW?)`t@{a^6@weQsX2GYp*hGJjQJ=Z+IBMjIRwtg@A zx#S=WLHkYhUwJ&Q9O>Mq{!8zs7rKozQ8qNnn@wnTUW@u?>qFlrnM0ehCayg_Ogtkn z3Z+PH^!|MpW9fAVme^oxzI91_WSFpR!S@WA-_^~l?WEm(=(ycH3`E zaIZF=-rpM^@FnZ{xIX2a3@U%DKmT_xhN?F`YyGUv!g9zX&Yg^9Ux8ltoaZ-F`AI*4 z2H!&5J237sAJ;&u**!Yc?zhJO;nATE^=C%=wnm4{xzVBFgVAAHj1MPq8fS4H7jYTw z8_j=5{*?Le$Mwly(l<{Q&~vvmGbwG^=&WF|4+8PUK-j@89#8& z_<>8t50G8AjURYu{J?eN2hdn;{rM*8L(5L<_wO}-|E~G_&S@iS^mo@HHBtY)zV!NW z`tRu(vO%0z+~YOez%AUt3E%QP@&Q_|z7!sj1AeIfj6ulV^dA^P@0==aV~jJPk3i2_ zY16OX?LWKxNBNuj+77=Iw!Y!}n{WPo?Mq>l^GZ=oPpxoYt|`vr`i*BV-Em!m`ep9Z 
zbEui@{*ijsc__}y_gpfrX~05_!W2xy43we$W&O{{PnXZ9`Zg!Z*JsqF5$Ae6 zUH{+vZb7RuKa1Q*<4@4y9nTWaY|Oztaw-r~(_s#Jd zM4hO>MVwCCmPOz^1TW zX=LaPM~yLd?IcXWG|WI5W??p-?!PsC^s_Etmi0d9bxkP(BeF5SgK&jW=7Zy2QjJWqtL$!VaGM(jbt$tkY$2B;Y`h7X# z8q_PvRjB;3ddoLLSVLcj4cLS&D96+Ne>(gR>T9JR8PA{rmCo6LUD$(tIDkVqf_Bfm z1I2mvWY>kqb*qBHbLXvj1qEFaIHjHpKpiqbFAyj zhplARaU7%5=DckSwE?{Dw-szTJ@z}?6Ym4$=tckkKOVO&Uq3-`{6rG0Hg=)&?jg$6`Dt zU=pUF|3&?*B{ssC7xl~{!}ScgL6^I-#7Glu_*6zXO^_9@d7`IvmmIM;r*b2ecM%2A0O z*o8f4*U#U<*Us~~J6H3s*YmIStry51zIG%3w&^hc7cFQ-wwC|9oBxXz`abcT_YMz` zhmcD<2S@0gEBMy#D@jkGXEoniSoeCqHe$Z7y2&q!^*?&@x_ftCGpgw+?FF$9J?A)& z3_b2+@MNBU%`)j1PinLC$a?Xo?~3E0^Tn*qC z{|oXsavk!2R^Cs0Z&~)gTVLd1Yl(|9OUAYQrqJX50n^C*81bVE-7hO&hAIP4b<^Dc zpO=K~t^Y@u;k+^=g;iq~y>7L%p^|+qwjU{jGDxqGR^hXq(-+LA|N6JAW%ELq<9Hqx zU=a@2ek}a-p?M-ljcFXL9-(HmZvVrXV znmP`-1?8y3NBb9cINpUl`2Oy{4E^t$|M4qj)4!TuBCPMXL&72Q2r56NUFQcSVgJ>? z2tUS8u3Kwx`k#e^53R#^`_IBZ%8!TS%iS-(6n=2(FT)SVy%hGSYwxu-;CnCsdHB)D zzpxkEpNH@Dej@A~_ldA;;wQrQ?;C3+?tR!X{gdGz_I@%{T>n(qKJZha^6IC;JGVa> zl735}eDaIo?}on^-mfhQ8OP1YA&&wMoGA%EF^}p!>wE0I`th(2KgxbQ9P`@=?0Mt) z@IxHl9l}Y+Rf)I4k^TDbI^GID+x=Gfr~Tgz$==POcH72KbNIWV?%nT%RN_0n*>8vX zH@*`ZwtqWh4t_f{^?oZfCcdTr@SCA|^tbp1-_*zO&2Uq2n?wpVM{|_8L!hj!1r~deMHnaozm$iRrGw2$_|3?8m zXjBF_p&2bGwtq}jj?d)(Ya3`8{cIQ{uHyQ?=K2hw$GsD?>PExpomp{X1bq~GZi`zz zqPYH2^)sPVSk)WPhOy*$B-Oblkdsh7RJxpBhbfMyA#;j-5MOZ}hhBE0mjBO|q`LY4 z{KWck+W+QjlS2b$xTXxVFdG$LvW7D`4-3#<&HqpH5y;MNJ^|V7xIh;D|H=G+H1kJW z(8`B?vOi7RL4Jie7Go(|rVk9u$*ekgPQ7F$eHGRqe_#CQ;)i!5&Y|sD!LRUJavOX9 z(3oX<)e1KB>u-h1e|syu^Zr}fu%u;~H2JSMwl}Wf5%>Du;@oolSL^?7+pbLzmGS(W zd=PRM_Fy0WsQlN?_Fu05tIqe)`riR@9KsPK(ffJzk-s%3;Irx@zff=c^yZKf-mmYf z|5NNcy%{;=(Xzy|_{v+MKtF~Ph~t{#THq(?ozfrIW;snii?}Cw0o~5&Le;^|VT17V zxQNS0p4%L%vzx;edfk2he|vMdM!$iE;afrmY3I~q>s;mcLghDFnzn>n&dIKLTmSRh z;SRm>%f|hbun*VY4xQ+_|F-(!+xmdt4n3#d4)>h%0MFnN2K<ROl-zG@8pvuSpyK<9(#?bo|*!_4Mc1X%yQHN{p>@?ouqr zO02>fwDaLQwi^Sp)A%2<>mdJ*G@c;zkc}IS|3S0!TCN)N!uQLPIkcS<=KEPEo(e5Bpn~&IV6ri^=GlSB*4=$Zva)yfh%}BM+eZg1mNLp2H!> 
zN08a9UJ-3CnO^Vue=Z40;VBe;q1};eMhDuR7)FqzP>QkW z|J2)IJUIa^w+Dntzg}yH&^y=i4c}CUw2xrrFH0Ve$9zNnQx3$w;@&U6 z9cDYP?~7ZsKbC~@3-Y-=0k&4lSM)jYx1alV$liZ7j)z=?{)#POF_}{4)cJOn(wC#S zzRN-NJN3`FzROBsRST`@Os+w4uJ=uDK=o_p4oUws`X+2aIV!OOyHK;AUxCz3cH|+u zag|>|FRuS{iG5>(n$qkW8OO4>(xdGv?(Z|#x-#pH&)cl+o$P#F`v6Txu+>!%l`R2u76X$L>^A?!(Q{e=*j2YtLrkT zj`~3NJ=p!lcw;!d;VwHrTsr(-oFBK)d8m6=`kI7yOJA+?kaSHR1suZ(oWyCIMf>ag zf5b7hop?I`LmfMowT+(9)916(^YJXR$kE%jduDz=FP@9Ij4OzBgY0ze`1H;<+5eH& z`JvxJ&l2_@-ET<$T4Mx+Rf+E&`2fj{#srW94oLrI>2hw}MDsWt4?;#<4XB?gouj4y z+=u=DzSla`Pn9ocsw0yP6V=a8@zFmlzlOMe7~=e(!gq`fpg$dN|E~J^c4flB$3A*t zoBFx>ed9v)|0U}GWD7r_^}hN)a`d)d_3}^ii$7spsJO;rJSJcg`pxH?LiWlJ)5t!1 z`4#>jV+81Bn1wi&C(o}as7TLt+~dCr=th_LTb?txd$w!*LcUZD6VhS z;knR@?LX$lHVoIN=A7kNiB(vGb=ZJSXdlUMpJ@GoagY7@t_zR-_QENC{BeH!>VctY zJwF~TukqWL@Y`ove_$#<{^fz8*PO`Sud(-^WbeDo-|(!r)`qY}+;I)VxMn~({hhzJ zzJhfEcF=dB|Ci5({$E%>>*bGyJ&yO`01hGf@Ns?U2tBT8m^x!T13iU?&FVu)qyDJ& zf4+4sA2qd|pU6jzYswY-|2z1y8yuttH z6Bqq|{_7QCRs7&<8!^jaBg;K;dQ^%6yF#(g1d_(@9uk579H4RdjMxTL(aq|Ck`JYcv zKU4nS|Dk7(+|DK+WShwXda~+A*QFoL&S^m_ zJ$s4$|A75RoD;cEJO^+HN05u>@XeF-PWPTiik?BwNO3#vMi}CHud=u&e#qmF6!zuOtt@@e&R-TD* ze&b`|7t|h|UqjD+aqspYWL!V1F-X7f{f6IeA&&pKL*7Gi{Eu(E zZt#Hc!0{tw&MDU}Dd&dAbegYtP=3Q8^w&QVhLFQB0;5p)^^2jD9E%Bfj2JOu#DGV5hF&7 z7%{djMvU0v6|Z=KeLpo3j=I)9``kai`TeTK7&V^t_*MO?M#Xu>^1HeFht&`2ko@@2 z9%Xoo`T+x5LYQv9YjHh(kDG7{;{Q&{A2=uUy>0wUIL`m;zrnve>3x@K^OIH5yoX%P zFY7+?e$?#u?)&&A&>LJ+{j!4aF#S<9m9Q<5n5zyz?E&Ne#>?xc8h=LPMB~y(ZZ`hB z-n##ec^u2J3KdZvcF)F7x}=SdzSHg^v*MWKhyaAY~%N2^Fj0f51aqL&0K%9 zcNzajI$`|3nt#A3z# z)Xew{e19qThSnI5+&bg`D~oRVN4}1l-TtRQ{uR{)o~!$Bz&pb4A-Uai z-QhX+dCvB&^$zM*@_R-D8j)DbPr1dvPVj+aKEfyX3}4_Ye1q@MsgCVZ*LIU}E=(_( z5zdl1-*@v?b6*nX8dUo}yV(EQmMPEAcu0OO%n#C7RqTBt`d|Mfd<=tL_U)hHKXPPw zt+6!nbexH^F&2N2_u6I+4(HSJ|DRP?UqDZbOW`7V&qQT|I`AM_VxCQe1C zx+*{af8B5DKjGYJ^`A6<<^Qh^Od)+h{kPBdo#yB5Ha~BdzJLS#1K8%Nqxui_YagIf ze|*;j?TbnJ56Jww{bW`+M~-w2r^oAYU1ySKV=T_c1*jeE{?wWE(r%D$Bbkt9(m9*D zoU@RRgZ(bSWw-))`&;}~^!QJmZgu8#`nAZ-Q~%ES+5TQ=euTKH8QL)9O^EjQEo3RHw 
z{?7lc_&P}I9&~P(|98s&WY1ptpPsRA-oInB_V-@>e-qdrzUQ`a><^@nMhEhKZQ6tP zN$Y;(wzIL3QU6Cjx38>%{o!BmlJLu@=sGj3+5eCJzjbCNhG#bZLs&OzY*_HL0qNx{?f!j;m__T zX?rWu=)3Tr!lu1^i_ZI9_{%tJIBM5!o+572KZQU0FV;(k&kY-=o*Q0lIXC>N;B<4? z{vlKvGutrd%;8cvn={OT-qe}bd6>L@ZZjpy>v;UhIYuov_0QvyK z%JsqAF!iVLySmNRfU7h|djk8ogni7eOrifL{2+OsV%#)~LvtKLjdQ|UX|Jt`9VI6Kh4B&2mLPGgZpql9>l|V6tB#TYiRjT1bypl^Qraw{9D15;Z^ey ztILe56-)~aGfP74tf`@H-&AGpzrhs zymVKg0#D;vJdYRf5;}|Zzj)5w_U$2iOFSESK92F^cItna>;24kpJe|1Oz~{(cjEoD ztt<*JOXn5TBZ>4z|9|jr#_X*_LJz&0Uhx#WwO5a2Cl_k#emlG&&}c@&8Bd|GJs%kNMi^bCnyV+U3aiKdfbM zIOa|K74IPayXHOe1AK%|`*bbT##qAsAphq7!)%Ziw4x2|NFj|5d?Jm{@CClYH;8^a z-;w>+HOMX+5`Lingy{Qoj2yI8{=%tftK=g=o{qCI7SXpM)uLTQ?{RP4$gjWmsy;z_ z7AO8+9RCN|cd_+9hgk#AIDg(BVwds*7dZYRT!PDR1+Kz$3_QhW;bQYgMr$t#uYTjd zhU>}Sqh>1q2l@LZdc$_l7PruELlZrTgz`PwdbLIBM|D!ZpLnkNNBxyRvP(UMGaYjW z?!rB|5BK9iJdDoS>gT!Y=Q--<#p-7^eP*Hhd5QX&Y~G>%N9$g;?r!$)PWAsX^?RB6 zos4mhO6PGb$10=`+ebNGLGN*W-N^U~1^&z!G`^5R<{DjUz{d+~)fMidx zHXoT0&XPH8z?Y=?GG0Lq>XD4P@1&Kio#gpW@q7`-;Tv~3FOq0N?Eg>OuNzssj`U>L zFqKaqy=S!h$6x91Ah$$YLc1fg%>9>|7a%V7|9?P!gc|Xmke?yn|3Alh@rCeLNRD&f ziLRr>bvV!8^#9M%u0|3~_{K5cA^Wwpe#m1O^os8vr{X{D|DP=Xp>>Mm2a0FIPqYPbB!b~&9M{eR9RPt4oxWedcy$1HMK6&RF{xkJq;YShsBFZ1&mq3rc710kraY(+?CTSw$KZYtCv!=Zy zEVs`pRN!emi-GHWw`AY@`iRK>5!OB@U&bq_L7e|rPbSfdbY!MB-ag+fJ-^<|MBgj@ zb(~y(V2p2hLhS!FzkJGc*EU@{b$VzNPOP3Dl4$BO-`Fv4;;(oIznuT~o^Ypqy3jrA zYWDxt?EkAn2HDwHhun;-LvzCX^pyGOE#{}MGe3Q!`RPcnH9s9yw#E4`A4uyXe1e+7 z>H4mwhtKE@OI_RI>BsXwd+Ox+1M^(#!s+1)`?RgUI($WbgS2yhNB)3R*VX!^TvyrE z;V0o^$hxL@UHR*r>-s8P-)7fW=lYzpeXHw38Xf4|=l%}3KeFej`SbM5-s$RB^XEOo zK~?esPQ^%^jvwTaGs&}2JKysi_lxH{+Vho8@{s2%tt#n^wcq);02iUA*!|n*5_-c_ z$4;IeE~8(8rb5ReG0OehFRrZ_+niU~wrY&NB=TD1=l`9??=;T;3;a;U-Gp0k8}7he z_?!8E+WZOR*XdAC++&~na6cYIZ2vz@K8nZDS*-q5kN$`A|JLgNL+eEOAMNU<6w>q# z{Q|N7Z@F|3fZ|>{{_0&>c2p* zx-^66W0#-5I7<7VSo=U7+-ARab#4l2ddC9w=PdR6++VbxV!J{eoLs7&#r0Lb?O)U% zPrHU^@qE0Nhu994Y*c#Q|L+FtP0?#o#xGX$f1o$GmU{ct(Cg83RN6?OaV1?`od4HK zrXwZph0NlY^Z#BKely0GX}2s?_LKSff0_UJ{J+0C<{i9;4-oVJN8~5?3@7LRvH5>B 
z|F4CA9{yM6|9v5yuka1NLrv-N`F}sqPt3phiGB>fo&UFTiTsU0)t+;qevdmxgj4Aw zF%Z|**FNYw#~ef9{v)S^Gs&}2@ge)$+Is()|KC{qo{tM~5iY@HxB^${?`AmC=3<3U z{~7;j8qGFEf?d~$71r2aX`Q`QTdn_JJu*D)nABGGrMdRc(x1l*=$^?xfIo6CnXyhl zmdx?1u4R|ju}|w4uz$tUO^4&kwh!f=;Y_zwT%A(zk?pK zm)}8Vl=P89^UmMtUo@t>*Y%O@!YTWumsC;!m_!ZjL zWS@8NG}(Why?y-iv39+@iRKfO$wn*I?!LH8Wx zGkVzzdH)nLcYuv!+ncCw;s1@iUtFTO=V&Dj^4JP z?-}_Mj$zQh`F{G;k>ZEads@^-Tdhe$ABp_`gV(ELtJSr2>QCpY8>f!L$@%y6yq!v( z?wB)iHpb$7T!4#k2|D-bf6%AjP4?*D?<$p-xu16~4-e>7In5+FQd=s)$J$LlZ^d87s*Hx#@kYA@~jyip*x*f?b`5&?T zxW#d|;SSt|dvG7_M`xYy57Ec82fd}zMV6jJe*W)n`5$d{zCUTkf5WHg9b2_OcIn@j z&ck>VkE7IsQ??x7J{(b*>{z+! zPM7A{s5r~>xwSZqrJs)ra1pXUo)a!12fiH|20r!uR`T-^z5-WaI!?@AzLwq~&&N5~ z*VBKGrbFr$;rQ?KI0w6SiTUTt#)f)h*bT6n{v3vR<5xC{5- zK6I`%{*UgB=KmX;={2^WLAK2J|1#tM#=Bc~8b2r7_8R{uQ^IMo!+vdt#)kW)^B^9^ zqevIq2anTxhD+Nx`f~az-PyGLHRm<40Vy^eHQ2*b&v7y}h54m@ZztQVw zy06(k&%b}sy$d%9H_mmRw!f7>RwpJB$Joa>c%UZlrKV~TUqdzQNY z#m+}>Mf52=`Tv6jo};)bvYX7JX5rZII{7B59h+YlL0W$meh2ygAMDc3aNhX;gT7nO z4)2NYzw^}a0r?R=!DsjaUm@!sINy-p;SZB93O|tpZ~VXE82QHqmxSkw|2eGRb5VG1 z)|u0{ThdBhy}kNLiZ!i3%(DD3Ezdr1K)*)f^S2;HeKCrzMXqV8jJCNs~x9m&z)*aW>~1|V^gZ`&Df6)JKM*U;>1D8d@!W?6U*f8gxLgm!6!%Jmj&4bggDecp-QdzNLZb8^I zPrst?A^R4aMEuzpi}P^-F2cb3W5Xq6-+2FsAo~~bFCwqPbX<#`#oA}{uI;yvKX!_v1d)sNbu_KS*y_YYeN4Uql@L ze?t5Js5u&lYs56JFAOVM{4bCly-MGE^zVDvF{yd__a|F>hyFO0qgx%?gI;8iMNVB> ztG=x3QvWSe|F2f3ZdSLd$CLW#o5o1ju@!h4&*FK!fS2$xel@?KQ2NEvckV25Xf`I% zg4R-F4QQwTa(+UUbG;&+8q^~h^)mMNXl(|1L&CKoP47nhCqPm-`bah+EADl?iNE4x z{h;o9NBBK_g#7=9jVpaZ{|sN?E7Y#z|Fcp1e~pjjgj}pnf-rM## zzT!RZHn-6=j>PFW6K7*A&c_94+hraxc?shG?=ByKPTpN`+IKa%%< zR0pnDp!`Qo%Dfx<{T|hs<8@zLewkOAKi$3?SC`>ZU9Uu^a=&0lwD%vMTEL@*yCf#QK8|1D%o+b2b)+2L7w ze*WJX+FuU-&`%cK%=Ie&rXs7c0NeTdMrhj>~G-<;dn1{)=7w7u`!6 z+7I(zMB4pyxSv{M{&maP7G?Yd$vCG!v5WsCn((l5K8nZjL7jX=u0jQ##)_lX`y1|k zj0xdc;pdUPPTnD3Mx1}}3c2P-Wr)6j8hSmFXhoZQOq1Qn;&sHeUE&&NZ_<18vvx1D zt{eRw#X8yoid_ILg!Y46)n(veB)1Li;EPv#b@D(}W zyZDCu4%MaR_w3~NM_)0{_k~r;;kY(fY2_MZ~2BB!I8USD^7UjMbi*CV;nbtC_ufvu;6-;28mx8OG1fxB=I?!$@kwfpJu 
z8+nj?7}?JY!lPuxr~H!G{g2a^V^xg9(->$T5}qaF`0ev#|D8j^3*<|98Lyyr{r~9E zxAjE*^SU(NMB6%cHW}yrzC*r;w73t*kI=JUIj~aQA%FKG*QX8<_la%sKhxFQg|!i@ z==t@WJ*&7jPLr}E)}@JR-y$05u}t{PabIEJa{nD7*PLY>|KcOzJNgg!3CA!fp=`ye z7>WL14h-4%X+b!X>`w9f$(V~sAB(uQZw|fHY<%(p-*=mR&bRFXq}TIHCNDwCKDFw? zI@Hf|ZOE@nw#+pzRsSz>Pm7L*?5qDCF0=m?xC+y8Ev`rBVe20t-_J8ko}Mpn&ydHF zD`WqUR{s~P|H-x;>i<2;R;1}2=s%DBuMH6YZ`Oa>&|mov{$Bbw;TGJ1yAanBx`(_k z>Nom6o;e!sr&nzp6dok2*YZCgA4N?@nf2Ncesf3kExSkiB<8wD`^0}OB~e>;EYz($ zX4|pQfJSy!0?Dpp-rccqn{*z>a;!oHp2o9y9-XOUp(}Gt|Hr@U|M>UNiwv^Jp?Sx# z(1O;z$FzUgaMDa6z5ZC}K-;Ke;RWfugqQIO(!=djaLim5-(8WkQBO}I=X$fq6duz? zm9Ds|xzZ!MQ8U|i@^w_tblfcWh4Kprhd0F~r@AjR+AlHVnE9>Be|7LGw7IUo+9%~Y z)4Pv_cjy&&t9Q{YfA+|iz4B*<%nHZbF%+$ zjO!ci``fPmcaN=P+g|N|GXH;3vZJ&hTp*2$a0#lqj)u#~>Rm@e&F-V2{N2G}hH#b- zPV570DD>_4mJ?)BxQSdjOFo~W4`w)fc0T{}F@wWQ$IL>_D0%udeuJN@n~T)ZNEE1} zi`kK6(lO0_{DaVnIG3-TU7q6OlV2aa#J_zC`4>2@o9uC1FPRa}lDSL>vz=!S=3*Yc zP-e_07hoZ34{9GAcJ4aYfW|J@fF!-C+Bs(o3XAQx1WU0DC;Sge=_ma6%IGVRo25)} zOl(8<%0H{ct;IU5$G|g#LnXNpo6-M(z74YPX3vJKL-w)4&_b3!#BR}s$@_O7)~*p( zq0U%yYhmc34_xCrC;RR^7PgUb{@-?T2XUALjl4=df|nu#-)@n{B(7jY~G|pgZOu4&gA4qM#|PD8z95YW+WLg1r4* zppHg<{Xe#S3;v_^|El&34Mox!h0z#;xCUS~$~XDu*y8nw{l~?&HL0_cUD_bp4vpjr z-(UO&SIyHtWQUHkPpVA&WAQ0r0=?oI^|N&$x^3&3==!E=J0MHXjnhWjI5bSO-y}@N z6x0}-sMeP;mEN$=`R!9epMj8t@Ip|GfseQ_@6031yoGjW8YlS=au|E%Z$7Ih@ z`Jc=PXUW_=`QLMFF^+a)wY@s&Rfb9 zX~j8!Eo53;suJ}4f4$Z!M}`@%-z3fbu1Q&uA$#TR)nA_)uJBD(?Q|XXuik4LxeYbq zwv#*1kWdC}b8lOZmjO*=ECY7hwhOzl2Lo>n4tvQy>_dP1;IKc2wZ9LL(bsC_GT&QS z2nU4^;V|OZUUrDOoh)dUKM>~tR$NmQ)>u!l=a6^g{bZ`Wqb~18zRl8y+ph?tP_F-D zG&u&v7*C01iC)?qy=u@RfG75%mPN65ZsibC5q&ySu!3sOjLcaOXH z<6=lt%I&VC%gK^($iWG}Vm#YB03jJ$`=0{K(^%f>v2IKMB>`HgG;$2CA>SxiiNYn0^x<| zd#)fXCi_=}u!LNSWr$@!DH+Rv^pb*5Mqi25$d}KD<$ZZFgZ%oCnS!v^wslyK>>Fc3 zCAkrsF`&<9D;d}SuO|C1^{tT!w4nUbA$c1s?+>V7Db4{*iH~ytYt#W<^onbSS|^Yl z?3!XZ(70H>M{=hAfD-wCFaIxf!rvW#-#j$TmPQ8KP=2ZQ2e|_~u?wB+_|H`u=eBPT zng9Q7N`0OAdH%clvgM%q2W`mv|J$Cny@RY>`}6vH>)GkZuj{K%AgP{@W9_@0V-NPC 
z5Bsnm2XGMi^#|1LC)XcXs(x4hXVw2XG)t>RUqY+4LYp>2yK|(FF4aGQL(({mqbO*N z&%7}7y*4})()(W<6JotSoZhE@cHrX+^J7(2%Fiuh3d7cQ)^wSt{bKtljK&zG7ph~I z4GqQg9(8TE=R1x*0lCqh4KnKXIDbF3KmM@sNSJ8bWK2Q6{o&b6r8n&NZuj+hCl5;7&dG_ zWK8b=Hdgolgg+iU6rMYJIIKT#IIJrEcjI3_g%anQfr`;Tgf&xt2<5|n3abx(AD-U! zZTMGXYcp+|g=hOlgg=ZP8J?N)U07Ejy~N1y$CbmvY}@AGh5bXqpO%?_S9)rwJa}q& zY1h#3XJyS?+vZ_q@nD&Cu<`(?jFN(?eDD z*zgLr>WkQ`toXMvXY)NdJ5>8$+^d_<4fCbB01L4gORyBnP>QyF7ll@|B>p)x@4Fv6LD=nA1yxN|o9e?I$v ziT?HBY=SYz$KJB^Tmk>boyxx5%D}zKLb6>ruFV(w=R3%a(kOqcIBX^3I{wvU9R_aj zz1?o@&*&e(@2{U9Q-AOsoW?KUiQ{0G^liO|Efl1hmo!Eul*n_?3Lv6zP04eLZqhW&a?-J)NHXd#4xM`X3?;_*hlZ=1c z)@Cfcz0%nAX5-Rihj3?#e?Z3gG}$A*ceSzmxz-0TzduK|6&Htn&a)o}a1d$xryPHX z-m}B8`^SdE^rOg?8ZS47oFRL68ULTC+?!YIn^F%d18WYC4Fzr5U#PadK4IM6bqp7d z{|`$}aor{EL;4A_)|hi0z23Dow1{_1VvqaVX}|4bLy_Y~A@BeEKtUKyAA@3yL+5^Ai)deDo^4(0nnZ9Fu)mlkQ~{R5Q2C;h+F4;{kwlRVcc#bJW9CL$@^RC$i?(0>>v z6zktHM&ln1lWd!eno-4J3ONlD|x({+`w|EFjxOk%$-JpSCH z4`g{@)7ZBZYaY@*J2E7!q_4(WtiyU#Vk0&q`mS#!t5JspT9CrP$JTxzqYqz(>_4X< zY$La02X>-oneTbAHi!JOOE{-&$s&VZ`6A}I-QxFPFZz%!lUKI0J02?3{$>BTmpC`4 zfoyb7ajno~rTa@MqkL2Q9J6B2@o|DTw7>TYAH*Sa*2(`k+5b1swW?dQ$oK#4{Kfd6 z{o1v$Qb^M~w)*GLg?t?@=lAr8{C$@1T$)Ew(RsWdp!~{HLP5Ll8N-n^*SLrrc$;s; zTl^;u4GN=#M`H|%Q8QnE?7l%^96ipdjQ&3p=o9g)`3E!g|1DsD;>0@Qjr0UgtRsHF zwM=r{WK6+SlwbyCqEnsTg>Lj9zy3{&Iu1EBtEXDjQ>|p%QT0EW5>D??|EtTQ|KKd? 
z%*Gtd#R>nxdGr(Y=Y0AC_f1!<@LWD06n0Ci zir>c`vYH=g4ffLeun+YI_=oP}^AUYN$RwI}TgOfte!uPIZ^%&Ow7V;%)!8&L&IEh9o}!eFt_TfnC^*J*eGm{(tNrn5utZvN3OVX@34c zU;QS&>&;`>_h_}P4eh(wzI*up?_}SzdpnP^gN3{6%uh#ehV5I;2G(xOk+mz?|ENa; z8j)DX{$G9E|9`J*=)*qj#{nF~Asj~MX7+!A{a?-gXWRek|AYD=ns>4P(TX;-r`Z1) z_CML-xHk6nQRx(AB|k8YhUUf*yO#)`=87nsc`Zh5Z50Z zW54Wa{13&~&+*()K3-pr_~aDNt;BPj;W?sqDw_`V^N#P=DD4E2>%AXTsBhMkhcLmh z6EO*sF$Gic>-i_YvG2F(|L`re_!e5pw%OVrbA6lh*{5WOaJOgHqhF+x?3(#=zl~?p zOV2Fx+zLD+G*9*%k*0SPd5)tzN7Qe0-S(~B=RWAk8QvQj=Xj4Oag8%D6SFWIb1)b4 z&^D(i%qJIMAr>RO)cOOZMPUiOM_t)%%y21v8FJd!*-^*a*uCn{QgKzIi$WQ>5;aqb z!fJ9Ys!RB;s2>}S^5gk(a9A%cj(Ihy9~;$=39@#H`ahwLthQf9Jed$ql1*iN_1MuX z4<29tF>4LqN@;Aw8g)R0c}1N&R3v$ji~Rt9H~_phl`{>tMmMV>#|R_ytcDd9BP zVZXk6e;4{TnFDY}od5s3uvyxD(?*7^WdG;-X&Q%wYI+?8Mpzq)Y>E2a=Kr^Q2lOtK zUpFMQE%03EDKd?1^zGPz?seLyt6kq_*GFcB<64qciNdhX_MO;;-KY^)jXm@@cfNkc z(6E=@hbG~qaDr@{Gc@cIw;u;^5GU=UsUaK^K8&Nt`@fy1kEcUE!*CR#RvlcI(m#>; z*}q`8zWvGUe>6?vcV9a=jI!Toj6pHRVFD)NKlOjpH_$SH{V%Py681lt8qNMMX8-Fe zoFt7te%F)9{_hLJ6mlv`Fat9&3$rl?b1@I|G4Q2l&emNuN#55^jQ74kd|W#qwM{#O z-lOjC)<#%N&$1uloXC^)zq-FFH8d=>t>QxY{KkT?j9xR@_(Orde|p0L&wtd=P)1*g zra7MfV$UCOt%lX&+A{i)$aP37*VmJkh;>l6eBLv^AZ!%gj5zj_+oznE={q>ee`cxg zANlz&nllik?2Yn}Yp}tuD9PRpwoo_JlE~=7h+xi zH}n5~Q~vh7s&7a-{WtqAR!j@!=S(B74im&p#3W3{6ih`4W+3|h&m>ot`R&)e^Uobs1pb-fq(S(@)H#=@?Ob>NPpam&(&b(Uxi+TR$^7kw;uit$C zjQRdqGAG=;?ds5iRmw`{_M1r-yDFpdUnzo<+ue_o8a?)!~r%!#Ik9F3)hm z)uCE^A-!R>XM^GNA~ezC+6;+G&kCc&wRtC_$uUTen;weEaY$J=pj+M5bGRT(5T1zq zx|7O_UUgLlwYy3}9qPTyhW#bl_uk9Cl8`)55}HP_?+;s_W2dqG?d)Y^uyH=#Wa+GF z)PC+A5<1n#U6a(&6ZC)W*Z*}`9far`ltZ&JrKME@WG=O2x23HN`mj(=@Pm`$I9 zxtNFfSb&9Cj3ro#IIn0KS*uK`a}V{d|K$HqCOMaI^pPpGPZ?HXHP&Js%A1FT^>UD zi|c<^N;|G0wvpV5YGltF66(l-52uHLw|xWj7PNWJ`T39TKmYFmaa~A>@Aiy(Qm#pO z8?vq~x72l3i*t>W#BIiQ?7&X!!fxzAe*HXkSUvLV11HoyE7d{l+c*~9T*cCmX-|?&U|AiZkKP1?ed3$m=o6|Y_P|vn(*jX6%(W}if zsKI{v0UX33REcZT4mnIeih^!sHPY(L)Xbq_IK5}8bE{9Y_l8g;oL@gzeVQ4={vN0P zU2BekGJ7Rf=(`(bpZxs4)u)8f^oGOQCI{JF!}tL`p-sL=+hmtE3fb6qN*H6GwnO^* 
z$#F=pHdaecM9Oo?uM51SAWRaTjO;@7f^hFL`H!q+m)A}5d|hXQ>x^xx#Ej!@tfpl{ z!xYC%MG0nLCT3wa=Acu3(k0DqL|@ZhGNzj?cAnAdpQ70RqW!r~`%`_?PNsy@WQYB# z*!gp%GY{Dtvt4}1@whz5>Ad&2a}DH`Ob)2f~8o773$#= z{>`Pr<;sbbWZu8;?Ga%$eJ$2uJ!*~p)os-OzJqPAjLH9(dz<so%E%Ctu5Ar z98u&u9u$hiSIgg{$kC_~H-;=moC8ts+7t7io-;rNfI+J)@4W$aJ({K@~{rPy)o z^tk^20p}j)_z9SZNw^Zza1A<*t9LD7e=lT5vI~1hdxpjANHS+Uzga&(%WnMxd-V^H z?ZT<0>_~JtuIT;W=_&SMPu@!J8O8o)i{DPa6FIhNRybqayqC<|-z~qe zzuC@r+b?f_Z*^a6@P@e~!y<9z{3nvaO$XVhWMbyX@Rymx!plX&!)9#S_+P@GmrV&T zjhYfRZv5ZOSDPIEG+}ah0hKFD!iJ)f@W-`Rhv!FM8P=Cf4bR~Z3nzquMr$R6Az{sy zg7ARzJcLIu@MS@GjO^p%`UDxv`4v&}WhsyG!-~Sztp(vJdX=*86>Z|`okPN_qpTrV zJSNnjZfmhVf-&mmbNI07dtW~)G#WE&DmcUY8x>j(kJiUJT0iPyw)&!w@-MXZ(#7E! zX+DPycoCbh1<_CMl`+nHv?y%BstFf_e<{2wylPu5D(bYCr)YN-UlmrD{O{rEwbPCN zT^1T`YsRxJmxn)0z9KwRa#>h6>9X+rge$@ycMS{W*C{*HKQD~_FX2y~_lEh#|K=M1 z^L_n!t@rNOPW0k6Y+4z@8!=AZ)RmFP$iF*&wyY?$l$nb%i?0RR)mhO;IZbw~^ZjP@ zPw7LbpKG1L#pa~gH(~Bda*1^YM-_#8onw{q&QM9-K6^{}ZIt=0v-9@&3$ zK^RY7h>LM4+B}EL$ty7p*C6eA#xaH)==t?O7HA(BXIY~!;aB}9QZa4Y?GG^rPoyVTiaV_#voQ(W7;Q^MWky@+=8BJu%5 zJG*2Yf@V;kqo%qM_1XiFNPvIHV8oST;52W-D98l*vCV?cqX`k`^ zI`%zUXR+_mK97BmG`$0zrRE>ZbR9EX$9(6dXJ+#cAahgs{}m4n&pFQqyogPxnaBQC z4{xD2%vLw!ReCL&v~7||pb;yUD9^RoqW#%udo#*kRR6NO+UXU+ciH9F_1|Iy9gr)Rs*X|MW^ z%p#|MqdBGjNt&%I)&FQOJ>EB+UabB{Tz~6Z>3ol@wf=u3({p`4QwD`2^qx|AbLk+7 z`c9GaZDny{{mNnD;`;j|$kR|WYEU?XJPXy1tCvn}SDzz%9+HREJ!rIF0vC$A7}+7l zw8PkNDgAO>iD_7&E??>1R=J;Yc6>#%=ec2UxW=~Ya070{&A1i)4-5*olYPqKJIR4F z28FxHd$9-);2}JM$I$n!`uB2mFMGNFtfAovaVt=cr=qS6e&YQ;Bm7+4)>|9~UbF^> zvAMYZmS#s-^I>tQuvY(zwx3vkYZJX^r{{+)^jDFiXOTfKTD||ah1$`voi(xHah_97qBbw2UPW0k6yn+0Gy)(v{3gja) zsqNmh&bPbKH(aUxyR<8S^?P+)u>&1gX@+R$ET?0Y!>17rIgQ}`bg@jn>tI*MHf*(;pc$N%5}{{ymW z{kdV7^Hf}@eSbqi7(uUD%MWAax#2W=gMI382K_8FIWF1f9|DK`O91DHYnwDSoJWpF z^n19Fycp3BqT4<8;8NkskzJ-;UZ$PC)pekDm;2k}{`&YIpm7fW10>OeD;+Zp*Wfzb zfE#f$Zbj!_`Ct8(_b*!~-HqD+IMKh`JlXR>E85V0Nd7;n{ZDo{uFAW`9<$FASb_4$*Mq}T2)#}BJvA*5f|GU(a*kqqAcs1^eMl_=xosp&bAJL5- z^zK$vDFd@)PPx&X(*BobtMSD)w9nE1h%`FTcdPOD#&eJR*BFcMzgrs8?Hd-tYvjOl 
zgTou-TX-Aq;(ZLudUn^|#8$%e$Yde|O9OIPw3@&h^FY zWB&))S}dOw>z65Z{mR1K#qxP^XddqUp>>S+hjx1Y`_2@Hj+C}xo$m+rUA`YQ(i2FQ z`hKvYT7F8%Pvp0*C;$JUABTkR=|AETe2yasIb}SCVFXS??Nap<>d}BkWmH@@H@}W9 znP1<3AN&7+a*S+0s@x(^tnc5sm;Jv~{k!b%+y8BQhK4ho=PaCq^N@CY96ujV?HUH0xQs|4(~$uaOWwMZn6AdApavbh5av|pB(?Q->>Tb1=5$sQ+Nhd z+H=p5)%*3gsGB#?U&JQ-%0H%s?T9!(ev56dBCG5!=VMVzZ$vZNk@uH3HWKaKPT^j> zhBfSkIQ|~j1b$2SZM=*3kuH%(@gcoOd%Jto;P5g1Q{=kj?G|~Q>@64^J{K4LgT5sD z_0N1wev9w%BaR@v=DWr)jKFC)0|Pg*ALi=+lZWfIksI*a@sF%^!p^eqIXDmFu}Xf5 zd1?hZJ;N^KpTBp|>$#tN{`6*f{eO&qc>cbJl*ov#chc`h zTvIN2P#cqMoZfGS z|9icEGC%$!?=?%WWu5z9?Otbj|MR?mr0E?P_K`$0gdzolFElTKdD(A z-h%x9Ma$Ihqm>=S>UXlE$eaZGot!^-K$*cN%J&cM*FL~Q&hZEyLtBY{8ZtFW|APMi z74)oe{-?<9F8veeO{lw9s|$|mM?YkrS?+(X`$u^&f8R66p5+-l!av~1kkCJEaCpY? zasJ!6p zYWB+@*WfzbfX+kiUt6V{{G0WISN>1@ zAJzLEuKz}9obVUCnI3&4ZzXTXoybjfzmwfB8OJM5jAxwTncZ!BRf7LP>G5xHMf?Wu z9xqot0()< zjX~8F`Ud5zIJO_h_@i%eTo=1Zot2y~pDij2jgD(ZJ37&e*YF0q4$9;Ewf)KXkAS?N zyKru{^Eg)=duyI(?5*JV7+h+XGGdIrLo&U`xaLIlM`fIo;2HYPYckp+(tR7%Ddo-q z<&L!975)&(y~-We+Sqs8PN}%NEUbB~%-E*3RCqGPf4hEczfTd%_|M5N(IdWlwz@%F z^u5SURGv@OUa@W9TqmM^i10>m^{I^e|I;(bXnRl=})E6ud4QNCH zNi^Yo*K(obF2<#}9KV|XH)(n3>RTSV(Su%OkVOv7GnR*z*~>%gyyc;7?()z+WqC+V zSgw3s9y&%Z54DLWjXysb>d|oENjCM9Au;;NkR0`-{@W+RmCiQ}*Wfx-d~1D@&Jb>( z--u58bd7m3bdP&7^x$vi|1Etow4fDjGoK9YGrXhOPlj~Klc8gZIM;QvbZ*7%xD)A0 z*I2eZ+)eLExi{QPUxeIR_qN(Su6M81%fkcWs*0C~hsZ}zvuk;HjC=ysQ$5#%o-0-e zmm|5&bKU7Vdp3#Po{u(Ui@vQ^zIttFr>AzBzjx63Ht1wiccB}9^M60z>QmBu2G8MM z?`i}2A~scs^#(eo${}Kh4X}0f2!}A z9{q+ITjXE#{iv;CTmPNshKtF8?+1rV$qMW2t})-T|B2<{a^b!e1>s6^8m_^>=l>9{ zBioi1gd4~kaWig3Tw5izfE`Hhk;miOV0Y5*MsAFJZ+nL99aj+U6}Jcv;2}JM$M6IO z-gwf(cJ3p~Lpj<1t#FZYYMJ&%SqM*wdj`*;>acXfun_0`z9_s2H5-*lwS{2|J^KFE zcggqb<@-9eC7HDSWc{yxX+Yydb;BHW#9Z~?Z1tvNatGCaQ`G+@>VLAWNc}%r-CnHzCp(6l`?mFg(8iX|_rFcZDBrj^tj5#T%fi2QEeo&N_6D9^`cU}8$|u4zQM(lE1mizb3d0C`!(x3X3kt(& z^fS;jPaYDNK;zQFaF)1pP(EZtIFB5Uz6aQ`WdGOyXSkTW6qjS5c4)YgoQ7*~9qRON z*RIq@lW?Am(%mZEE`2oB&QoViP4@Q<_PY@`<5t{`JCXf}sol+|X73Q?${SVka-DDiGK#qVFO;oCbUhGAIUfe<5jX2X?i2s 
zjGle25t+TN5%Jox7-HKaXh$b{@fzO1Td1APK4w4G7qE|U(m$PjdD8!X0{fO7+eWsJ zW4j}b4s>qUKfsQTbKrZ})V*XzIBTtd9NOwehPR#PUA&JEk=`YZJtM=%^!U%f?xQ2a zr}XT_Y&OSdV;c79wNJD=s}76|c{_BD_FLu1@VR}zMD=FZ-Q~Jhv*WF+7T3>9u5{gK zv@K!ZukF{ieq{KT{2uA*k>N-32vP~>?sMG;Z>pn^-Om=^?c9f41HG#H)G$o^2%Ls9 zP_xte062@@u*4WJ&Y_=&rV?vVA~AP_zEu5X!#!Wr7rV~Uu4|m@LK00F@0bg5F)qdB zxDwNF4LY^qx^}B$*z-MPuW)9Lx&S#eOQQv?6TBbqv3;Uv>pW@a>(D=OoiuL1ji^%I z+)P$i7qKDy=kH@ab^7mb7gxVuS%gMD0}1~COrmKof0mi*quJ^seE?Oyw>#tUGyL}! zchmFh{{;2$B=ztF-(!jVMJCZS+L{CN*Z`|-UuipjoB!^W#v(j`hwumh~tjUWc>GUJDK+jXII32foMl}itEK|i2et0 zUC6!-{Qs{6|Npr5z+1Mxjd$@rKE%iP6rbZu4Cogg`0;n4;%@D;AQ^L!hJ^nuLsZIJOFv3dJSbh$)YHZsn|G`KwFz3uHv6@g8lOj8pQmGi@ps$KlFm6e z593i&!hXet^f)Ij`oCXHzZ6Xw{qIPiv0x|%Pv74X-`_&*=Q+;%q4&E~`ncTw6`#uY z`lIse15DL!nCSZWC1i_T-yZj0ssCfMbC7@U{;!nAG+cx0a070{&A1hvOXWXwqX#F~ zf8FgF%ZvH@Z*l)*R_-_FZ=-V z?XvaSaM6aJKQuff?h!nOC$IwLcnZ(pIc&g-*o0r!Ut5IBSNNV+u&GAqyKD5XdHvC~ z;`?9pe930CBhE$ZBzy51-oRUU8}H(Mw8?WHk|+H8Kc;_*&(R%i^nLQ@QTg+L{Hbl8 z! zp6dMQrRUctvMu`Zq|G#i8#B--mGg2hiQ*y;y_?(55~95cvom!xMgdZtC1nF8mBC{GVuz zxs;vTjIW~`J?KRSS>(_>-}t3;TBR4)x^Ew6{L*>i9Q6)VZ5;4x@uTW8TKQcpo3)V|{Lyfp=$mo+qi&@ zjN6^>PTY+}i2n|JfP4s#pws?c#h&v-&l$h{ze9fg^|}17=JT~8+r5v}6y<-3^565X zFsE?MTZQ2_j|E5K)xLto)Ydq$A8bsJ~qK~F>DO5&9mA-e-WEd zt`8u+-8uFage}57yIk8&_B*{6xfb8R4)1=OYbF0b((XOZ=R7~~{l~AWy6I8f%rHX_ z-AtHahJ*~WbgH`buHFZ`JPy0;GKW3v;e;%YkkDZUU39s1(@i(ssvAw0s;>J*UDWkg zWZ96EgoFtRiJH<06P;p0LPF1Zetw4SBxg?^k8}QbJ>Kun=ll77zPHcy{keSeFJ^=V z{L4^3lyE>#w`ycO5JXHHbJD^4#+e@qUWe_KJW!h7bytsx)Hf6Jjin*Wwd z--3TS|7{z;9oU8Z2in8tLgAHX4$C<|g4QHEGX#Im6hUz}f1BwQJ)ketyo zG?1H)>R%60CeV-KI8LG+XAs98iskbXl%h=DFINtv_Fo)*_9PPhxWge$m;Yq)`i zk@CO%_IUhPzDn)?;#H=dK_t8=nb_oU;VW zumY>F206$@gEi)j)*v*^be+ZGHTR&^T7;yz2W|7sKR9LGIu2iCYd|f%4)wyt|Nm_f zM_k8Vk8LqtZNb$0G%i%rS9}A-C!QqQoU=oiU1)NwxrhG;ls^;paHr<~ob+E8dLh38 z9H5s@^=vpqFG3|fu0beAT!T=w-1DOZr6}7c{cV)~P+6y}zQJbiyeib90WCO+sD<{nZ81-$CgQwe-4j>2Ig>hnjnt;iP!laR%p5pP`;qH(sDO&u2GM zFJGcxLEB>OU4E@A^?z}dU%ol`*T@?veA#?r@-_;lt5>JWx9Z)y-1m`MS2t2VR&JDI 
z{b#|VuJ}JYYcK~YK-!h<*fdVRc3N8KN$-U|=!dxP_W-ify7@um5Ddcz)a+!tC+ixu z|JSC4G4yfBLfrGJ8ISfMNh(ugAGqih`G2PSMCn5Jwbp%AD_;=jH^uS)3C^F0Ntl9Z zn1NYH?fi@50rzcyM>R+GGzdo&hO;(AgTHJ@#b@Bf|wd&JYx7N>9|Eia3hN=U_Q-&(kqJFS2 zW3@l%&5PB)^VPNVqiCC{{zdCTeoM7K_~ko(l59sI|1;z{6fE~#;*b04Uf{ljir$_J zsc{b1DJs_gM~V9@t>%B)^N*JQP>GyV+M3wFOn$}r$yM5f>UZQ(BcKH|RvlcS}vjnW#@`Q@*CB81c0 zMJT*ujF{|+xQ9T=S@jxvarZ%mbQkxvNd0H@ShvuR-vA845DdczjKUaEZ`Y!CjUevFSHlVa{fFAq& zO{3Yw=|y;C_iAN(Y>D>AZ25n^{GTWP@02IB2U7q4lxHiETVyRc%A8w;S~Q>qN25Mn z`HLn*8(YhA-v&vv;h(Ml=U0bE|NqOWlXqLy-{eX0ZK|>^|4LS9r+3X@qZ?se@C<31 zyK_qj=g14Vge%COE-jL67rq&;kvCAdTK-?I9Hy6!_iUpR;Wqs)Qvd&xM#}qoR_qu# zB<#!>7T(Dm7T&!$EbQ7iGVC5ZBD{BEM0me$blB5vRLJW)DtxeeZ1`~an6P*0n6NKz zY}lVYE_^h0T$mxw`tvV_x)U#kH3SIyJK(9 z9TJK^?ZF1oJ(Om!lV`GvTiaIVpKX;d8|P~66%KZo>mY1?xAgEayXYzQ_C9vWUB zJ~RxYkHDrA!^7tFBSLQf;b9}*=r%IEd2eJG<=8m9)opZmd+V66ZRe=4ecwIg>C2(=Y?GFbDI{AY5Z#`yZfrsQnMJ?MLvy zJHPW{X#4ENP_@?n2QS)Z0T~3%Lz-^rrIc&}_J6RZvUs5V2L{`JAlv=}L+w8>(*6TvnsmeSgIL_wucGEE891eV=lr zie6am`B6(Rz2*6(tp<7vDi^XnUiAEDo!5~Sj`FJ+o(RXulZbPl+Q~DB|1fJh@4haZ zqsM&#$<@jQw489CVP$t!80k8?KlVk_Qbjs<-H8r08?v5r>P#JTz{8S3WDhjZ52 z)NNJgjlo_v_Da?WSG!u>j(Yl$`Rd>C>R)r}OY(lc|8JxIKPoQj|DS&{404Sj7~?*M zks~lFx-kw7+7OM}5>3Vvn$cp+p%st+KTxj!&#rJ7H5>K+_ZffCPN~;kIkKFewm_D6 zCSW2aq24{!x%VmbW_4?md!0s~fw-q(a<*rj?pd`pX7S6{9+^YVL;QE(0&)=woLi#p z5!dxE;a-M{f!ZH;v@-^4XY}>_jr#w1{Qs|PW%E$qcBFcmT;ZHA+E-R_uR#uy%d9J0 zZ+_5r_K%J1A9?H_TiHL1}N2ac}Qd(Y|>6k*|jdpV<*(XsP=BEQ1h~PFxj({wX;1tS+r9TSCKTv z*6E*H*XSK<(nD(eq=)?Enm72}M9pIDGcxzM_cxIJl^)J`e|_Ck5A6bMy5>IsNM3$; zZ>f3W=e6I2>yDo2g|2fU^db92eTw|e#@3v*Dk*#3O_$t5V& zr!A|quUqRg$_M+r73*K?Q&->7zkU$c^6OuJrhmO#e|n$(^;Y}J>BBc})IZ-I`{&Z9 z_tuIey$$R4sSh#FwPG840l5fEune2x*p&9-3i>LnK@M^w@!jX?!_&862X>);u6w{9 zdh>MGUi(bgOFw|N(faa}_37Xfm(X$PJQ!M^BCwYsJx~B zzE%Hyy}mkml;3fjL_5AXzIKK?mmTIDx%pb$%P&1#pkKljTt$&IQY_7skflR?o9us_ z|E~#i12=IScX1yUo#v#2jW#r*>7qO!KmW7+FT`1`EICZph`W{^{|!^`dPnZ56U5OI zy->5$+FkO|{ zwOw1YK;1HIVF5P9IrhfZ7SWer z8CGBw)}X73ZIFz%@?0{m3*JI*!w&4i9>jGId&vVhgd%j5`3_{Xujh=GMzbHbKe}$~ 
zr!TSRWehVyI*R)TjMqo#X|De@_cJdeTw=m_^#9TZ;hMdp*am2!A4S_rmj?NqC;Ls z-1DOxB`H56y>RZQBtg)LRsTOyL1J;5`BdEV*S&PdjJL@xkCG^T>W3Cj$}7z zCR?~$+1it6L)Ah3e;h^)Y7y7P)fcG$U3a4Ujy8a}e{3ziKAWvw+`|xU0I~1ij7{Cy zz4d>R^ft6CcOAsO{|I46VGPEhaJBIZ$Fk_9;*K_e3G|7G|J12KxwzwAyQ%f}ONCkJ z`M2|PPvskhp;wZVoHGT}FaxtN2lKE14Z<`E+vL2*|34_#MksKfYxVygc>WWf|Fq{P zYcG3#vYz`0S@W5(eeo>8GOR#-hA_RK2&?F+^^fDNOQ+}HpRRw*<(Ka}Z6UXz@MZfM zkh#)*!A#G!)^ok$ew0Id_*cyJT-;@3x#LB7`oG7d3zVV^Xtgb!ebBZxlCL=tt2;Poi}u_+V`HW2-kTggsbE=+(6ej>u009t^j_$Lei(p3Xg*=yqIrpp<|HR!c4>@Ou;nFz%0x`L$UF1G|e^6KHa$ba^vbqt~AcR*tq-y zdVX~%qBl?W{4cvNdKKEZliaOj%XIPa%P+Ri0oj5= z`cd*Y3g{&no-5Nl2JUuLa947d_4S-&(O_+Gl%N!4C`Sbzi> zn{Qq-y(cPXdp3UMC|jNudhzRnei(p3=*W6L3?Vxw*vo+&fl-L#@ngtw$if6n#D<}s zp|`feKG8bF04bi5&gWQEZ*o)Nq19i^Bjesb7syMf*Z#OdUX?zYUv`iB{jIOKN3rv~y*5T3rkMg@&+a;dv z=!sq^?C<%#mp=5;9_}sMm@a()Dl^?1%29?v{Dxo{M&OJ6k5SyY(qP=TVDq&QHcfdZ zjN_Mu37CkYed>Sh(2~f2vPg z?#Z7>=E*I3(|ooyJ~^$TC!+YZTi7*?<-t#ilFf7u?p4r=LVS&Y-LPiExhWsFEJZ_;24! z z0W`kfapn)x>Tl`e-7~i1c=JX1|Ex4{)f{y0*d|S)74cs+MYrsqh7y#bY_z^DDo}|- z?x~OS24M(>VFX5D3>vPR-;XAJ;pUCz#p^e>>KiA~R;RBt)BN+f=AV-_!qwh0{~Y!7 zBOT_?ug(nP#FK>yn25q^{qM%7!z6lXp8oev^9<V+Ow(Hiuc{9MoSo z*MMArI(ieEMKfE)7|*eYUy^O2jm@Bif9pZ})(#;@=3 zu)bQu-7wzsqv@3AM_iN8iujMeHdKxD{9`@8`%V2Hp|9uf@A-Rpe&iQF?FYj0X^#4V zjb|epL1DS)#}@vjx7^!x>lcO@U+Bj!cG>fv_57cDe*W8psrgJDK<+}E`?-hQi@48c zYFv7$^vrz-NqXB{-#+$1=|$%77n|2#GFbjc8Ojm&_^d>cbIMSKT72A_%^U~Z_d)&I zhOO-HNZAQ@>(lDnx2}{&*UJB7)u+b4KQsPK);uuxj$U_}oy@sMgpK=GynW)?&>-#> z97W+l_Cx;1>8brk-N#9KJ1W`0D~7vA#5n}}huXu@yV!gEsj#oF{RUt8s_~Ym!k*b* z4SCbQ8s1y}FB-1&xH3c zKNBuF)@48ccTRmNyu0>GVXtzobJf$~3jdBh_6sEA`0)qIt`C)m9qH=hEg7Nn(ofQ^ zId%g#(WUI^+Gb9`GTdi(Q1--l)(_Us81Go^XK8WmV_d&*SNK%>XGBK0Pk*$2A^frY ze?l5SI=wp{=YMU}I9EMa_}l2|STFQJKMcSi6zO9We`hW zhf#xCbzB|d8vY~4;v7QtBbrugA0V~=-cWUKZ*_2{`hRjp7~(p^Fao1cIMwoq zRR3WdJqwlU!V1K_%gQi;-$YEp6imYm%)%VRceNg|yqw1!$Bc5Zh`t0xC)EGvT?6r7 zzhw`^J&c20m*@Ki5VUbJX``L)D8TwUNpA z@6871wBRU?fYMR>i;_Rf3^A_ z$@a9+E}k>EfJ?Z7tLVCFER2jcyc=Za2=gPz+eqyvJm7v>^HkTyU4Hk`)?1vbwGWoN 
zmzN)2Gxyqk1_1Rcx}#pbUx%LbL*4GD6%74ZTH(c?rG3)vZdxyX2m9J1b)ddQwlb#o z-=&pzyO~xo@^8{AyZudCQSX09E6)3ew32SO(@F>aU0Q9oYiV^0|0*qc{C-+4s=D1s ztDf1Fc6e%6TFvsmPU|DSei(p37=mFKfl)~9|2rm~U6FnN+7n?6zj4UI1mujBezT=t zauW9xOhfD&bRA{?Qm)S6-sBlKuX&I*i(aUVjdLjH&`X#4wwOm>fYiL|*~-u9o^8JJ zef#HW#k)UGE7|vXS}9pp{drn>>*r||#h<5DVxn^vVF{LD1y*4Va?s%2HIDpQS`*ov z{j;KFek?a08NrR9pFW6=GyEoA4a`)S+A z9oU6E*oy-=gd(KsU+HQCx_UlHtC{JZ`Bx!1LHmrXU+WsHKTm6*H;?>0tqH05d1caM z=I3e2jL*|r`+lC*g8bd?S-7J(j*}>C{aIQ;7}3jEUg4*=;u)BnJP~FEUo=7IC(&eAvc|w&p5jfAwkQ4_R|r z`9s!m$2sCh)|&ra_aLp0c={pQU`Lt8{gsSz*m+i`zjqUd@{nuILaaOIkn<4N05-`Z&GOFz?nQ`e|Jvqi=d9Gup)cXL3@fk-YY_Vn zIb>W5luK^GHgu_XQuB3x-hSV~FV*fdmwIzQwt*4p)~rkOI1s}Z>9>;|D?U#vB0FzN zA7mA3(SQ~l#c|Z^>=sUvkJgv9)6d`>ntbbK>7iw*^zlIXd`Dimu75LJ{WnuRft3Aw zxB73NI+l$4gzj?g1zf@vT*Woqz)hs)|7GY~tIM0o7JdF!GBy65t^a>V`#_y~n5oese(@X@Uk#ZW|0vcz%6mBe z5%)xzB%Udlh8d_Istq;T+yZ(W`)Cqo4t*Zlma^q9d^q;evf5k&e)*nt5xE3~!Y?D^ z9>l5n2ad%tkfb!Zihspm&*ht!WqZ!Cp8bS6pjz9XEJ5u1mywlsCpyo1knbNGuawEY>+pj zEg*Tx-rHn7_KH9C-^Q!r-s&1?5pQdZn;vZe`O2XK!W}{p$`EY;vF~3+FFm2`KrOui zmGo!>hGoIxkM$vLuPTeon5yo4*bh8sxj|GZN9x0d~2 zf_b(#*#lN(hMSJv#$DV;_)}#!x}zt0p%40D00vYn~l0~2C|LH@O z=%<#FWwCEfruNArue$dQ#`V{K`iviaJrib#V;(l`F&<~lXlnnzSM;-|>SvQJXr(8~ zw$$Br~j)zTT`I_->Uyh#&z~bc0U~!h^xypE+Us;8CGBw)*$Kq=a9MBf^FzD zk75VeVb6*DPwfpz--Eq4fI@x+ci1lIaZN#q{&*3+3~^0;#rTIdjxzG`|M#YQemvga zU^)8-s+?1c2DIRd_O+wY|E&H$n$YaJEgky*NTLl@y*)n;qXxCNJU{Aj1U2sMq@LBb+nztISe^&lQ@UrasOXiw{*L_pQj8!#a3ki;{WIFI`2N>{@Vt)bJEcrJ@L5z@0{kH z>cvL&B4Qi74ORE_|8ZEkQlng{y{!LF)*sXV7p|tx8e8%7K|c&YeScxJEe6q>-B%Na z(1#)ZQ#$f_5YJM*#B?%e+75v0QM*{_J@{5w>=o+{=+NitFQ(+c(ng; zF8$H|!&~Uv(00+iopq0Wy+6c%zK-F)1G}&Xh34uNnE$tzp4$KaRz^5LKZHtkQJh0x zE=(D6rfdJrR{qUri_m5$66VqR`!afJ{of4b_h;G%>fRQ_eS4B;_5V+69|%*62DIQP zj-zWrdN@gTjAG*_J9njrGvqm3z$IKk?E79Nui*x6qVR$JdvTjyYTThjTjMVMJ}Qj^ zRLoWX^DA?#$UPLhmlDKv0{`g$yZg+mzbw6@J9^@a`M(!;!&deUq{jcXAzB*M%}7qy z|3}rQ>i^Hw{}0sv+CVA$$O-lTG4;Q2MZQI`Z&K1jKOJTL^~+H~uSD)V-)jxqLy`85 zI;fv(4!|G`iPsmV5lzl%?)|V#ZyoO(ZQIzsty2D1zpVZLO0+d-|C4o7wg1T@!VMG0 
z2#iAfZ|@lL(f>8Z(X%iCssD2*4_fxgdmH6JVcW>K#&@FQlTaXEPa*Tq+mn`@feL!% z<%eyGa8bhgVhN9Uk9fXHdJzN3fq+uzzn@#P>n z%Y4W_*7NW6|MevU!*ONb7XI7fv6ca02e}J-P;*Z^g*<>mC_>!ZA@1>-yktz=yKh?U zyYjC>s{YNE|Ii{#$1-hV{+->~%E+#E&p;l<@#t^8=Si~TOt;WZrsiMV9uVUH|ITpd zE8EVI7jOwzP$*3mT(qAQz4Vy0bhZk&Y3zYN@cB^M_xVtkY43aE`xU4}RqyA`FMB>5WHW~=I?3_k z$QGAy1NaTX5Ddczbj*7`j3PU02ZS-?IAozK3=9**-7An+|9vrAq_q zgywSJyw11x?vrHPi=ik_{zpls{GZ{Q4D@Zt)cVu0%6W0Dz$&ak4sx*t+t4sn{vRp+ z@#06zaPcE~S^7UO{ZEzug*hxt4OyElZIbna<$u@OA&ye)BkPqF76}zxio{C`NsCip6G=>c(nefA3gT@n=pVr2yGq8 z24zAk;`nz}qxu<#dw4&n?eG0$ct1GeT{V!Y|En$aeiwSb^Sxg@T3@+w&XZw?c!yyG z`p7e*$T1j)qS5RZT=V_`znrJH20^e~A& z1zkO*Q*s7oVNUd0@tpZ@*8jyi_+@38GIJjPN9%tU&`amKHd6op$zrqbkhUI3V< zMt#8T`oCmpp88K+7Wc)iXw^@wHa|}si?9UCumY>F203U@M>dwL{|bzM_fWt0SHB~* z|4)Yc{jB;Ahf#xn`hUQ^I7D{N(H_b4F2+i4*=!s9%20*edHRe?pcs-bhowvj?#~#lI@^^?Vx<4_5)7xtGWI}XeZC0o_>zJfI9jk``;z* zEBI&jKmGAyq}C~Bd;ded|B>GRWbglFZ5!uY!wuZTZQR9uge%_vWa$%eZ>QAyv(?fk zlG1VN|9>a`U+jOvrHi9GdLsXzeg4P-vJidfZx8-ST0gQRQ~J%2eg{gwWCeF6`Mxo= z_ZAvkd!={SwePFp-Q`~m@67CLf5pE_+cEnmX&=phBJ3ZV5%#_EY}mW_+3?}a{>JE@ z3wg86dwRv*HuFLlApXuNFNQ&6esv;zeBOSDbx(!^Tcw?D<3d%B7qu;42#2@7VC>+V zp|;x)YYSfp$x%H+@u}~HqGR6+rPisHbo?v!i|;C&PAbleCR}`-Dy7zZN#1{#wXAkQqie z|Ba!&!kd|23ETF4C2Y?e5Z>zle0W=3curZ=71F{xOZ$a)Cx1QcTKn~|JHvm_Wq~tzX)ufb1WZJiF{epnT>m$P zjQf92BWGY1<{i?9Us`}Cjk5@8v=d8__Yt3CpK6;k_`P1lB-tu41x`+s<5 zSi?UDx!8ijxz-WP&J5e=rIU>fuhrkC??Gjrz2A1)1CA`4p81d4|0B(%7N(A@KV|(p zJ=Om2t_knL`lB}9< z{lQY}57-)NP`lXNf7B1O{-Ce*2f{UuWuHKEhU@chy`^kL8}jGtN4rk$@owQLc^ris zr4#-q>7`rsp}E`XXHdDF{Q%`C!#REza0yp%71wYBH*p)W?u&CaqHXgo_kDy8WkId- zw_N=%y_eu0)&H5=|I)-Ja{@NA`*at+Cwid|`e6VXHhy9K&pve;ck6b~N4B;8y#24v zJN&8k?*rxDXWGA)wSTn@jyN9skAuWB1j8@__3HY%-TKt@IOi_zeLsdi4ypg&4QGQx zTmzJBPY?O>Se9cGP4=tJvpzW`s5L9CX>M zr)!e__bqie_ZDo!4y69U+hJW4J?;tDwAh>sdhUVr&^BH^pRJ76M~H2Sz0rSLdN>ee z7#9waMJPiRN~FJ-28$ZygKBB-0sBpdw8uVFe&^?QwT0|VY&_Kk>`LriN4RUrx|M84 zbE91+G# zuSKt0YW#sbaz~mNYo48W`e6VDp?Y2>CFe#;ZuzR&_|%HSo?!tE7?+K-yeQs 
z&~-B-j3cu!0TVF^Ioa}l>|YKHVG4K0NoxtoPWv&;AoImBi%i-7CfFyMUU<>?#aVm3 z(M!GKlGE(`^hJoa`)I>2C!?)m3BQ_F{RDCa>ecD1$Tg^=H_g;8nAP|2xZ9Ex2PfuM(cl|mfnCC#D4?Ce*~0icf@_WAMMZE)ld7kuXil3 zM_c?+VNYVy-7Mv%xdrqyIEM@9T=HZ{mzFvfS$oUagi-W3$7LLu6_2Yok~8eRu%la;z};yN!HMJ~Ou?oL-p_4o z=J#cXeef7xLcz4V^7*|)O89Qr&|_SdfJ{bDE=rVI=C)f{{=EFzbn zzWl|oj9h`bYHR%`TbGVi+-s1ex1j~CD4O|VC|>xYISMa^QnGBVxEH%dqxJ8|aZWC_ zU>kN|7xrK;8fIGmKG*tpvU#a^SBiJ7JsZe2?y6JPzvJ*l>)%mJuS2~sN0z^oN;~-- zu2Zc}LE&dFg#+SE>Ted@itF;-7jmx}i{)QIuS6NjQFA#f6!A~ZFECzHMz6OnzV1#| zsG>JpJKt1oEjhgbZL{6mboVGs3yP+Dex#m%zIaiN$N&F2AWb>%D30SK+HnTw(4a1C zM66?)(IU^M+W+)6R6Y1Y`(J%o+o*qwdioJ<&zeuO!v*nN!WCRaeTOyj*6v@U$A9uR zjZcId^qXi~%l3!X)!L7}?JL1A|6q2wOWsFefdcQUavBBY(vGmDQ|+nA-4hk!ivOS~ zn<*b>CyXh{yNKmu@@QI!bAEaWx0%g&Q&zfqnaz{CeFpR(`jKMg>{eQB^ zNBjRwpijgk{D0m5C+@RQ-!!UI=;f$T2UQ|h9J{awdvO4VP=qoxEI0ojssH<3)@MGU|IenL zB-P;;D^sp#}BB)xYYvqx9zG>ffc>AM}%G(-%*w zn_FkAf9Gp|@QeH3oFUJlP}mFPB@`@t*mfyZPhR1^ii*CT>#TOhP|rEgeCkZ^PW=(n z-!;c?;3je}u{V*Ma@g+%S>H{+kMOCqjqd1)Ug%tr9{P}RPrQEQ01Uzq48sVF!WfK0 zS8aO8A}63opa1dvi(=)6yjhM4dL`og{fWX&!W2xy49vnDG%S_>^%I)(A)C=cZ_QNp zkCy*Yby5DuVc}{P%Kz%BI_85#|jr4Z)bvg0pbygcKtF?Q)=x>z|JPqmA~pa2 zx_$``3sZATzvR4r$z}Z%^2lj^$L!%B+i!KsFH8_;TpLiQkCW6VYaS_Xo z&%d|v@1d?W+!_IX`L4BTi+pO2ia2+E3cqP6DA0Fb?71+5dlo8YdM?dSb zSqDVLWIz0Zr0=*m+h-)2;M?Fq|Wx`h>H%tDuuVVwfVW9W#{WJ~r z{;$jb_hP%=ae5o7CVT(Wy?@l8cC2R;Z#{X$dHGxQ1;w+;*m(T^$5DErf07jNKTa>* z&Hl699)a|BRNiC%u}-9XBl{1|@T=J;?@yAiCZvb@nQ7r1zY9q1AF^D(z&wCU+*gns ztzUqa>GB||cA6uUXKv6zWe{r5%DbpX{6|;A9sP@Y^6+W#oN(S{=aEV7Hu9=Cui*x6 zV&j_r;Wl{}3#8qm!Sc6!7x%z=Y`?sv|A0!|7cTszyoB!Pnr6Q#vKRWGGdF~OWUBo^ zKHhIrTYC_9zIz`=rsm&U-#3C@I9+*IXZ|C-RQso7CHo0|94f~uTM*|S#5PbCzX_O# zNtl9Zn1L>Bp5&7B(2-+LP43P<`iJB^EWjcx!7{8soZq>MtkdR;|94(P&p|HY9P;Lk z(#JmOW2|_yr4O<=!x*%Gqm-%_OVo$u(FTq9@1M9A8U$!s& zKDK|diM=zmzK-ql(fYdh@4xtu;P{W+!`;;D+8DLf#=gsqfs^?=wK29CpBN_pYX9vK zXQ40!j_swFo^oFg*!k&)kg`84cb}`>=UMm8uO?4@PF5kdD{9FGr2ZeIub52aN_(h!rFSn}4Ou(eH+aRn_kE9$MLXq> zJZbcx{DHEw(&lAp^!&r|sR6Dr2t&{|!g^S81V&*D8szuJx$>v}eDgy2Q=h&SNwgs~ 
zzP4Tcw^9Aa-dDR){$4A8lSjD6i6aX&^63P!PTtAgqO7Oa%OAD!XL8s8=}R5EQr%kZ z8tUdYGTCVU-{M4=BhqMbC8~zznpDOFMS>=l_fFnmy>b+a*;YP?m=GC z>iNmCa?dYcRLCEdF`tYW5EeLZ5td*XR$=oZ^8*G93=Q&B<96*YvRU40p+6qqUCsV9 zUHc1<_kX;l{e{&3*FQCf|AMxKdT)J!`VVWwor7F-9k;Io*}*ojjqDs}jez+9JEEVo zw2Rz>)OxgA!}vE&@@vN#oWmE#-!5=p!WGmhFPA^-zH7bX3)(>3E4hn| z{}=cD!u*2K<`)>-uP~>ea`5x)BmXn}>VI;L-o}Qt|uGP1lTp!K(=C<271b^Nf2OZhT;<|DEkj3kUlS3Q=*zZ7;Z9UgYz{d>K`d$)Rr-5W<(yEf8X058D8K1?U1|s+hJqJh_GqkFNV#le=(G~?r!(-#>`&|Zw~*m|DIV7*Z(Wwty_J< z+XKIvdagXj-}kgJlx0jOKzO;J81>NZN9HP)$OTJ zPQUIN)#SFFzZte4GVkW5_Pn94W7yh9xc*OtH+zQgtmfH&M5FL+;GMJcXt(ft^oFHR zh2JO7;ZJZGS76y;&M@)(1-+5pap<}59q!H<_GK9So1xtvY+a6-5}Wf=+<~rCPJ52= z1nUR>f&NY(Vdh!G7!u)g?x&pJaO_RMukvfR zj-%nkQ=w_1zGe^i&F^=g2swX_pJD*_-;>XiPdyoOMxr177QcjI{x6V~0Y{yW(`yHDcz?jh&3>+){l^~-}3xr?6+8`t|EAvW{dalWVE@7{B^TgX%I zexUySQ2D!e@e}q>?UUFq%ttt|vQOe*;}hZI$=2d*9GN&YnB8$CJ0$;4>eSB%dnbyg z_esS66IBoP94%uLr?bW;+nj7^fV61`P+b4SdTZP<7<|zGIrE zIhQkhQ+`bu%Bb_w!*zSmJh1o7DRt|oUrV%Ps%yKcw>#`{g)05}YI8;Ft^YhCZ0#L; z(0pcHXtuWFG5^sci^tkG^0YZ7j+K$+Co;mv{PQz1!U5MfINrR1ff?Zw?n7ijU;Ev- zhYy|qA~_TLFK2|0MyDGmPlSEvGs181+loW(dDJ)JRB&}0{cYdwuklmt@XZsx{}Xsm z`rpM}DKGxIV{hVpe2kKaWB(wXefb&b8|~@eO`NV8m1rMCULkv75oV1_oZc4o(TUT2 zMkm_OMftr%`?5ia(`UYwXdgu%!oO!$;`E}I67Bt7N}L`OWp?89(O*uqPt2l^PqdHw z<;3YC?k(iq?kzE)O7EtG5Z^dKM;qZxSRt1ra|N! 
zCVySJ(H!x==a93KUfJq@c6q;;C@<(4ew(}W#5WRYjvsgY7sxXtvslh>`ds99>z6pd z?^*E<#7A>`hW*ofnjg|Le2e?T*@F|6WHoBhfN$gXaR%>>{AS{h$R)n@ujB2Ep5d+K zJ;U~eJ;Q45H(wc)nCJetj{T*?|4IHT*6?3O{wA2Za)S4=-gR^4Bgp^O5p4+RA)SA2 zFZsWZ{l>0#51X^>88;#$Y(10_-UzEgcgMF4(FP~`pcfjHB|DG*FuXJA2jSf*tHS_( z?dGt(ce6)mP>}67+9M3%_YQaaU1fMJe>TTqG28(8PP<@C)UVRDKgQ9s@Ne*a6m9Ds zim(1K%%_*0i}p_8od4gr+tu5@PFCmqFf?d`G_a$z-`8$&@4p$3Ym@vI8L!c_*FQ4n z*i$O339YwZ3(31{L)-n|2su4oNUYC%A+e#`3yIe=UP$Eje<87P;0uXOysj_0^c)QEPt3A8^_^F=jp=}r*FtJ3xxYr zxMS*s^Y&-J`_=Y_^n8`#tich)chF4!F+N)SD~UfP+oHb;tX-Qiri1^^{X5a`VhF!S z{(02T8V88-mT>LgP5fol73A&LzMJ@~sCx&+3q4Ep^KGNe{>ao!eYwm~$DR{yI}I~K z==%%OM1ON*hO@!lk-yE;X`Ri^Gq$I(;LOm3Ml>L&^{KF4TVO-q^zizrr?kH^!p6(y zMV)`j{3q@i(!aLF8}{3NGqZcx*1LPyZolrg>b{kD`_rewj>X-zExY@tZuju+QstHP z3%dt)5ASVsF1=mfIWN0=_&{6lL+9^x{yyioPjal6Ii?Gr3J0~7J|-J7o(zYo(><^8 zU$P<7{QULY(oIR8J;LW|FS?KNV#kJ>A27DN`Gws>^(|qJYKw309%_dVOVka1%KX-+ z!VxqK{!(Zh+C4O|Pc&;2wPX%Vv|g4jMt>=^A;+`!_rCTX^#5vQ1JiNN#{NCRro0~h z1KcBQ>F5!*4jr0ED6_tfU&PCpfnUXr@~6Vi$rM%Xhp zK7U5|fc!Ainu@;W!m0cAlOOd^FBbO*2Op>>@2DrcsTb9a1;Q1QpE$3GEaomDOZ%%= zxAyS-aerq20)EgVRCVhSs@K|Id8K;V`L&CCm^-PS7XFCy8_33vp@}Ab&1gA0G|`&f z!}^>a*0J^ouaC+I2Uq!za`n3K1J84aUNA`;hyFKsb4_}30%259EV;-$|styKkS{@sxg$ zbnkt?i7nWQw>I_&6P>%QLq6EvBfQi2oy5B@e=D&g?>mW|C%zQkv+u|IE5DW4bs{tD zK074w0r!U&zn$1a=B<7*{AcI>8umW;Qdmpwz~AD4XFG%BdcJFn7KysL2va2 zy#+^c94FC^GdPC}xP&XXifhp1&$)@)xQqMHZOGZMcU?#)58RoNICybJ;^XTx68U#l zhC`pNwD;|+p>X%B;gi-^%@2Mx6c1l(d}J;A>Do{>aIL=oTJt~GnxD4T`jWMwdh$B< zjdkqH>)3bJvG1%4^^4bqBWRetE;OQP_&WJ-U1%A+F0>-qe_d$9JL>ZB>g7Ksrz+zN zt0>FN&r()??7Nkr5^2AnszU3~RiUMBR^oq*mKR^J6zkjka0(aEfzNOU576T~+8tPl zjo6I>)ZsjGR=*b3uYWCU!0Y>73%T203mdCn3!AVRTRL6~Tk!_oeDGS>_UUV3`)99( zx5&3IzZP~BycTwrzZTvh-)(&@>>_t_zqfQ6@<$ z`{^Hjx-J|TxGo$Vx-NW-{Oon%5Lw{7Lh_Tj>q61Ob)i^XC1k1hT1J+0SCEyCR}Ecf z{e|~W);PbGtn;4hXa80>a(-oKc(6J&USAoS?yU^X7gvWCdMp3r*_EN~;z}9e)v!MM z)v#gmtKs!mUJbb~zZy19eKl;lw<2s_`>OsAy9W0gvtRWe^dE$6!foI8gYZ_heR~G} zAne%rpAtKVzZ%|I|7v*m^sCm3zZ!O5e^owsHN1azc4E)LS3};y?h}|S 
z&%PS=&Hq8zzy3cZK05zuIH3P=aQpWYACLXdiToa~hC_p2_59M`*6$}i$@_kysPX%W z;+3z4lDWSXF8W@V(R+k6gUOhVnV65YSdTpHLjmeAePqa4hypa?7*60k@>V_-?vg`( zQ8|Wbn1gv(fDJc(5ME#K!?1>)gH0=b7&f>3FqF|7a1PKvN7saR$q$i__6d^{ zr^Dn#`~82X-|_DgrTmWIGdw_V`Qj~nfD#-oTN7#~tTC3$o`T=S@1u6fnoyU%MuvDY ztl#%!*g(EsY%YAG_AfSR_;1FR>raNQc;lY_FSgOQ<1J(8Z)1ms`cAx~|N1U=&F<#E z7yg4me;<2R>;K{d`iI!-*gov1e}n^#=6d7fQ~JL+bn)TZr9%2AD7t5i0wwfP#5GIh zsGwJ(s*!yahbJ4~M(u3&Rn*ht`pU=aGwID}ajX?ddK>=GyZ90Q1n>MZiNC;4@Ynb$ z{vMy>%imSz;Cpx#KfnfT!drL;^=QJ6@F(~){1|_WXUC{}@D2Q1Y{DBTK@)x(KgJVd z*;Vi#@PFYq@B`QbCTA1gz{e=WZ{v6IhxieGjGy4I@l*UgKF9v=NekG&v1j-T@+bHy za@f$cBi{@UVkp2mZ?in9N z3B43$j+LW=UWuwk7f7f`UD8-?D+H>>MLm@uF z0q%nvwfDAb?~!}wY7Z{d9$czDNbcdzdqsP3s`eteYoPWd|Mwh!pWMN{GedhbQ+t!# zHd=nfTX-8=uoZ9MP2^%DHeoYzupS%mI==a@q-lH)zkye=4jZruhf$9t{t!RHPx1Ho z9N!oha{e{`1O5~K3*Nw6*pJ`F@8i$#xA+{1@zOY+!dLMuzJV9>3@yZagRMZ{XYbWn^o+EL0aQ#?T?2 zXI>)Y#fGx^MeFUQu;J8M8W`7XB!@&soc& zQT?2B@W(McJ;l|u@Tbw;Ap9ImNeh1-!%w2$7KZ+p(LILF-XGU?|5bG3Z_rtr9{!H} z2mB0OOVUG{clIQ@W~GO&Y3boh^snMsw0e!5chkdflcTwxgQ1R`&b{fO`_Pc{-%x>g z-k*@^-=t@yhv8&SkAI)oG~l;V?`nhhn4FdfFY?dpt=`J?jmx}K4dv~N)LF+8;mtLq zvBCYr)gj&4FxW5h=Bo3^{WH~p(aoN`lT1!ZgpV2$;opjHU#;U))qzzH!{>1)bNLms z^=?aq|0>L(9LKnSjegL%8(h2cDkiyvt8|PUi&IUgW%TaWp!Q9>c_EkKaD8*g3PDe_WV6;bUCV!-iel z+s)&oH;3T8n?q?lY5#KbCb&1uV0%xOf5lfLkL6Xf{V!8Td)_1CggazOmqOE~vl-p6?x-PdC8z&*ro8lN$i1%(IP;|}2+>y)139{8O)oSiz7ojQ%F z@?^U`DMz{A_J`->+3T^(eRs`}|CeD1h6yXq{jI`!=G6Byex5qNz%|D3^ZxVL!{T@7 zx-YD9DL#LEr#rm=xV~3+J9T^y_x^!#I=p#vL%*GRcd;CcX(y(k3dg&SibH%G`iXm- zYj_8F%H0o?b9pgOt}s_wxQ~WZ0E zzZLIAIQNswZ^JNiQhFuAxA||$a?ao&eTDD??yh0lKCaU>!n@w$`YT<(ZwT)w7k8iW z-bZqu4dE?elPAqzTx73L<=h6x_rENj;pPQ6wjuZ7cSv@3UG67_xo^+5|1MVwd`^;gHyhhrp0V=Ts_>rf(OM>%9*c$us|5kiLWHRR#T`u?Y^ z(K=>rmTT0Lac_qPG@oX}MyoI_XcImuY!ByVVk+jNH~ONh&#JVpFe$C$!ql{maYc#F zIjhq;Yp11^J64T4G$QqXPvdeMF022S`u60;<-UD4^Wn6$whU$e9LxTnoe|#D2HVEY zwp}~yE&AKi(~df2eXIJPe0QpSre{h6=EJ;4zR$g9rSxFF%Ln9#+9a8x3$YkW 
zu^cP08f&p08?hDJu@k!y%iKJ2vAAy1my-Lq5266wxHGVGMf|tcuOxcVJ1=}Ak@Rp-M0n;oKJg9ekR*kT9~W2_{BgcwpWVnt09o2ZFmi>Ij3q7sdY6;El0%K!rm z_d5*CFgFGm?zfp?pb3mMHrT|96Tk+i-CN4Zs{d9_u6RwE zs~S?~$_&cM+*ak}CHYDS=L>~1BmK&m?L~@#V?H3hGhwyLnGZftvOiKJ75gQO#kqYm zav74>U2_ePS-lgbBW9uFIVO+S;_e?Q%+ue zt+JmmTbFXatv^!EY+j@6zCbpCmSUuy`nCl5SC0PzK7fkARZ_{i?W%Lkze5ewLfr}K zKxl|vC5^}?=JgHGoJn0Dx{9)zF$!pmV~hd}n44fN_n@}t%4yG(ow!IUBY4N58ow&2 z4P}f2bmzFn(-%n*vT*St(f6XOg}u~qk(9W&H{3Pz+#50*f9(nG5&wK--i*3;g1Uzq z@1~b`6ug*6VVtsH40OK=&+V1gK*x zM%t8S9b+u`tKWySlry%1UHv}XM)K_l>Gvjl4DUezZav7qLpbJ3p&7FVhJY$fa{=;z zs!-F8q}ib0H1_cHhP z&ZTn?U$gY&Up6h}`(I1vR{eG9+--kcdh)i6yBSrE>#COXqH>U3&6YtxHeVVcr}sb9*|Mp8UaY!hDvd4 zsOT}5q^^+C>1(7+`+X@-WWV!-%cYVz+bU!=_8MeuCg0#g)?;r#Hl}``@7`W6255#B zFdn{KOwbBuu(-Y_R!f9P?s{vY~MS^p0M zDXjm8p;+dBVI-LK|KO$__`m7Le??mSH+%!Kv@Vi&;aA`J2(joun6bD zefp2Gn)cUbxCi2hZ#O&yPr%EN1Jz)MVK@fwz(?>Ud<)AXct7Decp0t(O$g(EkiCKN zKgf+`{15Ul=R*O0Iw(wK{11vV82^J3%%xCw*H`O)h9ncwz?lkzn z;}J*Mm_Qks_^-W1`;#dfvH$z~BAJw}yJ-J_fBj!3`q2l_7vvh$JybQ^zhgDe*b<&C zp3!!2@|<=;w~2KQD7u-_zbD3&?}>T-d(z4?X+8Blv4EXt%a(MBxCl2wC&hzw6W)s) zbp=TjagSl|#ojX$#M*`+>BByV9Ke4VIfQ*X;qHc?LLZDmIhf!$WX%Urm<37BdG;s) zE#_RvC(gXJSMzQKNgXsY$KSwwz9AzB9m^nTNe!Yu86@R8_U_0ElFIrZ=4hCc#9oE0 z%?y$nzCwho3+Ne4?6pzLOyZ4!~K$Zp8goxEt2N1MoQf8lHz5I08q(27iQE z7=WvJe=dd3F-IW(iu?y`!F(I+h6mwMcmaL`dC&}2=!4(GG<*U}xTeb>1lGY;_z^q+ z6O;uX2T(UGV?GF0UPL(ndw@qylM9V-l5li2HLD4G1N;c?gA{lP@}U8QI4*~4ArfwZ z$1mZ%g~Pi8V0|=Q1`sCnRYyNjR%=P{j{ENXTDe|7a{+` zqxA>E=BURI{$BV8@m`9(3T`A!Abdc$FW}F(p@yMZO}ISdKIA_MZ$p+}M_v);L)_Ox z6Fdy7IsPl$PhnmKE$}H^1xKM3zJTw;F|fgZ!+AheKvRfJLM{c=7Bum&3%0@4@OR?= ziZGFwm%~Tc-@;x3$sDs8x69#E+^2CXho9r_L_Ur@dG{5xo#F2*<>Ux_f!PSp!YfzE zncoI6W&kf?-UDs$7a+~ggsl{&!ZduIM{^nRq1vK(7c**FnzfiO#_uWQa^%a%t;n09 z9rF;N%0jg6Eo6+-HN*ne><{`@QxVPk#l?pvJ<-#_b&Xqkv(aA-#wl0yJzx!cjN%} zLEMLe<75~{qT<8_Zt%cpFyEbrF&GC8-*?aA-m|&)oS>bOyLzW+Lw8CZG9P!Z(8|06rFcjE5EzYE!oy=RcIQI7YoFHdBB(>}&VlhOA_4r3olr;nA14m8q( zeH3>u{$t2-?49JD(@J_eNl#=OcE4TF7f_0{M|hejtt5P0PqH^4g3XHISZG 
z(v#!Ikt5h$TGCrjdLsvB$q(Fz@E=C@VDI&jUsL24viQIb(WmW@5@c!S4k<|3fxg8K zDMS`w&&6Gfe;zU)yJp!AL0?m{kvZ6#lDCWD6wlK<&l9o^dws%o<`1_^BeKf1o%zG< z%pY!N{&2gLaa=jaS0F2w#L90egWiSr;VisN`4moh$&j>$ZnEY<%m(D|!3&d=ZGWL| z`cLYjHz5%FCb$zGhgYE-{zUzD76J(y4l!^y{1gsC7F5AEfEt|UYFH23AOY@!hv4Us z3f0gI$z0?9%cv7z2g%WSDRs>EI1clz(8TyDV?DyWptQir<+NjPf0cgMIqJYxpWcPH*I&=)*Syn5m-B+Og**Wh;q_bTe*SFz_|zlM0e$F+-wTzirN8O>=<9wVUdtKgb-t8w%e;)p2jXHg5jPv-c-XAT@>>zBEEn7B z%$KtW;zQ!Un0r{p`JD{XbTW77{Ai_geR_p-|2$aQ&R!~x9|TJK->;O8f2@*gxTb5l zha0*64fIbBLnPP#BG;Eq9B<I-|L4_o;XE5T&tq^U;cf++dOci66XVzI*r&Lz zqtL+dH)4O3^K=pBY2vz^YkdTHg0#Pb`2Le~Msuz{NFT?E(_9aYgo;>&O{6VGbGCUK8#oa;CE z-;Ud(a8S+vz0ChFVXi5e@&A3y|0gp3pU(V$CiDNu0_-~63k^S%q9o@3cQOADC8S{~ zlpSS^5GrOE|A#8f)ljp@7$MYQu7?Kv8lefZ0h(RtKY?)7Pf^KSx%_(LaY;>@`pyO8*`j)9BxW zzki>F?o{f(`uADHuPVmoaK3Ef*K+>cUd9Ko=OK0Y7vNq*_(G&UgMBFu1u=FL#JZUv zxskMajkJ4>eEAHnA)Q)rdy;evBHcHVW(&wu@Hf(bCF%b);kIJFh;XN{e@57L()Duk zEQN3}ylXm`#_t67aN_(Jc5~d5@HG4yzTucJkrkX{KXF`&eH+|I-rNuS!3`U6dkvz9 zYagUS1o;{ThaiG;#ltSpLOHy|^%ZeF{osM}zps!#B7eg*b;B$?LjG$u@cu(~Ebl+$ zV%9=l67N40%-kTlG~WLW5mJ=G`wx1!mV4g;Wg+NmAS+f=rXt^m>KNYtINpC`T@dd- zehs0#|Hvlnh6UdL*&F13(!{uIiA0kItxJ~3e&qdNB~Dx55_tskqwq4c6JH1Mbw>Vc zjK7Qcx{0rcYwJx%|1p#DA31=15ceVchmj+Mcg0ZtbG!#RI%1 zDfY2Jj#Xca9vo|vyA*cYkikXC45Ep#L2tV7moua#!qwbBAj*bUe#ma%?#fpt`Y zyes-xsp-9zekJ`3=E$p;{6Gr5j7!EdPN`*_(!jVR=F(k^Qyz^KJ@cK}t66VMJ*!!! 
zNER}0ih8}5dLFyh$9unsz6=B6y~v(9)=Z#KJ`_wj5JEYC-4nxm53UHx0Z_j=J_hPH z$Vb7zU_YP1 zr##ToV9w3DUWzamf^LwxY$(NC0($(5;Z5>t2k-ywupfR6Z-T-*`7~U?dwC-~3Xg#e z+Ckxcd=IYW{oDpQyjx|T@t$+6i@L(gJDRD!r*HtuVJ+`!90c=zu7xF_raj1k2XoPM!19$QMJ^(+390=rH4uwZ~|Mo*3T*~7r&<>+HHjfo8~1&wW6rG}8X|unjl|a!_X9CFc>js71&pUcS^q@+e}MWwllmWN z#cn$nDt5}5HWTCD@c;S#9r1KIsY9%cf2V{Jfvd))hmT=yifluOS`nb+}E+;P`3pT+c@Dw};1~>vE;DvVRg$o?J6nP0~ z;>rIg%85ksKY{#DCjXIn*z?y?t|YG!9r@y)|2RbXpU(WpKJq__{(2JmPgz?0zoi)CHVoT;S8LFFW>`Mc@^ml3z+{3bOALQ^wP56Tg*AktLF}~KPqAJdeIk# zf^zhUp%99o_#E@^Py(e;R*wFEHTwSy5>ygTm6rK;1`cYV7IU48`F92m8i=>Cm-%<^sC9{dUH4;;^$uVmJK zC9yv+?%lZeM6thb4Ey`WvHmQUy?-Lu-*+wh3&Jqwkq~t2LfM;WHS5oUS$`J9`m;cE z@y?+`M7^R3B>#hW|3k_D5b}RUM2mh=-Zc9Wp~F=`I33{%&kO4x*kAE9`*=|Il$>Hs z>m2oxf%U)j=zclT|8=1MJ4n6ME7VgWwV3O4=%DG*S>Ku=VoBYjSkv|>wiMR6qW9F6#G21!`oQ}{I`*>9NFv|I*~Ry9;&}gKdH*-; zQFO zi*t;Ce?J<^HTB!$qp9ra$u&F#w*keH=5DwTcEHR{dzE{T4?+g_HOKQEwSuw*!XXqY z<3-jZZ-SLjn?%}^R@KO=WYYW)X`f0S93U^C1)5XHgQy3TrZldJfhZTa!2_ePm-CJ3 z1LR}Q*E37oATdDt643WgW<3CM5c?4FbNt&()a~=}((a_Lw^FBjsoRlV*t?NN2W^A$ zc;*go6ZQKF7Wkj>{-tIBehWUBf-m6`o|~&+E5t%P+zEUhUh^nuoV@?k zr`cZKe`GFp?KI{8EagA4V4nJaf%5-6^*^#0yB_xv{7aE#gfAbY{O9;eWEJ-6edzxs zqyLAjOGE#UI-?;I{Xb+Ab_4Fs__rX9PWq68=>K_XOCl}Ut<%hZhcKQ;z226F{=-_< zzs#Zk2b~*O|8k1HBmUjcQ%(QhK>fdj^)Kbr|DmjZS;qPo7xh1k#IpWHueM=X{}Rl4 zmn_~r7|Wpj54og488kr$ybe?FCHenP_!VYecTEvcJZM^g*ICmKsAOoqfdHPpba)kV z;S_uZS8@Cn*bcYDUh*S|_^yB(!2~uKgR{gF$T2$j0b&2dv$m7)oA7%aCg3jIpTNBZ z`XHER0u?UJ2GH}YRlo^&2hM|ndn(}$!Z3`&5#snA)IcLx;z;iV(mN4-1EdkVDT4fn zBtMXyndn>M-i3cRvJJcA0PV3f@&h?Q_(31-ttrwQ*@L||g8bn4e&pzR`U<%J&whm? 
z*j=Z{k9o!vkh#m2i#BMv`|eI5-5d&4a-Hhdbt!5 zwg_1jw_K`;zlQVIBFmO6|JQmfWF<6lJ_GK}__rYIu-BjBdF1?!NKI;hWI^_!0Lq5| z$u$IsRvRFBlz;ii0u<48mYo5J3AkKq{fCH-P#tfblQp zf1CkQR~;brOjb9fGXDcj8O;AcGx~2WtbsNLG5-^aZbb<5KMCkoEMxvB5&fSS=6?oR z;}FdJ541x^4C}2T1H`!`fV~L=q&t-PpDE^l(wP5AXMa258lZ1Di2D$782bqExZ;@q zL3*%{;_k(N3^|Tnvq1ajJbL=b9O|#!dFp@qe|eA(1yS@Xp%99oSWEk7HT}#``kBbG zG__w!`v><*!*}f;>^0bHy|jOj_1GJbjrcbq4cMEJEn4c_OzPe&o;NUq1*~8LJG9Ym zacFto^gM6KP6N-QiRTfzp(ma85A@Mq>esUVe`=WwF0lUpJbl@ty#I?lZ{Vh`@qmB) z&$m?G52k#E+@^2kdft^ELJB+yFY}&cLp3x&0x;#Rc?iFIct6eHg-Iw&r~Sh{R8Y@W zLe-LgQ2t$@{JTK;cY*Tng4Cy8kcNy4^kpwd6J`T6CtZ*hFj_B&$$5b`$OY=h3)GJn z#ERL5n;qF!e}OTy3(|i2f^^JZkj{k*;yibOJ^U`Pf9D11(O;0>>I>3Gn0^@0T@b!6 zPx*I2hP@YLM0?mT=6x{>td*-yd@M*7pH z!AqoT3GGbiq@C%6_Bh(15wuH@ju_gRnY1%G-v3=|H6M<$55oc4rQ}C1_`ieA@uM8; z1vl+d4~%fU3ws`I)_ll~WSt%|JBa6ycs0R1m&98_JGT^y6M62Y>B}ER4;>0H>kjdp zA#1sZs>EedOLtFtO&)#= z3eQ3ocwhj2NPb@pYhXY81d`z>tl>GxNB$>RpabG?+XJt_Zy*osFbWfZih|~AI0N@m zE@wh6e2+3d1g-$Kl+#2Yqu^JN1C{VoNQD-#!DDa_^}+q{H_YF_4156VaQgxDz(MY- z7uw-{_!F$)-U6W%9t90`VHRXV4&)lB|FzWrI_iHYpib9Ow-+Ld^wj^=)c=GjL6%}K zAdZ{4|BCa}{|nUr^VI+N)exo@S?8et2MsR9|DXx8fi^<3mHt0rj0YJ1Py1i`|G3*{ zX>*^V?QP}#ck=!tJH5RB$ba|$XQ^*aQQuIW_ag_0e{i3=zvOrQ|7qTT!nlzh;uxL! 
zZv3Bk#s`PZbrJ0F~~pA*4alo)IlTskbB+_MmPqa!sqZe2qG<3z>9DgHmm6cwa@}RJV(7e zPkl)BJ1_&tLF_}w;bg{t4>10d#yBrh{TBEr^3NQ%gX>5I%`%r{&7qqPIgkr&+*?27 zpGLQR2L1cRUMXDYMaR9D_HVD~7u5P69ryF-#^YZO75Fz1UzM&xsXmAP2VrWV4!`>8 zK53ZklSbMNO)lzxXvW-Cr<~PN?h@`7*azw((W-YLH{L`_77$o zZg$!yZN%w-cFv)G$Dnfq`Y+HGf&L3&ds1lsKwlc|pH$jE>9l_`Y5yFe{eyXA7wsQ# zbFH3u+CRy(e=v{Xrs}_Fdg&iRHvQHd7yUyk{X_b$dDI>Gm<#HuXP~f}dZ(N*V?ATX z)ITM(GfJuJ$_A-tIJSZ~Dlu2l->rrkj;$q*Is;?Z8@`+WX3o4Rj`{D?%zrb#-7?4g zH)Haq80NoYng7PmLKrL9BANf@xHiV`9U;trBRhiG7bl3djTx*3OlNKE>a)_5dRBUI z?<1am!VNI4Pq`pNk&Hz|(7z91>|u`Qf1c<66wm+S5_y;BY8J*9@)SGsU>{=s1pdJL zIRpK?>vKFBnUpEthVxFotyPYFpjG~fIco#&@O9KJpodY?#tUQI^LYIzy1>iC1#TVf zEf^pT2B&*vXtq~|3DeWtC%vRypO1C|>DEFVlQ3}uWfGi&g6l=2(oEa|WeqdQ_?jUo$r&anT*4C^0eSpP7?KASVrP57R`8R=a;!}C8Q{m%F3|Icv$?=k=X z7a5+Nm621k;#!y$_xV}s|5+L3<9}Z3Ec$A*a+YT#``mky1OH8aeDo>pw%~W^_q|y{RL(xgzIuDyQvPm$G=BAGDZd)Mo2`Eq!%G1& z7RtEa&uI&@{=Jw5WBSejInQz4g+irm?YKB1$E6)Q;>Iao$HlpCT)Os-Grl}7#*j^Sw4 z7yZm}=4XD7e&#siGvhKe-%r0`oc^Vk{-sw6QAjR|@JcZX*LpAe>qU%7scyL_rzvxS z0wvq3jt8(WS=J?@g}i$9r*tltx)iU}Lql31^UDUMiHQM2&?tKqu)gL{pp>m1lk(se zr6Q(9sSIjSsshKP8fp%Cr53VB0;HCD*ELBRK8!BYz34Krz$=$D>WO2oOU{2WP*h1U z@+|)DVETO5k&YkH@9#v{2}VX}XOf>aEE;29V7XEa`wH=q=QS)Q>*o0H;}!-RAR2ap z_V0Wf5}B`|&GlPFk}y9BFG3A?V%ZZN*#bHkCok3f^uj3ReD!ZbrD}4G)V|K1)9B2V zhcPDqHf`~DnL8Hd+CI2Kn$Ur3p;6V0-h2Jo5UKm>`_lOLtE53z(LbUc4jQhnllZ4D zrA@J%zCjRUP4G0l45&?Leg(gQY$%6XXaEC*@&5b{`40R8{t1^c&%}_Mh9No4)%Zsv z8HUsRiT23~#+ctjj>t{oD&8dSf{o&)-pOmaN{+okIb0Vh+Mit|!??Wx1%e*JO{-)+ z09_f1?f-&nsJ?H5hu|eBg6-VOEpQv$0e8c_V4>}|2icm=JR@yDBkdOxj2?1}w|q#( zkmJ@N896s1u5`D!>)#X)av;SmgK2IV%5c-3a7#~!oBGzx^X!)XWp2I!>lWvsA>J3a zbQ7-a#0c{hZsu!-n6Ge)0X8*iXeRe_zY9km_bUsYWT|I?7|*w71-hZ@JOOdQ)32CgWLd+BXCuH zE*YHl$dGSXhEI6NPY?6I!{SDIraa95dc=$S*ijE_=vfe1Z;`Bs0wvpFVg9R4e#&#J z^;#s)K>fG6Kq)Y26`je#{I^Alsx4Bywm{KG6(}Xh(s>K@pH0dWEmEUeomO0nG@*$ zSf!4z_2(SYfNbkj`CN#mV(`#hsAuL7mc*nXB~1Tct036l$m#|5%zFm5d<7*3*u~RnM z3_68vKQtb_O%o=D-wDBHMWLK^%mN2 zHffKr(WkMIc9fy9HmQlRNp+BocRgRJtS?k5g5Q#I)&D@A(&DtCE7{JtB6AdxJVQJ$ 
z4bDQP-dm{DA@ehBlE?Y9yEx}Eo8)+HlD#ZX$y#iull_*IF0@NYf=%>C3zcH7v*=u* zQpovq={lt#%|>Npm#npR$&R%%?q-+Vy>_YNTzR|fd_RG<|J^!esz9d{I{qj{33e%t zw~OA?fu50FN~=52|NNsQtYiGAo@*zbs(L&9KfBbNuuJV6ah|6iaqIPB7xR-`nY zwln`zq%_ai(c$Y5V@AGWinOzy;E!U~7AcmpJ%aCMguj9m+KqL zr2W58tZ2L%B`eD$*)9|Fe7S%Jy<2K@IZ7>Vbq@0Xlvx_)%+h$mO#ja;hQ(r~*=1&2zEm-Wn92WArIquU z>$}na$Wg3uX4b#`N$l07N?T~Q;#g~zcEWY6Hls6GsyKT~l`dpA*VPkmVm!etebe3O zf14@)%`!OGP5;j6SN!YW@%n$ z%;QDm`C1VJr2`+Ahhmc1rfHcWKXcO0KC>w3GpP$ozDp6r{c@Iv?fNdGvRpUSs?c z{T&8=OBlp0Jt~y{Cdo=%CE05k|D79Aa%0;?3wgm_C4cWK#`hWjpB_;Pr+EJ(JxX!P zD$&pAm69~Re{*1!lw~mg?;24mc6pV`1dmb`v`VTOL;Q`) zR?*(|DhBO{(wyN@T680dG0Ur%GDehEHo-Diuj2jh602^N*yBqYO-Wl)?E`GE_aP49~5i{9h$5a33~_$GYlY z`hOuK%Gil^^#33MT|n^0TqUFNOq#ydA z7kU_@>rNU`x)K?aOT7x+B*y3xuVSp$DUSH7q%HAM=^~F#kcZaHtHc6kQx{`B*@`JM zTQMSAV!Na{ri(sn7j45*rLjI+X$XE#>Z7`(?m&rBJJ>2U%ev61%vP!hSJ|7bRHS#& z2gp*&BD+{K*Ci#1U98FIl48OYdAp==S&5=sukTcMQmrciT$e^#3Eb8`VU?-lz_5mpJXQoOU}Mv$=whj+S6Vok7p!* z^_WtS87#VlVDujYq-e&g6bE&So?}W5G5=5avRxxe`Jk8Qe@v-#d6lZYBTBWytJLgT zF0~w2=jf37j9}*f8J9^6=J^j0L)w_qjJ*ZrBcmf&Owj5I7Bg6M0b)(+5SuGN?AkG< z%{!(z48hWV%B6Ia2TSL>GOr&7W=533`e52W0Wuuv zRYsPzp>Hv!xQW}dg!aERnDLol*1rVE_!6(8S>odR7voAc6EZnzD`@{MW87<0$&2Wc z{EQWpAB=w|vj655XEyX}LRYU#={~nYdd{Jr7Bi~!MLOvJj4K0KD`YU1_P>6G3~N`& z$PDArA)|`>v`g`v8dpZ0D`@|%VEmKu>cCOO73pM+fKxmPE5*Ict$61M!`#9+$Bfa2 z?hReZ7`sR5Pg_a9V5JOZtfU;U$?){JqUu;U(pR!ZZKZT*9n|y8P3+O6BRyT+B8LARo-Ux|(?d!xpTD}@W}zcfQ0O}mv+7vCt$S}9ph zCcEa{N=^uQJj?z|=SG#>;CA}9g-V|47uP9SDOX5#JoD}cu8`aikD}#y$qS+UKh5*M z)}!cV#*{*oAc~j(*Kgn*JWc&H)lU6Uq?8$olyYm4QbAr;Le-3eYjjA>qC;w-uA1_{ zyhv#nEK(ZvwEuPOVt{7)ge_oP>kt#PX10qtgZ6)7kz!3RLjSW}?9jHsLHm!i4Ro;n zoBm%yyEx;El&%9sN_TRR(nDR<3w`nAKMWjl$RG?ww#)E_BFgY0#kH(RadXce7|n2q z7siqtG7i}kT4|IMhd>Ww+*^MAI&_*D^TPf*_9EAH;=rt1yiQ80*D)@+PI}ewy6ez; zTPMMoT~I!^PRyUvr}M3os&ngP4!3&ZX)IqSi>i5cot#Iu1V&0A<(WZ0O}~Gg7zh)O z-L$Yy5|H+gb&UUBCux}5=dY6tWLxMuab#R4U8~ngH^+1aUMEiIi(ALI7-M!(ToZ-t z(7tstoUo2`SjRf>b>dE1$9;1@gmGaX(?v=(=1~?s{)KzfxFRGgGhDK>!l@g=CD$1t 
z+L;K+bA?NOdAJl*hqK35gw#+c6!k_(aaM%rp~Mg_r5sa6xboBCQZX1Vl_$cbYB8Mt zU%1rFhD+^KxYS{<$KHTnV|}`4_1rKI=Rl+1MEhxzWII{^>fOZr-zMh&HZlLRN%ChmNx`X2qMP5u z{!g1^k?>2X=axYbtcJ33zAXq9PzhDNo1_|Q1~*auZDDVkEmFU43;RE8k;afM(p0~N z=YI?D_GW3>wVD3kW-%SzO#hF1iUnhqsm)?#BYWGa&5ZwSmbSqtak!$SeLhM$dK;wk zM2$FmcSzUZ4hiA>J@Hkve`{#}Hqid9q5WG!`?rDiZw>9=8rr`#w0~=8|JKm{t)cx} zL;JUe_HPaC-x~3$>7N-Utyy8Rf%q)W^%9AU1$%jzBqJT?*7N;^^_+jbbY`s=Cv+LY zq?==UTw&6CI!yWo!z6{U1B+qI_k}Uv7be3~VUmEG3wt8cQy<1$1MeE7Lk4JK*Gty< zFv(sLCOIKtl7^c$X}#pdhA|HlCIzd*(DPYO`md+`x}I^A_4Gg2qkk4Ar5sa6xboC6 zsfY=q{t1(+j4-L*7l!^-81rvoQir`Bdjo!rAz_rQ8^y5f9lpP?QCdLb+$mXuJLyC1 zr2otQ7udC~*U^7x|BEU1zgS@Zi}UP%QU5w)C+t_SL^aKjv*|LKxx@4JnVGPDW{kUOP40=~_|I)ki>tQ+2%VJI>|G+f+ zpP>K1{Cgo3F&|&NcvSQm=szT(|3H0LmU@)7DD(eu)KN(P{J)R+|K$IN{xAEEG$)~t z2}TF{rqJp_{|_vftzer${}0+uqi+iBi|GHHNB;*MA1As#UC8dV3F$!xuowRQ{cOyG zQydz-R{gKQM-Q95h1vlV>gys^R0)luyV(@WKiB3O>jE zEC`QG09*q9z9?XE8J7p~;1S&nQQb zfygFg9T=ep2Ea-9N#rp&4sSy*ZkG{$5c%KOzlDFoH1*HbxW5-ef01*!@c$9^Al!Fi zzYDy${S^5Ow6Fb&@BXsyhn0OlraqCbgtO>Xvd_w{vy4%EC9MZO6f^IJ(Z#+WV5?>y z5Ci*y5Z;P(2i_vj5zh$H1;50c4?fOu82V<}=VKZB)FhsfUf!vp%(INKd_)_HeQHu~ zkx;I!l<;P-!!Z0FCg69FJ@~Es0jW99d&;|b67whUPq>(VaD$h9WzJ7Y9kQPLXij1u z5NHAeRAjL)P3md-sUJysFZk`-!QbDW`6!C z`uQ__pTgmjP9OUDQ+%HS*}cg8J`;YXIKFRzH23<%a_%y*f(`6Ym&*Q#(2&9Yh|q-D z0L`L&4%}%$u>V#Ub{!m}i^>vmXAh^|R>Rb7S-ZeK8OUnv?TjsS zB%tSmbi|<7lgyX`_D-a8A9_K_uZejHdP4Y_IL3zA?t51(==@m09m^i-Faj0$fHr?@9=V>*1F?BbOp?h1+4C zXX<|BuW;Xnd;zjB=fTUE3n3fM5v~N;3=Lp|37)+nWD#MHAght@AistN%$G8ja2d2< zw!n>;cfbO1-HKd9?ngcXPr?gu9=AfIgYY(F7jh*W!Q77=;k_L}dVy^&G|S+3n2*3Q z&@6dhvd&TeK@Q}uCSOULJY+uUTM+R+`Y-QGA!%C##pq<|p@cLn&3r@365nV1n|y}K z`NL9`NxmK+pV$6YYEyYGlF-A5IwB4GcupdDF31A|dC?rja{|T_SFnK{ z+D`CXKs$6mXB5u`bU`=t>?L2J5Bg!?5cvv2FbpGmc`m@c_!seT4Wo&yYv&rqxQ206 z&ePYW-#Ci&($91sMNbY!e7_eLatQZf{0ESO`1k42v%}tVVivs)^zhEn*M&~}oD6>Y z_o@^Izn6;2y{yWAw8Kgap-lOlg z{`&s}&w;`?-9>N(1m8&89{!B`_1Hr&e}MTEdGRI+&3b(!JQC={R;f9f*8y{Kt785TIAEnm*INM zKZFX*n~@F3+diY+PWV2|LlBSIja)^$AcinIAb~Ku;eFh{W{g=gh5qj}`oFV`{~~j- 
zYZuV}K9Bw{vH-gd_d@)O76PUCDEi+}!rlX=e5bxFVGZ-&Yor2M$y(tmWHt60WNqde zsf+&|<3DQ{|5?NM&l<*m)-e9VH`<{k<-f$}T_Yyn8rr{W(1%#V_|F=#B5m01$hPxq z#DQ$b-hu4I--+zP-i_?x_}&D*Z=bk^eH!?_{rRIZn6!rPAFbj0M{DT+tq~WvF?)RI zf9+c%-sCkhh8*XZOSh5la2bTqzqTcxqy3fY*+ z`ah%rdo!{nc7pLg+W*iBX0U)2Y+#4BIokj5@AZF}ozMl{@V~bIO{_on<%`3SrTf5QGAQj1?9T+h5qH0*#Hi&0awJ*&c~jKxepnQeHghLnT6XU$S2_- zybN00M)9{{?gJfmJq%$!jC(oe*O0%5<4}$LYy7?eg?$Dth4*kTkylpm0XIUfq} z(?Mb2VJYIdC?4eb;W;TmmSQ&&hY4E23>L704eZbcthbbQ=zvaeLKk#H5A;GGuqIOm zU=W627}WY7l}T}f`mLE!@WL3V-EXXZ!cJz_tA7o$R@U;{DdDnCu9FD4Ue=>9cSF<$*sT8fP0^}pi~6@! z{fklmwyA$NtAE?ozgYg%VA1MPYEU(#8c+?V22(?+f#TtI_%X0g;0+0IC)@>h!!Fnj z_rSf72=~EHU=Qqt`+<#vZ+H;)!G3rM9)=`%1XAEJcpRR91Mnm~1;2n)cp9F8XCVy^ z!gKIE9D*0%MR*C);br(GyaE~UD$M0-{Queh{~Z6n*8iX9|IhdT>-_&k{{LeCe~JIU z)c;@R|1bCdSMdMQh#KcdBWi3Pji|?dVNsD`rp{7{x|VKof}R$ZTx>+YgPZ73jF`-89#1nQ~!VJVKnp_D?Kh>c3UJwok0;c|Xlp!+)CJum1O$)c<}p1y1LySNK_v zdS9Ps*Qx(S7WLntUeD)t^}L_=sMq;LcBlGZovZ%WS-voSXP9sF8=BR$`ogRp^+k_* zgcSR*Bk*(^kLJR=-n>M8)6~<9CYjJ8hGwZ8&Z7J8knj-7HZz<8-s%>1Myvc8S`K z({{hpcE3}sM8)D1>vxLvJ8_E=#h-d(2rkxt)9ODL!$0TepHuSBS@K*M*oaiWt_J@x4CrLCx{mBvSPl)I(675ffXn!I^Z)L|W|FQlA zh>nqH|IJ5llW6}fqvH_mzfmg8X#ed|n??I?Dw+!;HvT!vABTE*{baN*)CgRWed^Z=#NbO$5D-PiyEc>O1H|^n}qz@ zx>>eH$<}Dux<$6)7$aM^$<~`?>vq{1D_i{$`0r?|dZ_=dVkG7!AzvASkHLSN|9`vq zZ!|^?<E3QZ}Y-l$1@6!;6yg#+W=NkEOgIkIUnm zo`ZCGJmpz=9P1PEcpATG*dkBJ6SvC~De}a#@*6r{pQ3{Du62d;3L-q)Mv#gQq3+prpPmsjtY>@{BwaFVAp)&k*dH{qoF1 z@{D>ZWZpCUr}2x1E%MAu@(d~YtUUX)JewxZ9+YRFdrF>tMV=+`(}Yk2N{g4Y{gU>O zq&+5SPf1$pvy#RSZ`EH7@Eq0`C5@E(?mt=%%E5R!c&8lPF9#ozgHOxBG&%U9JSWfX zm*?eqHQ}C*m*?-4=dnK|&p$5DzbJ>~1$iOm2^D`_-^plsQC@sOUVKSjl5}}lUQXrb zCslqaugEK@{8A-DUKKtnEtztpRgReDh*gf*G?6-DcsY9MkcakB3mYMWFl84 zv@($=6ZtYxAQL*7D3pmJnJAVCy-bwIM5#=a%S44tRLVq^OjOH6jZDt(V*CL3k4NhS?4*({SSGHH}alT5bCq**2{ zGHI1bn@rkevP~u(GTAPZ9WvP|lTMlJlF4qF?2*Y{nH-SGL75zq$zhpv$)sB*Ju*2e zlU|t|6JL(_a>b_=U!M5##aAG{BJmZAPcOa_@s*0NOnl|ys}x_A_^QRn1YxcC>cm$s zz6S9%imyq02JtnEuSI-D@tMTeDn7ILEaJ0@uT6Xo@wJPuLwud$bBeD^eBI*f5nr$P 
z`oz~Sz5($Kif>5J(D#jq&m}&$po#Ar6-p>xtJg~8wK93lD6g61wT}No)tx0ra%Jmc z-b1-l(TgS^GieBVk@TVo5I~~u`@ZjcqVIvXn@O51S!HSG6v=uaD-%Fs{}_Zm2UzDy z#$3^QxbM#+0uk=M{$TmO@e zI8{DQmya{$<81jjS3WM3kBjBwQu(-CKCYCHtL5WH`M6m=Zk3PQ<>OBIxLZE%m5=-7 z<3ag&SUy_BY590oK3P(%_)tFmP(HPkPp#!sTlw^%d}=SB zI?AWc@~NwQ>Moyp%BSA)sjqzMFP{d=r@``RsC*hOpGL~3(ei1md>SvGCd#MD@@cAk znl7Ja%BR`#X`y^tET2}&r;YMyvwYerpSH`Vo$_h7eA+9Y_RFV(^69XA`muaEDxZ$a zr<3yOqWm;eei|-6jg+6p%1`6vr|I(3Z24)f{IpnpS}s4WmY>$kPa8$^|4-ZHr=9ZC zZux1y{B&4;`my|URDL=xKb@4HPRmbc<)_Q?(^V0L__?M0+**EaD?fLapL@#Bz2)b= z^7ByndAR&MQhpvUKhKw+7t7CA<>%Y~tJu7;ZDZ5O$!Pi9Q9gH-&)wy7Px;(eJ`a@7 zgXQy3`8-@ckCe|Q7%QL0%jb#md9r+-Dxasz=b7?(wtSu|pXbZxh4Oi^d|oP_m&@nX z@_DU%UN4_F%ID4Ud8_z~_j#v$-YuW^%IE#^`JjA0ET4ZYpO4Dt1Na`TIio`*{&Z z`O;Fpw3aVJ<;!sSGE%;bmM;_K%Vha7Q@+fWFLUL~eEG6azATn6E9J{-`Lb5Ntd}nv z<;!OIvQxh7mM?qd%YONCP`(_NFF%$qN9D_L`EpXeoR%+V<;!LHa#j9g{XbjFf1Liu zdULYdoa{Fz=jA`{o&00SzyG{wPA;31TPOdREB_ewuemb(pAY39BmOm4M*VBf$NX!~ z$N%%HIdN&CIhkrsrvLM_IdN&GIhpnE?Yx=KoPW)g`SOp2=6tC+S#C~N%0E_{^R@RA z({41EHk*?z|C;mdzfS(q@^<+jt9~!Ex)dnU)Rg88~**e z*_`Y(C;QFGL347{oE$s(t-U$vXihqtldk5ZyE*A`^3Udue>Qjgv$^A+L(R!}X=b_k zTidT&&3rb0``|y*Y}%xEa$0`f@~>IqR~UMo^0%7hZ#B!`YG$$3 zEPAV%WV`&j<6kouC(TsNnR}Zv_wO{bcJe;K{`&-$uww~t6Cb=!{IxlndeC}G$(J5en+|Xek1J3+w{$Vy+BTziJZJ$ zZRUO2Xx`77DVjx`HZwhayY>AUa{2w5xt%t1J8d3*=3g@hCx4y2-P+9IteL}^^?lVm z&V}YdZ)gAQ?J4l8{crC3@LP`yzjYbnhrcHL=SVZ*+k@VgdG`KDi#zwP8T@&(Fel9& z%~^9-vrH$=n}5G-)GGun=yI!HYaA-^EQWO?jC4OdgN94?YaoP z{&rt}dnmvEP=0SKzkevdca+~d%kN#~_rCIbfBAi&{61WMpDMpEm*3aQ?;GX!?ehD6 z`Tel`epG%xDZgKr-*3wA_vN2Ilz+CAf3}u?wilm{J{$kpX@Nc-|Jhys*<1eESA0r( zh5vK3{INFv$J(v4iTAU~_p`b8v*q`*mG`r4lkL7=JbXVpa?5G?V`KS`4fEJoc|SXR zJKO9ne{A-ZKei_R*mCFA5Fc^r(D zKMs48>xZr%x$oo$)v21x?mKbcne(&e+%y+6YEI3oh4RN${~wpf<&W##Kdz_W&z8#{ zH~oKHyYbdz?tb{={>)kHANLo|`rglm-_IuA&t~7xj{dmwpgU*q&h8E6e(3Gt_akqU z+|RwAEx(^_yr1p9pB?|#V~*b+dh%a4oHRH5`NKx}vu&=}->q6yt7=mpRJ-a>ovKT9 zs~**>`c%IfP=jhn4XY6~rpDEbYUVw!7SxhjRx4^%ZK`dxtM=5sI#7q|Se=(Y+l{N; 
z^4l%1eNs)SX=PsR=GAUq?dH|Krq&ftyZN?nDf4dMQRd%n{_W=9ZvO4&-~OXIQkK(x zqE6LW`Llz&gS&&bgQtV1gQvszJ1nom{5s65!@N4oqr*Hp%%j6RI?SWPJUWfH)8jjh zxzqaT9972NY22OGMdzaOxK5ATvwrtXyJx-bnSRgodzQI(qwdO|`~3T!mwnI4spsZ& zLYdd8^?GVvr_5)jJvZh{(_WhP(sY-mzht>=Znum}W4g4QOV8n@WnFp>FD>uVb9iZ) zm!88*&*3HWrR855^CkDC=kd}qFD>)Zy1!(&^t@iqDr2}bhD+<_lIhZOa%ml1dQL7q zCzqa+OXIuroLqWNF1M8DrS*B~ zb#Qs1F4dK~R?L^T$~wBdR}baSEB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~ zEB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-5f;qX7N_^{8#+f{MY=~ z{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{QjW*dChsv zdChsvdChsvdChsvdChsvdChsvdChsvdChsvdChsv>CfSx*PPd!*PPd!*PPd!*PPd! z*PPd!*PPd!*PJ(;H=H+|H=H+|H=H+|H=H+|H=H+|H=H+|H=H+|H=H+|H=H+|H=H+| zH=H+|H=H+|H=H+~=Nr%SjpzBs^L*oZzVSTY@Za#?@Za#?@Za#?@Za#?@Za#?@Za#? z@cZTOKX3SN_;2`c_;2`c_;2`c_;2`c_;2`c_-{FHId3^{Id3^{Id3^{Id3^{Id3_A zto(V)dCPgrdCPgrdCPgrdCPgrdCPgrcguIncguIncguInb<1_jb<1_jb<1_jb<1_j zb<1_jb<1_jb<1_jb<1_jb<1_jb<1_jb<1_jb<1_jb=$Z+ueYApThHsQ=k?a}dh2<; zn9p5>n9p5>n9 zp5@+icF%L~IlE`N_nh5x-Fr>mv)!}Zv)!}Zv)!}Zv)!}Zv)!}Zv)!}Zv)!}Zv)!}Z zv)!}Zd+zS}?mc(+jQ5Pbp!|8ydGC3VFV0_?v;CtYEV0++tV0mD9V0ke9 z2jhP*{s-fKF#ZQ)eK6JsV|_5r2jhG&&IjXsFwO_#d@#-jV|*~42jh7#o(JQ3FrEkF zc`%lT+w!j;hSaDUQ^)@Ob@uf*e)Vf{%)#2ej=Ktkm{$Cwt(=nrF z)j|1JXTKUygXLdct?Gm7RNbmid0dyrb*-rlwd>zsXaB!T?^h1~)n%q#&DqTX znrH8_GT+`+wWEx&*L1z68^f9~&17%+*M|9Qbd-PX&i_A_a<*K){?MXYRhQ~kJ*rpr zseUz}2Gx)nRwHVxssH7xpGSAD&EemduKWJF^w>N9?@I^&zW1qn^sn2V#wY)MX>RYY zOAc56`_k-R_cn8%P?P0ri_x?gNsCdm7(t5>w79QjO3kP_wV;;Nidt8jYFquNj?{@d zRp;tLU8!qz^R)%Ixz0DG(xnzk}Kx?zq-M3{lQ+R+Gx4jS8mHn@6 z&7|(xYwGQWX0a|AQbXNu>TBEje@*DhU$^ZW{@eVTah^F6_W_A6KW&U-E=6z*(Vq1@zX|+3i zZ9P_v#aezpdwZ6!EU)M7_GYg0E}g2grkaJm4dCs{+q3ev=mzMO((0Acdhxa<{(9;z z-Owz{OQ-dwskimkJeBX4x?F0M&1>miJrvfa{yoa(X>V&)ZR&$+R~@ReskbFI!~RRy z=BaP%ep|Z@p{?g@+kmrP)u*hXx9bC6|61j3gKimoTb0echnji=d46|Y@g9DE0soKT z{`G=xCimWI8*S=+;xQM;zIvH^B{t(}X5f1Be7#??mfO5f-`)cMew{S~_Lf+ETWSLt zf4irdbfT%RMrxI|ZobcD(rwY*L)cVX zYFjmnwp_#h>-lu0xxIO$^*v^W@0YAFPgQesGn~Em0Xl2ek9XSpoSW6rpskr^jxKq- 
zws}*wc`Lpx`_#GjP+RkiG|!K>P1}uXmg4O(V(pH&eZ*~}R@InS)2zQ5v+iH;Mp-iV zW#VPmS34E?+MF_AEwk*`PR}=zO_!2Z`SHJ`z5~lBJizivwZ7WD&P8A)M@$FKV7~Jjw|Ow zz2)2RZuvH*#^=?ta(Qx6ncvh|`8IQ3zRh-)ZyO%7xlz7tAE@*4ZRfar+dC=W4hPD& zlcx67W%+hy`L^x9*|yg`sJc~;vMk&0-)xhAvyJ`D_VqX0)8A}Af3wZ}&Gzy)+sof< zBY(4f{LQxUH`}ke4$C*&h~I1*ezR?;|4@x7Cfjn~Y^Qy*?e)!e);HTu-)!ILNL0h+ zoA1!yeAoTvd+j&hW54{fkhPA#Y<<-RWS>S|M+%HzA{mF0CAZ`VzE?CwyO(|xSW$2Y3So>{f3PSk06 z?Dg1QxAz)LuX*=cey@4=UMchGW9u`YKI81Oj6VLpOJ)9j{QWKJgEC#e`ScGf>!IIt z{l?pGJpDf^%j-Aa{yXJy1J=R7d3hW({=qKQqZkJlmGKQ)hlAF`;GsHF9zSFpL#7>K z8uDBW@eUcwkg*P%-?00Jt#j|Q$B`9fS)-OQHdG$RO+P-WIL6I?!k8!6C#>@c^O>-$ z3Cox;{iO9eX?~ODF=d(4Ow;B&ZGFvJ2ea1UtYyx5-sW7NGv+zQh2HYGIG{|oWQI`Z@#dCZYjaBNlBM|>Pl^*RudbIcG(O#oRdu<-=k$JQy=FuLQ zM|(~l?Kye0cjVFDkw<$%9__(+w3p)X#%%3{c(k|S(H@0IdkP+Hg+JQ*ezax%Xv_N1 z7WAX7=f^wq^B3aLmhPi1*hgEgkG4P`ZFN4{dVGAqv}O4CVD&y+l_y&dPe06+rPrd*=`S5$P2l}b~gKEzE)PP!2Tk5(zb+~SG?#W+-r;bB)T%Nr9pX_CQ z>O3z`-u+Kq9%~&ubF(Ys2r?s*2wBA~tHVl2! z1GZe=vVa};?KK**F@9EsXP*%f%1s-@B4-DeKAPx-7ahs>Z ze&sQT=Hp%W^rOo^o|Y#c`cFrj<>?so_@O+VSV1SIJ+b;uEZffYr&EtPg*-L=sqvo~ z`vLo)vTj$A6wkJE6p6o<<@twMwVb}v9@;v4-V{RK;RBOt( z$JocrbL>P}2V=%PW<8A?@3`fSyMMy6Cq~M%x5M)k%hbFwkLgxrnbW+}*4>QtGs8S< znLd=n9n_|ByEnq~T#GVxA5hQpyz{2>A@#gqnnlwun!gwP^Ro53Y+bJOsp0axYPwaI zSB-6LM_D(HbT+U5XP-3B>#Xa>W z^QL8OSx4K(wC%QS(`_5CUu}Ng@%SC`UphgEY^+WIj9S@@%K?vmLw7cF;cCY5II;Ty~Z|+bQ~NC+M@Co6kNHp6@*` zcIJJzv+4T}?W#|WsFU*Dm+$X(+k9_1R6nYF^-#XIcB@{sua3)i2Q0sTa9`(?T2)(W zN4dY#d^&H-_b&6d2k*N*ci+2A=Y8?LYq5N{tL?j8ZQp(A`)>Ezcdz~LJ?7mrtmf5` zGLN3S^1XLmZIthQZtt_4zVq_E-&p%cmFokRH(F>T^eP1@VW$Sj?xK~@%pfaY_ z1!dY*>tfa8)+}$`BpJ$BE0_Ka!Y`uEN0`+;@z<3;&?WS)+*e?J;mraiWPkB#A&>6qih<4%mxG5GI} z!GAxozE7R|TmAjaxX!#b{B8bzZatn`kN!4)zp%b9-0lF>cVB0|JIM3hfsF4CUVL{T z;=2P7-+f*AZfE*;JJG+}0sh@V`R{gmzx=SOF3XF*zb|$ZzO)>x+w#&nsD{;8d9i2Z z#peHu4ceEsyYk}T_sfUb^5XCCi-XQDHcnm~5Ps?GP<_f{J5A#&$4i%a`!)BMZu9J& zR8!@p??ZX%x4glt@-j4DUdAuV%ft_=SM4j;r{>f_d6{;b*TT#6nKG|w^O>=n8RMQY z-x<@+xP8XFX4}+?a^LKha{sLR=UkuTn(tD^<-`5Ohx^OCY3G?2EyLfum&GMz`Tp*` 
zEIyPMuc4QvcIEyh<6biD(oK2sW&Oox`^&1w_>%szYCfxn%GlQ2?wj|E*UZa?^|G<4 z_R7m{Q`W!F4+J>Eg^m%Vjm9(%^AUE^hcM!9}uy&O%J7oXcNC)WL$ z`JG#@7uV&*!RQwUO<#PqeR0t8<=VU)oP4=9Mh6RD90+`IAn?WK@r#3OFE<|VYwe2z zYcCF*z4&~7@zwU_)>s`7d+|DXaWLw|!KW7onqC~Vd2!I@#Q~TX2VPzrc&T%nrOqFg zx_43P{zwssQ!T1hwW$xPU3I8V)up;skLp!@s$UJLK{cd?RWq+qHKxYZgql=SYFf>x zSv9BT)q+}7#`NQ|T2ZTNO|7d9WvoALscp5RcGaHRR|ksY#~;;^I#ws@RGq1F#b^tr z{&=OX)s4DUcj{g}lU(vy^;-SSuKs4%6aEwa6aEu^ zf5WT4;q`=HFICk`RecYyzK2&kz3K`73I7Sd?fs$KjA;&KjA;& zKjA;&KjA;&KjA;&_r0c`@cUa)PxyVuslMaX6aEwa6aEu^JKO3B{|WyI{|WyI{|WyI z{|WyI{|UeENA-mNg#U#9gx}wxdcuFg@4Hhy<=3!NPx(*zPx(*zPx*az)l>dc{!{)_ z{!{)_{!@P6yXqe0fl;2L$YFntD@}Kgb@}Kgb@}Kgb z@}Kgb^4ngjr~GI9wwtQ&jMW~Ddd7dof5v~tf5v~tuZOPcp{si6>KXqT{~5nM9`%g> zjQ@=PjQ@<^j?j9>f5v~tf5z`SX+7ijzgVwl{Ac`U{0?B&Gk)J!tM3T)jQ@<^XGJ~Z zw@p`V(^Y#`>KXqTzdbDVjNkW(dd7dof5v~tf5v~t?|VkI4Oq|k&-gW8ReNRX8UGpo z8UH!|IsZBTIsZAoZN_@ef6jl-f6jl-f6jl-f6jl-f6jl-f6jl-f6jl-f6i}bbv@@l z=RfB^=RfB^=l8#vujl;d{Jx*pbN+MwbN+MwbN+MwbN+MwbN+MwbN+MwbN+MwbN+Mw zbAEf8s_#_QXKOv@Kj%N^Kj%N^w->6O^ZU+K&-rZ=)N}p|{tNyK{tNyK{tNyKe%lQ7 zg8zd5g5P(wdcl9ef5Csjf5Csjf5Csjf5Csjf5Csjf5CsjZ~MF23s&`BR=t;1?`6H< zzu>oLtlBeHFZeI`?IEl7kkt$R3;qj!+wS#(|APO5|AOE5#_D@xwI{7!@L%xT#;F(l z7yK9e7yP#QtM422g8zcwUbkwmTfN}7v%cC{Uu_pvJM61Qys8neYQ(D=@v26=YI~{L ziC^u+uNv{H9r;xwUe$Ar4z^T{c=eiJBVN^rS2f~Q2Vkm3ys8ne zYQ(D=@v26=su8bh#H$X@RE>C5BVP5Lwra$y8u98i|24mTP4$}pn&0=`>OfA_pjS2M z)oXqSb*k^c)vn&EL9c4is~Ys`HNWkn>Ht{PpjRFEsT%aE-N99ZUe%yi?G~;Y^r{BE zYWHx}pjRC%s~YsG{ae-d>Z(DnYS611^r{BE>cCOepjS2MRo}U*2ED35uWHb%_J>ss zdev^`YP+o3H&z{Fs=k+3-^;58y{bX4`hH&Rg06N!SKDq?gI?93S2gHW4SLmX>8e4m z+NV}E=v57RRfAsDpjS2MRSkOehF^nTbpWes(5o8sss_D!!>>WFYS611^s3$4RfAsD zpjS2MRSkMogI?93S2gHWyS}Riy{bX4YS611^r~%#dc%Lif5UG(qS}tAH~bp+ssm>A zmS5vuwadKPzNi}as>Z#laj)L;Yuu}LrB{u6RpVZ@{ZVayRE>Mp!M3V#ui6%=_7_%- zd(}2cz2(=yS2gfe4Sdx>%c_B|YT&E4{I~qJTdGFBY9C{@kFjdxs~Y*L?cb`Auio-& z^@oVU-wyCOyzN(?GYUrzX{CE5g zDOCH7s_m`n&_dPNS2gz4JARFQ^^RY2U%lhk;8*YXHTl&$evN+BLCWfIM74ciHTqSJ 
zepRDi)#z6>`c;j7)nSUN(XTpGQ8oHijega>*m}pW(XZa|+iz7J#;6+p>K(tPzk0{7 z@vqvSRqfBJcl;Uv>mC0c{~f;rq*XIu)eKlQ16Iv|)qbz)plQ_%STzGy&4AT@u<8&= zb%>ygjEA!)j(Ju_#gNm_%#z&hm5Lb!m63D zY9_1?{15yO{15yO{2C0a2E(esus-l>GOU^mt0u#$$*^iNteOm~Cc|pKd(~)IANU{m zANU{mANVyN)(3vght*-KssXWTK&%=Nt3y`Re#`p6uW6t@@Y{c1A0A46_(8R(R@J6H zsCLz%I#rkIRz0d$^{IX}pa#{D8df7}RE?=|HK8Wel$usEYF5pud9|Px)sk9PD{57( zsdcrXHr1BeRy%4}?WuirpbpiK>PQ`{6LqT2)VaD)m+DGgs~dHz?$o{FZ{cs@Z{cs@ z_oFpw;kQ3GE&MI~E&TScMSoqi@TG-c8(&)ZweqEfUprq~__g$}!s; zz}W8>t%1=R7>6aJJur^Lq=jFLU^F_Uh2OrxI5rder(^$gTKOHqOe=pYe=C10e=C10 ze=EOz*wIWF`?1r?-^$<0Zy#e?`R!*+D}O7${f%km*NBo<{#Jg+fuaF1t^BS0t^Ar1 z)5_n<-^$<0-^#DYFq#R|%HPW0%CAc>x&)&mFs=Np{H^@0{QBzB%5UF#G|NRtTy(@m z(_1vXMGI9lR7Ia#^tnY(TQproPh0e~rHx-FTiW>B_;s>HCtKS19ny_sSJAr`O>5Du zmNx!2e*09@#;-Fin$eqeteG%BTyzm30* zU&B%~EJedoG%Q8KQZy{ZKHz9tN*lj0RfMS`Oclo_B21MJ{K8Zbriw$^5vIxqeqpKz zQ$?66!c-BaiZE5|PmVBEKJW`vMW8Al_&@M};QzocU={nEBV-jJs|Z=;1HYhE92d+7 zenG2z;Qzq?fnVq5ID{Yj$a$R*!D=IOdxUej&AV@OSVFtEGd#gI`NTI`}*IJNP^JJNSj!qIoKs zry|gn4t}i@5p0WKTeMI_uq}>bL~~U-_%&FigTI5ngTI5ngTI5ngTI5nlfRR{lfRR{ zlfRSSv5*MIMK~^+w9?7n$?y0`blb;q>o`sl$FAd8Njmw3=hDgF$=}H@NEgTXB21S~ z{!V_4UJ_xMZ;G*`8)X?4@@V2Cx0h@C%@x^>Ew5eFrECJ{Eii-lfRR{lV3nD zT1VpeVLJIWkVP9wI{7>KJNY~LJNX@7OecRQe<#0IlQ`ZO&4STz7RMda#oxuR`7GK{ zB1Moc{w{vUB-6#;#oxv6xP7DxB3+O!{x1G5{w{uvYjONCUHo1Aj$@{aUo%^}_#M+s z7k?Lj7k?MOghIObr4*uhG5P_b@hy4+q9-6-{9XK7V$#Lm#oxu>#oxv6m{s%*MBhNV z_#MAW7r(~2=p#rMe;0ojzn+40^NTm6o4=c1%pu+U8tkIqAl>{T59#LDdXsK`jd$th z@8;K!kZ%5Ner-AF=I`e3=I`e3=GU)~Zhk!r>E_qElWzWQ{%-zme!UFo=I`e3=I`e3 z=I`e3=I`e3=I`bgyNF}KaV$99{9+ezOgP>A;ujIWh+}rq7#PQi<2Z4;`MdcY$4fVV zH-9(3-ie53q=&zUUrZxn8tLKh;TPA4=EC&wJJy^Y{vQ4w{vQ4we#f8F!{5W-!><=3 zJ^VfVJ^VfVJ^UIJ)5Gsrb$a-F_E-X`@8uVVi8xHeVImHbUjAPGUjAPGUjAPGUVcrU>E-X` z@8#F<84aK5<<}RJUViOi>E-X^*BqQa{yu)kcB3&g8bhOfEPeca{C)g={C)g={C)g= z{C)g={C)g={C)fyO4G-$sWg53ef*kB)5qV(uO}gW{C)g={C)g={Q4Bq$KS`_$KS`_ z$KS`VmnVJvef)j=ef)j=ef)j=ef)j=nw-WTG{#16Mf6rggKV_grJui_ zzn@>zbu`VUpI_r_`uY3$`}zC%`}zC%^<|`=zn{OKzn@>rUNqXKpTD2KpTD2KpTD19 
z18(~H`}sBErk`KWQ~LS)`TP0%`Sn0W>tFi$HR`6HU$bro_y_nk?M7Q*^hsrae}G@V zR0jCSr^e}I30UjuRm_y_n0 z_y_n0_y_p)y<~uYfPavGkbjV0!*aAkMmuB%`3LzmFh`F}^vGn8U!P0{`3Lz2`3Lz2 z`3Lz2`3Lzm*k_P`kbjV0vwa5n2l@5bWRPFqSu{jvkbjVWkY96jv|&brbO!nL;$)D2 zkbjV0({u*;2l)s2_331gU$0IE`885!kbjVWkbjVWkYArh2Kfj12l)s22l)s2hxmv1 zhxmv1hxmv1^~7a}UsHDU#$||KU?4;MnzS>-Kg2)8Kg2)8Kg2)8uO}o!{6qXh{6qXh z{6qX&OryCwL;OShL;OShdhasCKg2)8Kg2)8Kg2)8Kg2)8Kg2)8Kg2)8Kg2)8Kg2)8 zKg2)8Kg2)8uZcXG$TP%0#6QHZuPHVg6zMVg6x$?YkM~ALbwCALbwCALiHA zn_>PD{t^BW{t^BWem##F;UD22;n)9|5&jW=jk+1(AK@S2AK@S2AK@S2AK@S2AK@S2 z*Eg9Fel5fq;UD22;n&EY5&jYW5q?em8Q~w{AK@S2AK@S2AK@S2AK@S2AK@S2AK@S2 zAK@S2AK@S2AK@S2AK@S27tF~Bzi>`Q_=N{D!au@4%0J3K%0J4l#W|z=qx_@%qx_@% zdOtJDKgvJKKgvJKKgvJKKgvJKKgvJKKgvJKKgvJKKgvJKuYWY7{GG5#@r@!E{>kMWQ3>u=2%{}}%m{}}%m{}}%m{}}%m{}}%m{}}%m{}{jU zQ}hRBj9-sz#`wqh$N0zi$N0zi_0DFDe~f>Oe~e!*ZN~V=_{aG5)n<%;j9*YGWBgALrNqn{obe{&9X`tBmuH z^N;fjUS*troPV5OuW-is$N9(k$N9(k$N9C}XPkeWf1H1uf1H1uf1H1uf1H1uUteCv z`N#Rk`N#Rk`N#Rk`N#S7?PY?0f`5WvcqSA46Z~5GGr>Q>Kf$lRFBAL|{1f~W{1f~W z{Cb)*!9T%2!9T$-4v-0ck$_C_Pw-Fh3*}^je}aF4e}aF4e}Z3hAQSu({1f~W{1f~W z{1f~W{CW&C!9T$-W{?T~34S4;Oz=-5Hi6(!9T%2!9T%2!9U4A$uAU? zN&ZQGk%dh1Px4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4Rl zPx4RlPx4RlPx9*rk9b5T`6u}&`6u}&`6u}&`6u}&`6u}&`6u}&`6u}&`6u}&`6u}& z`6u}&`6u}&`6u}&`6u}&`6v0O_^0@%_^0@%_^0^w^Ja>FihqiKihqiKihqiKihqh< z4|}Hg_5MblBUAiS{8Ri>{8Ri>{8Ri>{Q5$p=RH&W`rae|ktzNu{we+`{we+`{we+` z{we+`{we+`{waRFrJ3TN;-BK5;-BK5;ul=Z6#o?e6#o?e6#o?e6#o?e6u&;@O!H6k z>%Y%5|1|$J|1|$J|1`f~N~ZbsvSylJKWnD>r}_0bM~E!b{L}o?{L}o?{L}o?{L}o? 
z{L}pUp)<`t%|FdQ%|FdQ%|FdQ%|FdQ&984d!fDYvooRmk)0yU<=AY)D=AY)D=AY)D z<`jM4`n5B|Kf^!6 zKf^!6Kf^!6Kf^DqlNtUQ{u%xm{u%xm{u%xm{u%xmexaVs@Xzqi@Xzqi@Xzqi@Xzqi z@Xzqi@Xzq;7taj;4F3%O48MFtX833LXZUCMXZQt%GQ+R8IC_htw>Wx>Gs8c_Kf^!6 zFW4ISip=uQ^3U?m^3U?m@(UehmVcIimVcIimVcIimVcIimVcIimVcIimVcIimVcIi zmVcIimVcIimVcIimVcIimVcIimS5j<^gU;mf0loiU*047pfk%q%RkFM%RkF62a;KS z!Klpg&+^am%Z6l@U+;8g`30sj%RkFM%RkFM%RkFM%P&k7S(420&+*Uk&+*Uk&+!Xd zMbIj9{B!(s{B!(z_cO;o$3MqE$3MqE$3Mp}KpEkz%<<3h&+!XtWsZN2Uv?$3E1BaL z-pU-mKJU!&&+*Uk>s5|?K<4=8_~-cN_~-cN_~-bAGc(7pKRk2%bNqAsbNqAsbNqAs zbNqAsbNqAsbNqAs^ZfJt^ZfJt^ZfJt^ZfJtdd@S?KhHnUKhHnUFKCu|{&{{q>6zyj zJj*=)Jii|G%=6Fl&-2go&-2go&-3eF&piJ;|2)55_RRCo^Uw3o^Uw3o^XqZXJinYo z=K1IO=lS(^_!syW_!syW_!syW_!syW_!syW_=WDWz`wx1z`wx1z`wx1 zz`wx1z`wx1z`wx1z`wx1z%R>`1^xy81^xy81^xy81^xy81^xy81^xy81^xy81^xy8 z1^xwoxsojKFYqt$FYqt$FY+()FY+()FY+()FY+()FY+()FY?P7WRZW7Ul=%x{EPgH z{EPgH{EPgH{EPgH{EPgH{EPfD3R&b|hLf&JzC;{}TTa{}TTa{}TTa{}R8TW#lfh#J|M9#J|M9 z#J|KZ-;*W&CH^J;CH^J;CH^J;CH^ITxs5FGFYzz&FYzz&`%#4~@e5-|7&}Y+OZ-dx zOZ-dxOZ-dxOZ>7QS>j*fU*cclmoLgP|1$qF|1$qF|1$qF|1$qF|1$qFzbsOg`Q_@e z%)iXP%r85dW&UOUW&UOUW&UOUWqu*?Eb}k(FZ0WwWSL*CDa-uJ{PIm%=3nMt=3nMt z=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt;g@;M z3jYfK3cvhoR`^%=SNK=>SNK=> zg@1*Ag@1*Ag@1*Ag@1*Ag@1*Ag@2WQm0zwbtNg3{tNg3{tNg3{tNe14S><2lU*%ur zU*%urU*%urU*%urU*%urmr=_q|0@3~zr0aa`B(W@`DKr?$}htf8KkW8ukx?*ukx?* zukx?*ukx?*ukx?*uky<*#SaT)mEVsGWR-uFf0cig-;WJMHZH6D@^O**jEqxO`B(W@ z`PcZ@`28qB*7(=>*ZBQFLDu-!_}BQ?_}BQ?_}BQ?_}BPl?y|kTw1_ z{x$wJem`=MHGV&IkTw1_{x$wJ{xyC-evmc(HU2gJHGV&a5P7V~V?|aqYy4~cYy4~c zYy4~cYy4~cYy4~cYy4~cYy4~cYy5JqS>u;=%^Lq2{~G@~zsz7{ytB@~&cDvT&cDvT z&cDvT&cDvT&cDvT&cDttD;vMgkahlbe!tR?b$W2LA^C2LA^C2LA^C2LA@XjD0rvH~2UBW$&`Vzrnx3 zzrinymkoZ|<81J6@Ne*M@Ne*M@Ne*M@Ne*M@Ne*M@Ne+@Mdobq%PePuf0KWcU%oG! 
z{G0rn{G0rn{G0rn{PNA&?P+x*-7+x#-8+2-Ho-{#-u-{#-u-{#-u-{zOK z&o=)y|2F?N|2Dt;eYW|x`M3GE`M3FZ_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs z_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs z_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs`FHtu z`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu z`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu z`FHu{+2eN>vdh2AzstYNFXNtF{ylzK_w4cS@%t^4?D6mM@A3OJq3rSd(eLc>`{D2G z@%!=b?D6mM@A1n(XODl6e~*8Ue~*8U-w&E)kAIJUkAIJUkAIKf??1%vreu$QkAII} zmO6XS{CoU+{CoU+{PNb>g@CT z{g>?X@AL2T`~86I^UH&0pMRf!pMRf!pMRf!pWkoBWS@VZf1lqkHf5iGpMRf!pMRf! zpMRf!pI^Q_`~3U-`~3U-`~3U-`}}@;Cj0z;eYIp9CwKj1&$ zKj8Pnb~)hpn>ab(Kj1&$Kj1&$Kj1&$Kj1&$Kj1&$Kj1&$Kj8OUIyvA!;6LC$;P=}) zIp9CwKj1&$Kj1&$_nSNMn>#t+Kj1&$Kj8PfBRS+hHpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*{lHpLJr~gm? zpZ-7nfBOIQ|NYi}{eSxZ^#AGq)BpEd`}P0n|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ z^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ z|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ{~h)p_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i z_8;~i_8;~i_8;~i_8;~i_8;~i_Ur%C|EK>?|DXOp{eSxZ^#2|9ANC*iANK42)Bkta zf7pN6f7pN6f7pN6f7pN6f7pN6f7pN6f7pN6f7pN6f7q}8Pye6(KmC9D|MdUq|I`2X zu>WEI!~Tc;`v3I*J?z*2_ptwA|HJ-={SW&e_CM@@*#EHqVgJMahy4%xAND`&f7t)9 z|6%{b{)hb!`yci{?0?w*u>WEI!~Tc;5BneXKkR?l|FHjI|HJ-={SW&e_CM@@*#EHq zVgJMahy4%xAND`&f7pN8f7*ZAf7*ZAf7*ZAf7*ZAum4Z~pZ>qo{?q=`{?mT_fBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(zsvs1{>%Q${>%Q${>%Q${>%Q$e*J&?|1SG4`!D-1`}P0n|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ z^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ z|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ z^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ z|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ z^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ z|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm? zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZezyN-`}P0n|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(zc>5e?AQON|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|L^S&-a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I z0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy z!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a z0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1Da zgaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!- z0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K; z2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu z0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx z5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S z1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rX zAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv z3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L& zKp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST z7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhl zfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuw zFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp229 z0AT>a0Q6!1hy5S+f7p)!2m{cE{U7#!*#BYwhy5S+f7t(F|A+k__J7#_VgHByANGIP z|6%`!{U7#Y0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv z3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L& zKp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST z7=SPUecJzN|EK+*_G19T0E7Vu1JI}apZ0&+|7riH{h#)K+W%?)r~RMyf7<_P|EK+* z_J7*{Y5&vyr~Oa+pY}iPf7<`F|7riz{-^y<`=9nd?SI<;wEt=U)BdOZPy3(tKka|o z|Fr*U|I_}b{ZIR!_CM`^+W)lwY5&vyr~Oa+pY}iPf7<`F|7riz{-^yIfG_|(?SI<; zwEt;81|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu1JKLa0E7Vu z1JKLa0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rX zAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv z3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L& 
zKp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST z7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhl zfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuw zFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C*{8f2gga z0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?W zL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz z1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaN1*ei}eD zfM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCF zXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks118 z0MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT z(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz8J)f1fmfXaLaw zq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V z0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?W zL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz z1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$ zhz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c z1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh z5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC? 
z4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1 zAR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ( z8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2 zKs1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4Immo zG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4 zfM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCF zXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks118 z0MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT z(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G z0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLaw zq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V z0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?W zL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz z1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$ zhz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c z1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh z5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC? 
z4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1 zAR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ( z8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2 zKs1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4Immo zG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4 zfM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCF zXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks118 z0MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT z(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rKkG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR z7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|n zMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y z(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifp zG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C z4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU( z0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy z07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y{lyQ07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy z07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=F zfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfP zU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR z7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|n zMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y z(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifp zG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C z4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU( z0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy z07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=F zfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfP zU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR z7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|n 
zMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y z(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifp zG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C z4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU( z0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy z07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=F zfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfP zU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=LrU(*Q;T7!6=FfYAU(0~ifpG=R|n zMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PX!Z zX#k@Ej0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP z8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn z1~3}HXaGCyKkYy5KkcUhj0P|oz-RzF?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5 z?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5 z?LX~5?LX~5?LX~5?LY0m?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4 z?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4?7!@%0gMJP8o+1(qXCQtFdD#U0HXnn2C&Qi z%YGWbXaJ)Dj0UjF{>%Q${>%Q${>%Q${>%Q${>%Q${>%Q${>%Q${>y$Ez-R!Y0gMJP z8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn z1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y z0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U z0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|o zz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQt zFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)D zj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1( zqXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}H zXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP z8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn z1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0Uhjx1R-&S0^Fwug=Qc zU%gDYzxsc7f5idZU)_X%e|1av{nfo44*u|OuB_kRT&8sRX9pJr9e%v$^!okHIf46| zZ~FH)-zn~Iz69UjB&+u~E!h3-@BJCy+^T+mbHDZd&CMzIyDKX9yUPRjyYuw-yOXT< 
zyR&-tyC1dgcRx$q?_NCK?@S2?pW6G=^S%H0)ZX2peZRY%_I`Ii(f#fQlKZ 1 and piece[-1] == ',' and piece[-2].isdigit(): + cur_pieces = sp_model.EncodeAsPieces(piece[:-1].replace(u'▁', '')) + if piece[0] != u'▁' and cur_pieces[0][0] == u'▁': + if len(cur_pieces[0]) == 1: + cur_pieces = cur_pieces[1:] + else: + cur_pieces[0] = cur_pieces[0][1:] + cur_pieces.append(piece[-1]) + new_pieces.extend(cur_pieces) + else: + new_pieces.append(piece) + return new_pieces + + +class InputFeatures: + """A single set of features of data.""" + def __init__(self, + example_id, + qas_id, + doc_span_index, + tok_start_to_orig_index, + tok_end_to_orig_index, + token_is_max_context, + input_ids, + tokens, + valid_length, + p_mask, + segment_ids, + paragraph_text, + paragraph_len, + cls_index, + start_position=None, + end_position=None, + is_impossible=None): + self.example_id = example_id + self.qas_id = qas_id + self.doc_span_index = doc_span_index + self.tok_start_to_orig_index = tok_start_to_orig_index + self.tok_end_to_orig_index = tok_end_to_orig_index + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.tokens = tokens + self.valid_length = valid_length + self.p_mask = p_mask + self.segment_ids = segment_ids + self.paragraph_text = paragraph_text + self.paragraph_len = paragraph_len + self.cls_index = cls_index + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + +def _convert_index(index, pos, M=None, is_start=True): + """convert tokenized index to corresponding origin text index""" + if index[pos] is not None: + return index[pos] + N = len(index) + rear = pos + while rear < N - 1 and index[rear] is None: + rear += 1 + front = pos + while front > 0 and index[front] is None: + front -= 1 + assert index[front] is not None or index[rear] is not None + if index[front] is None: + if index[rear] >= 1: + if is_start: + return 0 + else: + return index[rear] - 1 + return index[rear] + if index[rear] is 
None: + if M is not None and index[front] < M - 1: + if is_start: + return index[front] + 1 + else: + return M - 1 + return index[front] + if is_start: + if index[rear] > index[front] + 1: + return index[front] + 1 + else: + return index[rear] + else: + if index[rear] > index[front] + 1: + return index[rear] - 1 + else: + return index[front] + + +def preprocess_text(inputs, lower=False, remove_space=True, + keep_accents=False): + """simple text clean""" + if remove_space: + outputs = ' '.join(inputs.strip().split()) + else: + outputs = inputs + outputs = outputs.replace('``', '"').replace('\'\'', '"') + if not keep_accents: + outputs = unicodedata.normalize('NFKD', outputs) + outputs = ''.join([c for c in outputs if not unicodedata.combining(c)]) + if lower: + outputs = outputs.lower() + + return outputs + + +class SQuADTransform: + """Dataset Transformation for XLNet-style QA. + + The transformation is processed in the following steps: + - Convert from gluonnlp.data.SQuAD's record to SquadExample. + - Tokenize the question_text in the example. + - For examples where the document is too long, + use a sliding window to split into multiple features and + record whether each token is a maximum context. + - Tokenize the split document chunks. + - Combine the token of question_text with the token + of the document and insert [CLS] and [SEP]. + - Generate the start position and end position of the answer. + - Generate valid length. + + E.g: + + Inputs: + + question_text: 'When did BBC Japan begin broadcasting?' 
+ doc_tokens: ['BBC','Japan','was','a','general','entertainment','channel,', + 'which','operated','between','December','2004','and','April', + '2006.','It','ceased','operations','after','its','Japanese', + 'distributor','folded.'] + start_position: 10 + end_position: 11 + orig_answer_text: 'December 2004' + + Processed: + + tokens: ['when','did','bbc','japan','begin','broadcasting','?', + '[SEP]','bbc','japan','was','a','general','entertainment','channel', + ',','which','operated','between','december','2004','and','april', + '2006','.','it','ceased','operations','after','its','japanese', + 'distributor','folded','.','[SEP]','[CLS]'] + segment_ids: [0,0,0,0,0,0,0,0,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] + start_position: 19 + end_position: 20 + valid_length: 36 + + Because of the sliding window approach taken to scoring documents, a single + token can appear in multiple documents. + So you need to record whether each token is a maximum context. E.g. + Doc: the man went to the store and bought a gallon of milk + Span A: the man went to the + Span B: to the store and bought + Span C: and bought a gallon of + ... + + Now the word 'bought' will have two scores from spans B and C. We only + want to consider the score with "maximum context", which we define as + the *minimum* of its left and right context (the *sum* of left and + right context will always be the same, of course). + + In the example the maximum context for 'bought' would be span C since + it has 1 left context and 3 right context, while span B has 4 left context + and 0 right context. + + Parameters + ---------- + tokenizer : XLNetTokenizer. + Tokenizer for the sentences. + labels : list of int. + List of all label ids for the classification task. + max_seq_length : int, default 384 + Maximum sequence length of the sentences. + doc_stride : int, default 128 + When splitting up a long document into chunks, + how much stride to take between chunks. 
+ max_query_length : int, default 64 + The maximum length of the query tokens. + is_pad : bool, default True + Whether to pad the sentences to maximum length. + is_training : bool, default True + Whether to run training. + do_lookup : bool, default True + Whether to do vocabulary lookup for convert tokens to indices. + """ + def __init__(self, + tokenizer, + vocab, + max_seq_length=384, + doc_stride=128, + max_query_length=64, + is_pad=True, + uncased=False, + is_training=True): + self.tokenizer = tokenizer + self.vocab = vocab + self.max_seq_length = max_seq_length + self.max_query_length = max_query_length + self.doc_stride = doc_stride + self.is_pad = is_pad + self.is_training = is_training + self.uncased = uncased + + def _is_whitespace(self, c): + if c == ' ' or c == '\t' or c == '\r' or c == '\n' or ord(c) == 0x202F: + return True + return False + + def _toSquadExample(self, record): + example_id = record[0] + qas_id = record[1] + question_text = record[2] + paragraph_text = record[3] + orig_answer_text = record[4][0] if record[4] else '' + answer_offset = record[5][0] if record[5] else '' + is_impossible = record[6] if len(record) == 7 else False + + example = SquadExample(qas_id=qas_id, + question_text=question_text, + paragraph_text=paragraph_text, + example_id=example_id, + orig_answer_text=orig_answer_text, + start_position=answer_offset, + is_impossible=is_impossible) + return example + + def _transform(self, *record): + """Loads a data file into a list of `InputBatch`s.""" + + example = self._toSquadExample(record) + sp_model = nlp.data.SentencepieceTokenizer( + self.tokenizer._sentencepiece_path)._processor + max_N, max_M = 1024, 1024 + f = np.zeros((max_N, max_M), dtype=np.float32) + + query_tokens = self.tokenizer(example.question_text) + if len(query_tokens) > self.max_query_length: + query_tokens = query_tokens[0:self.max_query_length] + query_tokens = self.vocab.to_indices(query_tokens) + + paragraph_text = example.paragraph_text + para_tokens = 
_encode_pieces( + sp_model, preprocess_text(example.paragraph_text, self.uncased)) + + chartok_to_tok_index = [] + tok_start_to_chartok_index = [] + tok_end_to_chartok_index = [] + char_cnt = 0 + for i, token in enumerate(para_tokens): + chartok_to_tok_index.extend([i] * len(token)) + tok_start_to_chartok_index.append(char_cnt) + char_cnt += len(token) + tok_end_to_chartok_index.append(char_cnt - 1) + + tok_cat_text = ''.join(para_tokens).replace(u'▁', ' ') + N, M = len(paragraph_text), len(tok_cat_text) + + if N > max_N or M > max_M: + max_N = max(N, max_N) + max_M = max(M, max_M) + f = np.zeros((max_N, max_M), dtype=np.float32) + gc.collect() + + g = {} + + def _lcs_match(max_dist): + f.fill(0) + g.clear() + + ### longest common sub sequence + # f[i, j] = max(f[i - 1, j], f[i, j - 1], f[i - 1, j - 1] + match(i, j)) + for i in range(N): + + # note(zhiliny): + # unlike standard LCS, this is specifically optimized for the setting + # because the mismatch between sentence pieces and original text will + # be small + for j in range(i - max_dist, i + max_dist): + if j >= M or j < 0: + continue + if i > 0: + g[(i, j)] = 0 + f[i, j] = f[i - 1, j] + + if j > 0 and f[i, j - 1] > f[i, j]: + g[(i, j)] = 1 + f[i, j] = f[i, j - 1] + + f_prev = f[i - 1, j - 1] if i > 0 and j > 0 else 0 + if (preprocess_text(paragraph_text[i], + lower=self.uncased, + remove_space=False) == tok_cat_text[j] + and f_prev + 1 > f[i, j]): + g[(i, j)] = 2 + f[i, j] = f_prev + 1 + + max_dist = abs(N - M) + 5 + for _ in range(2): + _lcs_match(max_dist) + if f[N - 1, M - 1] > 0.8 * N: + break + max_dist *= 2 + + orig_to_chartok_index = [None] * N + chartok_to_orig_index = [None] * M + i, j = N - 1, M - 1 + while i >= 0 and j >= 0: + if (i, j) not in g: + break + if g[(i, j)] == 2: + orig_to_chartok_index[i] = j + chartok_to_orig_index[j] = i + i, j = i - 1, j - 1 + elif g[(i, j)] == 1: + j = j - 1 + else: + i = i - 1 + + if all(v is None + for v in orig_to_chartok_index) or f[N - 1, M - 1] < 0.8 * N: + 
print('MISMATCH DETECTED!') + return None + + tok_start_to_orig_index = [] + tok_end_to_orig_index = [] + for i in range(len(para_tokens)): + start_chartok_pos = tok_start_to_chartok_index[i] + end_chartok_pos = tok_end_to_chartok_index[i] + start_orig_pos = _convert_index(chartok_to_orig_index, + start_chartok_pos, + N, + is_start=True) + end_orig_pos = _convert_index(chartok_to_orig_index, + end_chartok_pos, + N, + is_start=False) + + tok_start_to_orig_index.append(start_orig_pos) + tok_end_to_orig_index.append(end_orig_pos) + + if not self.is_training: + tok_start_position = tok_end_position = None + + if self.is_training and example.is_impossible: + tok_start_position = -1 + tok_end_position = -1 + + if self.is_training and not example.is_impossible: + start_position = example.start_position + end_position = start_position + len(example.orig_answer_text) - 1 + + start_chartok_pos = _convert_index(orig_to_chartok_index, + start_position, + is_start=True) + tok_start_position = chartok_to_tok_index[start_chartok_pos] + + end_chartok_pos = _convert_index(orig_to_chartok_index, + end_position, + is_start=False) + tok_end_position = chartok_to_tok_index[end_chartok_pos] + assert tok_start_position <= tok_end_position + + def _piece_to_id(x): + return sp_model.PieceToId(x) + + all_doc_tokens = list(map(_piece_to_id, para_tokens)) + + # The -3 accounts for [CLS], [SEP] and [SEP] + max_tokens_for_doc = self.max_seq_length - len(query_tokens) - 3 + + # We can have documents that are longer than the maximum sequence length. + # To deal with this we do a sliding window approach, where we take chunks + # of the up to our max length with a stride of `doc_stride`. 
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + 'DocSpan', ['start', 'length']) + doc_spans = [] + features = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, self.doc_stride) + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_is_max_context = {} + segment_ids = [] + p_mask = [] + + cur_tok_start_to_orig_index = [] + cur_tok_end_to_orig_index = [] + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + + cur_tok_start_to_orig_index.append( + tok_start_to_orig_index[split_token_index]) + cur_tok_end_to_orig_index.append( + tok_end_to_orig_index[split_token_index]) + + is_max_context = _check_is_max_context(doc_spans, + doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(0) + p_mask.append(0) + + paragraph_len = len(tokens) + + #add sep token + tokens.append(4) + segment_ids.append(0) + p_mask.append(1) + + # note(zhiliny): we put P before Q + # because during pretraining, B is always shorter than A + for token in query_tokens: + tokens.append(token) + segment_ids.append(1) + p_mask.append(1) + #add sep token + tokens.append(4) + segment_ids.append(1) + p_mask.append(1) + + #add cls token + tokens.append(3) + segment_ids.append(2) + p_mask.append(0) + + input_ids = tokens + + # The mask has 0 for real tokens and 1 for padding tokens. Only real + # tokens are attended to. + valid_length = len(input_ids) + # Zero-pad up to the sequence length. 
+ cls_index = len(input_ids) - 1 + while len(input_ids) < self.max_seq_length: + padding_length = self.max_seq_length - valid_length + input_ids = input_ids + [0] * padding_length + segment_ids = segment_ids + [3] * padding_length + p_mask = p_mask + [1] * padding_length + + assert len(input_ids) == self.max_seq_length + assert len(segment_ids) == self.max_seq_length + assert len(p_mask) == self.max_seq_length + + span_is_impossible = example.is_impossible + start_position = None + end_position = None + if self.is_training and not span_is_impossible: + # For training, if our document chunk does not contain an annotation + # we throw it out, since there is nothing to predict. + doc_start = doc_span.start + doc_end = doc_span.start + doc_span.length - 1 + out_of_span = False + if not (tok_start_position >= doc_start + and tok_end_position <= doc_end): + out_of_span = True + if out_of_span: + # continue + start_position = 0 + end_position = 0 + span_is_impossible = True + else: + # note(zhiliny): we put P before Q, so doc_offset should be zero. 
+ # doc_offset = len(query_tokens) + 2 + doc_offset = 0 + start_position = tok_start_position - doc_start + doc_offset + end_position = tok_end_position - doc_start + doc_offset + + if self.is_training and span_is_impossible: + start_position = cls_index + end_position = cls_index + + if example.example_id < 20: + print('*** Example ***') + print('qas_id: %s' % (example.qas_id)) + print('example_index: %s' % (example.example_id)) + print('doc_span_index: %s' % (doc_span_index)) + print('tok_start_to_orig_index: %s' % + ' '.join([str(x) for x in cur_tok_start_to_orig_index])) + print('tok_end_to_orig_index: %s' % + ' '.join([str(x) for x in cur_tok_end_to_orig_index])) + print('token_is_max_context: %s' % ' '.join([ + '%d:%s' % (x, y) + for (x, y) in token_is_max_context.items() + ])) + print('input_ids: %s' % ' '.join([str(x) for x in input_ids])) + print('p_mask: %s' % ' '.join([str(x) for x in p_mask])) + print('segment_ids: %s' % + ' '.join([str(x) for x in segment_ids])) + + if self.is_training and span_is_impossible: + print('impossible example span') + + if self.is_training and not span_is_impossible: + pieces = [ + sp_model.IdToPiece(token) + for token in tokens[start_position :(end_position + 1)] + ] + answer_text = sp_model.DecodePieces(pieces) + print('start_position: %d' % + (start_position)) + print('end_position: %d' % (end_position)) + print('answer: %s' % (answer_text)) + + # note(zhiliny): With multi processing, + # the example_index is actually the index within the current process + # therefore we use example_index=None to avoid being used in the future. + # The current code does not use example_index of training data. 
+ # if self.is_training: + # feat_example_index = None + # else: + # feat_example_index = example.example_id + + feature = InputFeatures( + example_id=example.example_id, + qas_id=example.qas_id, + doc_span_index=doc_span_index, + tok_start_to_orig_index=cur_tok_start_to_orig_index, + tok_end_to_orig_index=cur_tok_end_to_orig_index, + token_is_max_context=token_is_max_context, + tokens=tokens, + input_ids=input_ids, + valid_length=valid_length, + p_mask=p_mask, + segment_ids=segment_ids, + paragraph_text=example.paragraph_text, + paragraph_len=paragraph_len, + cls_index=cls_index, + start_position=start_position, + end_position=end_position, + is_impossible=span_is_impossible) + features.append(feature) + + return features + + def __call__(self, record, evaluate=False): + examples = self._transform(*record) + if not examples: + return None + features = [] + + for _example in examples: + feature = [] + feature.append(_example.example_id) + feature.append(_example.input_ids) + feature.append(_example.segment_ids) + feature.append(_example.valid_length) + feature.append(_example.p_mask) + feature.append(_example.start_position) + feature.append(_example.end_position) + feature.append(_example.is_impossible) + feature.append(len(_example.input_ids)) + features.append(feature) + + return features + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. 
We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + \ + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index diff --git a/scripts/language_model/model/qa.py b/scripts/language_model/model/qa.py new file mode 100644 index 0000000000..38934701e1 --- /dev/null +++ b/scripts/language_model/model/qa.py @@ -0,0 +1,293 @@ +"""XLNetForQA models.""" + +import mxnet as mx +from mxnet.gluon import HybridBlock, Block, loss, nn + + +class PoolerStartLogits(HybridBlock): + """ Compute SQuAD start_logits from sequence hidden states. """ + def __init__(self, prefix=None, params=None): + super(PoolerStartLogits, self).__init__(prefix=prefix, params=params) + self.dense = nn.Dense(1, flatten=False) + + def __call__(self, hidden_states, p_masks=None): + # pylint: disable=arguments-differ + return super(PoolerStartLogits, self).__call__(hidden_states, p_masks) + + def hybrid_forward(self, F, hidden_states, p_mask): + # pylint: disable=arguments-differ + """ Args: + **p_mask**: (`optional`) ``torch.FloatTensor`` of shape `(batch_size, seq_len)` + invalid position mask such as query and special symbols (PAD, SEP, CLS) + 1.0 means token should be masked. 
+ """ + x = self.dense(hidden_states).squeeze(-1) + if p_mask is not None: + x = x * (1 - p_mask) - 1e30 * p_mask + return x + + +class PoolerEndLogits(Block): + """ Compute SQuAD end_logits from sequence hidden states and start token hidden state.""" + def __init__(self, units=768, is_eval=False, prefix=None, params=None): + super(PoolerEndLogits, self).__init__(prefix=prefix, params=params) + self._eval = is_eval + self._hsz = units + with self.name_scope(): + self.dense_0 = nn.Dense(units, activation='tanh', flatten=False) + self.dense_1 = nn.Dense(1, flatten=False) + self.layernorm = nn.LayerNorm(epsilon=1e-12, in_channels=768) + + def __call__(self, + hidden_states, + start_states=None, + start_positions=None, + p_masks=None): + # pylint: disable=arguments-differ + return super(PoolerEndLogits, + self).__call__(hidden_states, start_states, + start_positions, p_masks) + + def forward(self, hidden_states, start_states, start_positions, p_mask): + # pylint: disable=arguments-differ + F = mx.ndarray + if not self._eval: + start_states = F.gather_nd( + hidden_states, + F.concat( + F.contrib.arange_like(hidden_states, + axis=0).expand_dims(1), + start_positions.expand_dims( + 1)).transpose()) #shape(bsz, hsz) + start_states = start_states.expand_dims(1) + start_states = F.broadcast_like( + start_states, hidden_states) # shape (bsz, slen, hsz) + x = self.dense_0(F.concat(hidden_states, start_states, dim=-1)) + x = self.layernorm(x) + x = self.dense_1(x).squeeze(-1) + if p_mask is not None and self._eval: + p_mask = p_mask.expand_dims(-1) + p_mask = F.broadcast_like(p_mask, x) + if p_mask is not None: + x = x * (1 - p_mask) - 1e30 * p_mask + return x + + +class XLNetPoolerAnswerClass(Block): + """ Compute SQuAD 2.0 answer class from classification and start tokens hidden states. 
""" + def __init__(self, units=768, dropout=0.1, prefix=None, params=None): + super(XLNetPoolerAnswerClass, self).__init__(prefix=prefix, + params=params) + with self.name_scope(): + self._units = units + self.dense_0 = nn.Dense(units, + in_units=2 * units, + activation='tanh', + use_bias=True, + flatten=False) + self.dense_1 = nn.Dense(1, + in_units=units, + use_bias=False, + flatten=False) + self._dropout = nn.Dropout(dropout) + + def __call__(self, hidden_states, start_states=None, cls_index=None): + # pylint: disable=arguments-differ + return super(XLNetPoolerAnswerClass, + self).__call__(hidden_states, start_states, cls_index) + # pylint: disable=unused-argument + + def forward(self, sequence, start_states, cls_index): + # pylint: disable=arguments-differ + # get the cls_token's state, currently the last state + F = mx.ndarray + index = F.contrib.arange_like(sequence, axis=0, + ctx=sequence.context).expand_dims(1) + valid_length_rs = cls_index.reshape((-1, 1)) - 1 + gather_index = F.concat(index, valid_length_rs).T + cls_token_state = F.gather_nd(sequence, gather_index) + + x = self.dense_0(F.concat(start_states, cls_token_state, dim=-1)) + x = self._dropout(x) + x = self.dense_1(x).squeeze(-1) + return x + + +class XLNetForQA(Block): + """Model for SQuAD task with XLNet. + + Parameters + ---------- + bert: XLNet base + prefix : str or None + See document of `mx.gluon.Block`. + params : ParameterDict or None + See document of `mx.gluon.Block`. 
+ """ + def __init__(self, + xlnet_base, + start_top_n=None, + end_top_n=None, + version_2=False, + is_eval=False, + units=768, + prefix=None, + params=None): + super(XLNetForQA, self).__init__(prefix=prefix, params=params) + with self.name_scope(): + self.xlnet = xlnet_base + self.start_top_n = start_top_n + self.end_top_n = end_top_n + self.loss = loss.SoftmaxCELoss() + self.start_logits = PoolerStartLogits() + self.end_logits = PoolerEndLogits(units=units, is_eval=is_eval) + self.version2 = version_2 + self.eval = is_eval + if version_2: + self.answer_class = XLNetPoolerAnswerClass(units=units) + self.cls_loss = loss.SigmoidBinaryCrossEntropyLoss() + + def __call__(self, + inputs, + token_types, + valid_length=None, + label=None, + p_mask=None, + is_impossible=None, + mems=None): + #pylint: disable=arguments-differ, dangerous-default-value + """Generate the unnormalized score for the given the input sequences.""" + valid_length = [] if valid_length is None else valid_length + return super(XLNetForQA, + self).__call__(inputs, token_types, valid_length, p_mask, + label, is_impossible, mems) + + def _padding_mask(self, inputs, valid_length, left_pad=False): + F = mx.ndarray + if left_pad: + # left padding + valid_length_start = valid_length.astype('int64') + steps = F.contrib.arange_like(inputs, axis=1) + 1 + ones = F.ones_like(steps) + mask = F.broadcast_greater( + F.reshape(steps, shape=(1, -1)), + F.reshape(valid_length_start, shape=(-1, 1))) + mask = F.broadcast_mul( + F.expand_dims(mask, axis=1), + F.broadcast_mul(ones, F.reshape(ones, shape=(-1, 1)))) + else: + # right padding + valid_length = valid_length.astype(inputs.dtype) + steps = F.contrib.arange_like(inputs, axis=1) + ones = F.ones_like(steps) + mask = F.broadcast_lesser(F.reshape(steps, shape=(1, -1)), + F.reshape(valid_length, shape=(-1, 1))) + mask = F.broadcast_mul( + F.expand_dims(mask, axis=1), + F.broadcast_mul(ones, F.reshape(ones, shape=(-1, 1)))) + return mask + + def forward(self, inputs, 
token_types, valid_length, p_mask, label, + is_impossible, mems): + # pylint: disable=arguments-differ + """Generate the unnormalized score for the given the input sequences. + + Parameters + ---------- + inputs : NDArray, shape (batch_size, seq_length) + Input words for the sequences. + token_types : NDArray, shape (batch_size, seq_length) + Token types for the sequences, used to indicate whether the word belongs to the + first sentence or the second one. + valid_length : NDArray or None, shape (batch_size,) + Valid length of the sequence. This is used to mask the padded tokens. + + Returns + ------- + outputs : NDArray + Shape (batch_size, seq_length, 2) + """ + if isinstance(valid_length, list) and len(valid_length) == 0: + valid_length = None + attention_mask = self._padding_mask(inputs, + valid_length).astype('float32') + output, _ = self.xlnet(inputs, token_types, mems, attention_mask) + start_logits = self.start_logits(output, + p_masks=p_mask) # shape (bsz, slen) + bsz, slen, hsz = output.shape + if not self.eval: + #training + start_positions, end_positions = label + end_logit = self.end_logits(output, + start_positions=start_positions, + p_masks=p_mask) + span_loss = (self.loss(start_logits, start_positions) + + self.loss(end_logit, end_positions)) / 2 + + cls_loss = None + total_loss = [span_loss] + if self.version2: + start_log_probs = mx.nd.softmax(start_logits, axis=-1) + start_states = mx.nd.batch_dot(output, + start_log_probs.expand_dims(-1), + transpose_a=True).squeeze(-1) + + cls_logits = self.answer_class(output, start_states, + valid_length) + cls_loss = self.cls_loss(cls_logits, is_impossible) + total_loss.append(0.5 * cls_loss) + total_loss_sum = span_loss + 0.5 * cls_loss if cls_loss is not None else span_loss + return total_loss, total_loss_sum + else: + #inference + start_log_probs = mx.nd.log_softmax(start_logits, + axis=-1) # shape (bsz, slen) + start_top_log_probs, start_top_index = mx.ndarray.topk( + start_log_probs, k=self.start_top_n, 
axis=-1, + ret_typ='both') # shape (bsz, start_n_top) + index = mx.nd.concat(*[ + mx.nd.arange(bsz, ctx=start_log_probs.context).expand_dims(1) + ] * self.start_top_n).reshape(bsz * self.start_top_n, 1) + start_top_index_rs = start_top_index.reshape((-1, 1)) + gather_index = mx.nd.concat( + index, start_top_index_rs).T #shape(2, bsz * start_n_top) + start_states = mx.nd.gather_nd(output, gather_index).reshape( + (bsz, self.start_top_n, hsz)) #shape (bsz, start_n_top, hsz) + + start_states = start_states.expand_dims(1) + start_states = mx.nd.broadcast_to( + start_states, (bsz, slen, self.start_top_n, + hsz)) # shape (bsz, slen, start_n_top, hsz) + hidden_states_expanded = output.expand_dims(2) + hidden_states_expanded = mx.ndarray.broadcast_to( + hidden_states_expanded, shape=start_states.shape + ) # shape (bsz, slen, start_n_top, hsz) + end_logits = self.end_logits( + hidden_states_expanded, + start_states=start_states, + p_masks=p_mask) # shape (bsz, slen, start_n_top) + end_log_probs = mx.nd.log_softmax( + end_logits, axis=1) # shape (bsz, slen, start_n_top) + # Note that end_top_index and end_top_log_probs have shape (bsz, END_N_TOP, start_n_top) + # So that for each start position, there are end_n_top end positions on the second dim. 
+ end_top_log_probs, end_top_index = mx.ndarray.topk( + end_log_probs, k=self.end_top_n, axis=1, + ret_typ='both') # shape (bsz, end_n_top, start_n_top) + end_top_log_probs = end_top_log_probs.reshape( + (-1, self.start_top_n * self.end_top_n)) + end_top_index = end_top_index.reshape( + (-1, self.start_top_n * self.end_top_n)) + + start_probs = mx.nd.softmax(start_logits, axis=-1) + start_states = mx.nd.batch_dot(output, + start_probs.expand_dims(-1), + transpose_a=True).squeeze(-1) + + cls_logits = None + if self.version2: + cls_logits = self.answer_class(output, start_states, + valid_length) + + outputs = (start_top_log_probs, start_top_index, end_top_log_probs, + end_top_index, cls_logits) + return outputs diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py new file mode 100644 index 0000000000..65a3021050 --- /dev/null +++ b/scripts/language_model/run_squad.py @@ -0,0 +1,540 @@ +""" +Question Answering with XLNet +""" +import os +import time +import argparse +import random +import logging +import warnings +import copy +import json +import collections +import pickle +import numpy as np +import mxnet as mx +import gluonnlp as nlp +from gluonnlp.data import SQuAD +from model.qa import XLNetForQA +from data.new_qa import SQuADTransform, preprocess_dataset, convert_examples_to_inputs +from transformer import model +from xlnet_qa_evaluate import predict_extended +from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad + +os.environ['MXNET_USE_FUSION'] = '0' +log = logging.getLogger('gluonnlp') +log.setLevel(logging.DEBUG) +formatter = logging.Formatter(fmt='%(levelname)s:%(name)s:%(asctime)s %(message)s', + datefmt='%H:%M:%S') + +parser = argparse.ArgumentParser(description='XLNet QA example.' 
+ 'We fine-tune the XLNet model on SQuAD dataset.') + +parser.add_argument('--only_predict', action='store_true', help='Whether to predict only.') + +parser.add_argument('--model_parameters', type=str, default=None, help='Model parameter file') + +parser.add_argument('--model', type=str, default='xlnet_cased_l12_h768_a12', + help='The name of pre-trained XLNet model to fine-tune') + +parser.add_argument('--dataset', type=str, default='126gb', + help='The dataset BERT pre-trained with.') + +parser.add_argument('--predict_file', default='./data/dev-v2.0.json', type=str, + help='SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json') + +parser.add_argument('--uncased', action='store_true', + help='if set, inputs are converted to lower case.') + +parser.add_argument( + '--output_dir', type=str, default='./output_dir', + help='The output directory where the model params will be written.' + ' default is ./output_dir') + +parser.add_argument('--epochs', type=int, default=3, help='number of epochs, default is 3') + +parser.add_argument('--batch_size', type=int, default=32, + help='Batch size. Number of examples per gpu in a minibatch. default is 32') + +parser.add_argument('--test_batch_size', type=int, default=24, + help='Test batch size. default is 24') + +parser.add_argument('--optimizer', type=str, default='bertadam', + help='optimization algorithm. default is bertadam') + +parser.add_argument( + '--accumulate', type=int, default=None, help='The number of batches for ' + 'gradients accumulation to simulate large batch size. Default is None') + +parser.add_argument('--lr', type=float, default=3e-5, help='Initial learning rate. default is 5e-5') + +parser.add_argument( + '--warmup_ratio', type=float, default=0, + help='ratio of warmup steps that linearly increase learning rate from ' + '0 to target learning rate. default is 0') + +parser.add_argument('--log_interval', type=int, default=10, help='report interval. 
default is 10') + +parser.add_argument( + '--max_seq_length', type=int, default=512, + help='The maximum total input sequence length after WordPiece tokenization.' + 'Sequences longer than this will be truncated, and sequences shorter ' + 'than this will be padded. default is 512') + +parser.add_argument( + '--doc_stride', type=int, default=128, + help='When splitting up a long document into chunks, how much stride to ' + 'take between chunks. default is 128') + +parser.add_argument( + '--max_query_length', type=int, default=64, + help='The maximum number of tokens for the question. Questions longer than ' + 'this will be truncated to this length. default is 64') + +parser.add_argument( + '--n_best_size', type=int, default=20, + help='The total number of n-best predictions to generate in the ' + 'nbest_predictions.json output file. default is 20') + +parser.add_argument( + '--max_answer_length', type=int, default=64, + help='The maximum length of an answer that can be generated. This is needed ' + 'because the start and end predictions are not conditioned on one another.' + ' default is 64') + +parser.add_argument('--version_2', action='store_true', + help='SQuAD examples whether contain some that do not have an answer.') + +parser.add_argument( + '--null_score_diff_threshold', type=float, default=0.0, + help='If null_score - best_non_null is greater than the threshold predict null.' + 'Typical values are between -1.0 and -5.0. default is 0.0') + +parser.add_argument('--gpu', type=int, default=None, + help='Number of gpus to use for finetuning. CPU is used if not set.') + +parser.add_argument('--sentencepiece', type=str, default=None, + help='Path to the sentencepiece .model file for both tokenization and vocab.') + +parser.add_argument('--debug', action='store_true', + help='Run the example in test mode for sanity checks') +parser.add_argument('--pretrained_xlnet_parameters', type=str, default=None, + help='Pre-trained bert model parameter file. 
default is None') + +parser.add_argument('--layerwise_decay', type=float, default=0.75, help='Layer-wise lr decay') +parser.add_argument('--wd', type=float, default=0.01, help='adam weight decay') +parser.add_argument('--seed', type=int, default=29, help='Random seed') +parser.add_argument('--start_top_n', type=int, default=5, help='to be added') +parser.add_argument('--end_top_n', type=int, default=5, help='to be added') +parser.add_argument('--dropout', type=float, default=0.1, help='dropout') +parser.add_argument('--attention_dropout', type=float, default=0.1, help='attention dropout') +parser.add_argument('--training_steps', type=int, help='training steps') +parser.add_argument('--raw', action='store_true', help='if do data preprocessing or load from pickled file') +parser.add_argument('--dev_dataset_file', default='./output_dir/out.dev', type=str, help='location of dev dataset') +parser.add_argument('--train_dataset_file', default='./output_dir/out.train', type=str, help='location of train dataset') + +args = parser.parse_args() + +# random seed +np.random.seed(args.seed) +random.seed(args.seed) +mx.random.seed(args.seed) + +if not os.path.exists(args.output_dir): + os.mkdir(args.output_dir) + +fh = logging.FileHandler(os.path.join(args.output_dir, 'finetune_squad.log')) +fh.setLevel(logging.INFO) +fh.setFormatter(formatter) +console = logging.StreamHandler() +console.setLevel(logging.INFO) +console.setFormatter(formatter) +log.addHandler(console) +log.addHandler(fh) + +log.info(args) + +pretrained_xlnet_parameters = args.pretrained_xlnet_parameters +if pretrained_xlnet_parameters and args.model_parameters: + raise ValueError('Cannot provide both pre-trained BERT parameters and ' + 'BertForQA model parameters.') + +ctx = [mx.cpu(0)] if not args.gpu else [mx.gpu(i) for i in range(args.gpu)] + +log_interval = args.log_interval * args.accumulate if args.accumulate else args.log_interval +if args.accumulate: + log.info('Using gradient accumulation. 
Effective batch size = %d', + args.accumulate * args.batch_size) +if args.max_seq_length <= args.max_query_length + 3: + raise ValueError('The max_seq_length (%d) must be greater than max_query_length ' + '(%d) + 3' % (args.max_seq_length, args.max_query_length)) + +# vocabulary and tokenizer + +get_pretrained = True + +get_model_params = { + 'name': args.model, + 'dataset_name': args.dataset, + 'pretrained': get_pretrained, + 'ctx': ctx, + 'use_decoder': False, + 'dropout': args.dropout, + 'attention_dropout': args.attention_dropout +} + +xlnet_base, vocab, tokenizer = model.get_model(**get_model_params) + +num_layers = len(xlnet_base._net.transformer_cells) +for (i, layer_parameters) in enumerate(xlnet_base._net.transformer_cells): + layer_params = layer_parameters.collect_params() + for key, value in layer_params.items(): + value.lr_mult = args.layerwise_decay**(num_layers - i - 1) + +batchify_fn = nlp.data.batchify.Tuple( + nlp.data.batchify.Stack(), + nlp.data.batchify.Stack(), # Already padded in data transform + nlp.data.batchify.Stack(), # Already padded in data transform + nlp.data.batchify.Stack('float32'), + nlp.data.batchify.Stack('float32'), + nlp.data.batchify.Stack('float32'), + nlp.data.batchify.Stack('float32'), + nlp.data.batchify.Stack('float32')) + +if pretrained_xlnet_parameters: + # only load XLnetModel parameters + nlp.utils.load_parameters(xlnet_base, pretrained_xlnet_parameters, ctx=ctx, ignore_extra=True, + cast_dtype=True) + +net = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, + version_2=args.version_2) +net_eval = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, + version_2=args.version_2, is_eval=True, params=net.collect_params()) + +initializer = mx.init.Normal(0.02) + +if args.model_parameters: + # load complete XLNetForQA parameters + nlp.utils.load_parameters(net, args.model_parameters, ctx=ctx, cast_dtype=True) +else: + 
net.start_logits.initialize(init=initializer, ctx=ctx) + net.end_logits.initialize(init=initializer, ctx=ctx) + if args.version_2: + net.answer_class.initialize(init=initializer, ctx=ctx) + +net.hybridize(static_alloc=True) +net_eval.hybridize(static_alloc=True) + + +def split_array(arr, num_of_splits): + """split an array into a number of splits""" + size = arr.shape[0] + if size < num_of_splits: + return [arr[i:i + 1] for i in range(size)] + slice_len, rest = divmod(size, num_of_splits) + div_points = [0] + [(slice_len * index + min(index, rest) + slice_len + (index < rest)) + for index in range(num_of_splits)] + slices = [arr[div_points[i]:div_points[i + 1]] for i in range(num_of_splits)] + return slices + + +def split_and_load(arrs, ctxs): + """split and load arrays to a list of contexts""" + assert isinstance(arrs, (list, tuple)) + # split and load + loaded_arrs = [[i.as_in_context(ctx) for i, ctx in zip(split_array(arr, len(ctxs)), ctxs)] + for arr in arrs] + return zip(*loaded_arrs) + + +def train(): + """Training function.""" + segment = 'train' if not args.debug else 'dev' + log.info('Loading %s data...', segment) + if args.version_2: + train_data = SQuAD(segment, version='2.0') + else: + train_data = SQuAD(segment, version='1.1') + if args.debug: + sampled_data = [train_data[i] for i in range(100)] + train_data = mx.gluon.data.SimpleDataset(sampled_data) + log.info('Number of records in Train data: %s', len(train_data)) + if args.raw: + train_data_transform = preprocess_dataset( + train_data, + SQuADTransform(copy.copy(tokenizer), vocab, max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, max_query_length=args.max_query_length, + is_pad=True, is_training=True), dataset_file=args.train_dataset_file) + else: + train_data_transform = preprocess_dataset(raw=False, dataset_file=args.train_dataset_file) + + log.info('The number of examples after preprocessing: %s', len(train_data_transform)) + + train_dataloader = 
mx.gluon.data.DataLoader(train_data_transform, batchify_fn=batchify_fn, + batch_size=args.batch_size, num_workers=4, + shuffle=True) + + optimizer_params = {'learning_rate': args.lr, 'wd': args.wd} + try: + trainer = mx.gluon.Trainer(net.collect_params(), args.optimizer, optimizer_params, + update_on_kvstore=False) + except ValueError as _: + warnings.warn('AdamW optimizer is not found. Please consider upgrading to ' + 'mxnet>=1.5.0. Now the original Adam optimizer is used instead.') + trainer = mx.gluon.Trainer(net.collect_params(), 'bertadam', optimizer_params, + update_on_kvstore=False) + + num_train_examples = len(train_data_transform) + step_size = args.batch_size * args.accumulate if args.accumulate else args.batch_size + num_train_steps = int(num_train_examples / step_size * args.epochs) + epoch_number = args.epochs + if args.training_steps: + num_train_steps = args.training_steps + epoch_number = 999 + + log.info('training steps=%d', num_train_steps) + num_warmup_steps = int(num_train_steps * args.warmup_ratio) + step_num = 0 + + def set_new_lr(step_num, batch_id): + """set new learning rate""" + # set grad to zero for gradient accumulation + if args.accumulate: + if batch_id % args.accumulate == 0: + net.collect_params().zero_grad() + step_num += 1 + else: + step_num += 1 + # learning rate schedule + # Notice that this learning rate scheduler is adapted from traditional linear learning + # rate scheduler where step_num >= num_warmup_steps, new_lr = 1 - step_num/num_train_steps + if step_num < num_warmup_steps: + new_lr = args.lr * step_num / num_warmup_steps + else: + offset = (step_num - num_warmup_steps) * args.lr / \ + (num_train_steps - num_warmup_steps) + new_lr = args.lr - offset + trainer.set_learning_rate(new_lr) + return step_num + + # Do not apply weight decay on LayerNorm and bias terms + for _, v in net.collect_params('.*beta|.*gamma|.*bias').items(): + v.wd_mult = 0.0 + # Collect differentiable parameters + params = [p for p in 
net.collect_params().values() if p.grad_req != 'null'] + # Set grad_req if gradient accumulation is required + if args.accumulate: + for p in params: + p.grad_req = 'add' + + epoch_tic = time.time() + total_num = 0 + log_num = 0 + finish_flag = False + for epoch_id in range(epoch_number): + step_loss = 0.0 + step_loss_span = 0 + step_loss_cls = 0 + tic = time.time() + if finish_flag: + break + for batch_id, data in enumerate(train_dataloader): + # set new lr + step_num = set_new_lr(step_num, batch_id) + data_list = list(split_and_load(data, ctx)) + # forward and backward + batch_loss = [] + batch_loss_sep = [] + with mx.autograd.record(): + for splited_data in data_list: + _, inputs, token_types, valid_length, p_mask, start_label, end_label, _is_impossible = splited_data # pylint: disable=line-too-long + valid_length = valid_length.astype('float32') + is_impossible = _is_impossible if args.version_2 else None + log_num += len(inputs) + total_num += len(inputs) + out_sep, out = net( + inputs, + token_types, + valid_length, + [start_label, end_label], + p_mask=p_mask, # pylint: disable=line-too-long + is_impossible=is_impossible) + ls = out.mean() / len(ctx) + if args.accumulate: + ls = ls / args.accumulate + batch_loss_sep.append(out_sep) + batch_loss.append(ls) + ls.backward() + # update + if not args.accumulate or (batch_id + 1) % args.accumulate == 0: + trainer.allreduce_grads() + nlp.utils.clip_grad_global_norm(params, 1) + trainer.update(1, ignore_stale_grad=True) + if args.version_2: + step_loss_sep_tmp = np.array([[span_ls.mean().asscalar(), cls_ls.mean().asscalar()] for span_ls, cls_ls in batch_loss_sep]) + step_loss_sep_tmp = list(np.sum(step_loss_sep_tmp, axis=0)) + step_loss_span += step_loss_sep_tmp[0] + step_loss_cls += step_loss_sep_tmp[1] + + step_loss += sum([ls.asscalar() for ls in batch_loss]) + if (batch_id + 1) % log_interval == 0: + toc = time.time() + log.info( + 'Epoch: %d, Batch: %d/%d, Loss=%.4f, lr=%.7f Time cost=%.1f Thoughput=%.2f 
samples/s' # pylint: disable=line-too-long + , + epoch_id + 1, + batch_id + 1, + len(train_dataloader), + step_loss / log_interval, + trainer.learning_rate, + toc - tic, + log_num / (toc - tic)) + + if args.version_2: + if args.accumulate: + step_loss_span = step_loss_span / args.accumulate + step_loss_cls = step_loss_cls / args.accumulate + log.info('span_loss: %.4f, cls_loss: %.4f', step_loss_span / log_interval, step_loss_cls / log_interval) + + tic = time.time() + step_loss = 0.0 + step_loss_span = 0 + step_loss_cls = 0 + log_num = 0 + if step_num >= num_train_steps: + logging.info('Finish training step: %d', step_num) + finish_flag = True + break + epoch_toc = time.time() + log.info('Time cost=%.2f s, Thoughput=%.2f samples/s', epoch_toc - epoch_tic, + total_num / (epoch_toc - epoch_tic)) + ckpt_name = 'model_xlnet_squad_{0}.params'.format(epoch_id + 1) + params_saved = os.path.join(args.output_dir, ckpt_name) + nlp.utils.save_parameters(net, params_saved) + log.info('params saved in: %s', params_saved) + + +RawResultExtended = collections.namedtuple( + 'RawResultExtended', + ['start_top_log_probs', 'start_top_index', 'end_top_log_probs', 'end_top_index', 'cls_logits']) + + +def evaluate(prefix='p'): + """Evaluate the model on validation dataset. 
+ """ + log.info('Loading dev data...') + if args.version_2: + dev_data = SQuAD('dev', version='2.0') + else: + dev_data = SQuAD('dev', version='1.1') + (_, _), (data_file_name, _) \ + = dev_data._data_file[dev_data._version][dev_data._segment] + dev_data_path = os.path.join(dev_data._root, data_file_name) + + if args.debug: + sampled_data = [dev_data[0], dev_data[1], dev_data[2]] + dev_data = mx.gluon.data.SimpleDataset(sampled_data) + log.info('Number of records in dev data: %d', len(dev_data)) + + + if args.raw: + dev_dataset = dev_data.transform( + SQuADTransform(copy.copy(tokenizer), vocab, max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, max_query_length=args.max_query_length, + is_pad=True, is_training=False)._transform, lazy=False) + with open(args.dev_dataset_file, 'wb') as file: + pickle.dump(list(dev_dataset), file) + else: + with open(args.dev_dataset_file , 'rb') as file: + dev_dataset = pickle.load(file) + dev_dataset = mx.gluon.data.SimpleDataset(dev_dataset) + + dev_data_transform = convert_examples_to_inputs(dev_dataset) + + log.info('The number of examples after preprocessing: %d', len(dev_data_transform)) + + dev_dataloader = mx.gluon.data.DataLoader(dev_data_transform, batchify_fn=batchify_fn, + num_workers=4, batch_size=args.test_batch_size, + shuffle=False, last_batch='keep') + + log.info('start prediction') + + all_results = collections.defaultdict(list) + + epoch_tic = time.time() + total_num = 0 + for (batch_id, data) in enumerate(dev_dataloader): + data_list = list(split_and_load(data, ctx)) + for splited_data in data_list: + example_ids, inputs, token_types, valid_length, p_mask, _, _, _ = splited_data + total_num += len(inputs) + outputs = net_eval(inputs, token_types, valid_length, p_mask=p_mask) + example_ids = example_ids.asnumpy().tolist() + for c, example_ids in enumerate(example_ids): + result = RawResultExtended( + start_top_log_probs=outputs[0][c].asnumpy().tolist(), + 
start_top_index=outputs[1][c].asnumpy().tolist(), + end_top_log_probs=outputs[2][c].asnumpy().tolist(), + end_top_index=outputs[3][c].asnumpy().tolist(), + cls_logits=outputs[4][c].asnumpy().tolist() + if outputs[4] is not None else [-1e30]) + all_results[example_ids].append(result) + if batch_id % args.log_interval == 0: + log.info('Batch: %d/%d', batch_id + 1, len(dev_dataloader)) + + epoch_toc = time.time() + log.info('Time cost=%2f s, Thoughput=%.2f samples/s', epoch_toc - epoch_tic, + total_num / (epoch_toc - epoch_tic)) + + log.info('Get prediction results...') + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + for features in dev_dataset: + results = all_results[features[0].example_id] + example_qas_id = features[0].qas_id + score_diff, best_non_null_entry, nbest_json = predict_extended( + features=features, results=results, + sp_model=nlp.data.SentencepieceTokenizer(tokenizer._sentencepiece_path)._processor, + n_best_size=args.n_best_size, + max_answer_length=args.max_answer_length, start_n_top=args.start_top_n, + end_n_top=args.end_top_n) + scores_diff_json[example_qas_id] = score_diff + all_predictions[example_qas_id] = best_non_null_entry + all_nbest_json[example_qas_id] = nbest_json + + output_prediction_file = os.path.join(args.output_dir, 'predictions_{}.json'.format(prefix)) + output_nbest_file = os.path.join(args.output_dir, 'nbest_predictions_{}.json'.format(prefix)) + if args.version_2: + output_null_log_odds_file = os.path.join(args.output_dir, + 'null_odds_{}.json'.format(prefix)) + else: + output_null_log_odds_file = None + + with open(output_prediction_file, 'w') as writer: + writer.write(json.dumps(all_predictions, indent=4) + '\n') + with open(output_nbest_file, 'w') as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + '\n') + if args.version_2: + with open(output_null_log_odds_file, 'w') as writer: + 
writer.write(json.dumps(scores_diff_json, indent=4) + '\n') + + if args.version_2: + evaluate_options = EVAL_OPTS(data_file=dev_data_path, pred_file=output_prediction_file, + na_prob_file=output_null_log_odds_file, + na_prob_thresh=args.null_score_diff_threshold) + else: + evaluate_options = EVAL_OPTS(data_file=dev_data_path, pred_file=output_prediction_file, + na_prob_file=None, na_prob_thresh=args.null_score_diff_threshold) + + results = evaluate_on_squad(evaluate_options) + return results + + +if __name__ == '__main__': + if not args.only_predict: + train() + evaluate() + else: + evaluate() From 5529b38780ae80785084dc9e0770afea513f3915 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 15 Jan 2020 16:32:03 +0800 Subject: [PATCH 28/59] update new features --- scripts/bert/data/preprocessing_utils.py | 83 +-- scripts/bert/finetune_classifier.py | 36 +- scripts/bert/finetune_squad.py | 36 +- scripts/language_model/data/qa.py | 690 ------------------- scripts/language_model/run_glue.py | 323 ++++++--- scripts/language_model/run_squad.py | 708 +++++++++++++++----- scripts/language_model/xlnet_qa_evaluate.py | 152 +++++ 7 files changed, 1008 insertions(+), 1020 deletions(-) delete mode 100644 scripts/language_model/data/qa.py create mode 100644 scripts/language_model/xlnet_qa_evaluate.py diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index ef92ac552a..9c47bd32e1 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -45,7 +45,7 @@ def truncate_seqs_equal(seqs, max_len): return seqs -def concat_sequences(seqs, separators, separator_mask=[]): +def concat_sequences(seqs, separators, separator_mask=None): """ Insert special tokens for sequence list or a single sequence. 
For sequence pairs, the input is a list of 2 strings: @@ -80,6 +80,8 @@ def concat_sequences(seqs, separators, separator_mask=[]): np.array: mask for special tokens """ assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 + if not separator_mask: + separator_mask = [] concat = sum(( seq + sep for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), @@ -93,41 +95,17 @@ def concat_sequences(seqs, separators, separator_mask=[]): []) return concat, segment_ids, p_mask -def concat_sequences_2(seqs, separators, separator_mask=[]): - """ - Insert special tokens for sequence list or a single sequence. - For sequence pairs, the input is a list of 2 strings: - text_a, text_b. - Inputs: - text_a: 'is this jacksonville ?' - text_b: 'no it is not' - separator: [[SEP], [SEP]] - - Processed: - tokens: 'is this jacksonville ? [SEP] no it is not . [SEP]' - segment_ids: 0 0 0 0 0 1 1 1 1 1 1 - p_mask: 0 0 0 0 1 0 0 0 0 0 1 - valid_length: 11 - - Parameters - ---------- - separator : list - The special tokens to be appended to each sequence. For example: - Given: - seqs: [[1, 2], [3, 4], [5, 6]] - separator: [[], 7] - it will be: - [1, 2, 3, 4, 7, 5, 6] - - seqs : list of sequences or a single sequence - Returns - ------- - np.array: input token ids in 'int32', shape (batch_size, seq_length) - np.array: segment ids in 'int32', shape (batch_size, seq_length) - np.array: mask for special tokens - """ +def concat_sequences_extended(seqs, + separators, + seq_p_mask, + separator_mask=None): + """TBA""" assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 + assert len(seq_p_mask) == len(seqs), 'sequence position mask ' \ + 'should have the same length with sequences.' 
+ if not separator_mask: + separator_mask = [] concat = sum(( seq + sep for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), @@ -135,12 +113,12 @@ def concat_sequences_2(seqs, separators, separator_mask=[]): segment_ids = sum( ([i] * (len(seq) + len(sep)) for i, (sep, seq) in enumerate( itertools.zip_longest(separators, seqs, fillvalue=[]))), []) - p_mask = sum(( - [0] * len(seq) + mask - for sep, seq, mask in itertools.zip_longest(separators, seqs, separator_mask, fillvalue=[])), - []) + p_mask = sum( + (s_mask + mask for sep, seq, s_mask, mask in itertools.zip_longest( + separators, seqs, seq_p_mask, separator_mask, fillvalue=[])), []) return concat, segment_ids, p_mask + def tokenize_and_align_positions(origin_text, start_position, end_position, tokenizer): """Tokenize the text and align the origin positions to the corresponding position""" @@ -198,7 +176,8 @@ def align_position2doc_spans(positions, if not isinstance(positions, list): positions = [positions] doc_start, doc_end = doc_spans_indices - if all_in_span and not all([p in range(doc_start, doc_end) for p in positions]): + if all_in_span and not all( + [p in range(doc_start, doc_end) for p in positions]): return [default_value] * len(positions) new_positions = [ p - doc_start + @@ -290,8 +269,8 @@ def check_is_max_context(doc_spans, cur_span_index, position): SquadExample = collections.namedtuple('SquadExample', [ - 'qas_id', 'question_text', 'paragraph_text', 'doc_tokens', 'example_id', 'orig_answer_text', - 'start_position', 'end_position', 'is_impossible' + 'qas_id', 'question_text', 'paragraph_text', 'doc_tokens', 'example_id', + 'orig_answer_text', 'start_position', 'end_position', 'is_impossible' ]) @@ -343,12 +322,14 @@ def convert_squad_examples(record, is_training): return example -def preprocess_text(inputs, lower=False, remove_space=True, keep_accents=False): +def preprocess_text(inputs, lower=False, remove_space=True, + keep_accents=False): + """Simple text preprocess""" if 
remove_space: outputs = ' '.join(inputs.strip().split()) else: outputs = inputs - outputs = outputs.replace("``", '"').replace("''", '"') + outputs = outputs.replace('``', '"').replace('\'\'', '"') if not keep_accents: outputs = unicodedata.normalize('NFKD', outputs) outputs = ''.join([c for c in outputs if not unicodedata.combining(c)]) @@ -359,6 +340,7 @@ def preprocess_text(inputs, lower=False, remove_space=True, keep_accents=False): def _convert_index(index, pos, M=None, is_start=True): + """Working together with _lcs_match(), convert the token index to context index""" if index[pos] is not None: return index[pos] N = len(index) @@ -395,12 +377,17 @@ def _convert_index(index, pos, M=None, is_start=True): return index[front] -def _lcs_match(max_dist, seq1, seq2, max_first_seq_len, max_second_seq_len, lower=False): - f = np.zeros((max(len(seq1), 1024), max(len(seq2), 1024)), dtype=np.float32) +def _lcs_match(max_dist, seq1, seq2, lower=False): + """unlike standard LCS, this is specifically optimized for the setting + because the mismatch between sentence pieces and original text will be small + """ + f = np.zeros((max(len(seq1), 1024), max(len(seq2), 1024)), + dtype=np.float32) g = {} - for i in range(max_first_seq_len): + for i, token in enumerate(seq1): for j in range(i - max_dist, i + max_dist): - if j >= max_second_seq_len or j < 0: continue + if j >= len(seq2) or j < 0: + continue if i > 0: g[(i, j)] = 0 @@ -411,7 +398,7 @@ def _lcs_match(max_dist, seq1, seq2, max_first_seq_len, max_second_seq_len, lowe f[i, j] = f[i, j - 1] f_prev = f[i - 1, j - 1] if i > 0 and j > 0 else 0 - if (preprocess_text(seq1[i], lower=lower, + if (preprocess_text(token, lower=lower, remove_space=False) == seq2[j] and f_prev + 1 > f[i, j]): g[(i, j)] = 2 diff --git a/scripts/bert/finetune_classifier.py b/scripts/bert/finetune_classifier.py index 42e9fd4fa1..f6924eb364 100644 --- a/scripts/bert/finetune_classifier.py +++ b/scripts/bert/finetune_classifier.py @@ -72,7 +72,16 @@ parser 
= argparse.ArgumentParser( description='BERT fine-tune examples for classification/regression tasks.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +parser.add_argument('--optimizer', + type='str', + default='bertadam', + help='The optimizer to be used for training') parser.add_argument('--epochs', type=int, default=3, help='number of epochs.') +parser.add_argument('--training_steps', + type=int, + help='The total training steps. ' + 'Note that if specified, epochs will be ignored.') parser.add_argument( '--batch_size', type=int, @@ -495,14 +504,20 @@ def train(metric): all_model_params = model.collect_params() optimizer_params = {'learning_rate': lr, 'epsilon': epsilon, 'wd': 0.01} trainer = gluon.Trainer(all_model_params, - 'bertadam', + args.optimizer, optimizer_params, update_on_kvstore=False) if args.dtype == 'float16': amp.init_trainer(trainer) + epoch_number = args.epochs step_size = batch_size * accumulate if accumulate else batch_size num_train_steps = int(num_train_examples / step_size * args.epochs) + if args.training_steps: + num_train_steps = args.training_steps + epoch_number = 9999 + + logging.info('training steps=%d', num_train_steps) warmup_ratio = args.warmup_ratio num_warmup_steps = int(num_train_steps * warmup_ratio) step_num = 0 @@ -523,10 +538,13 @@ def train(metric): patience = args.early_stop tic = time.time() - for epoch_id in range(args.epochs): + finish_flag = False + for epoch_id in range(epoch_number): if args.early_stop and patience == 0: logging.info('Early stopping at epoch %d', epoch_id) break + if finish_flag: + break if not only_inference: metric.reset() step_loss = 0 @@ -574,7 +592,7 @@ def train(metric): all_model_params.zero_grad() step_loss += ls.asscalar() - if do_regression: + if not do_regression: label = label.reshape((-1)) metric.update([label], [out]) if (batch_id + 1) % (args.log_interval) == 0: @@ -582,6 +600,10 @@ def train(metric): args.log_interval, epoch_id, trainer.learning_rate) step_loss = 0 + if 
step_num >= num_train_steps: + logging.info('Finish training step: %d', step_num) + finish_flag = True + break mx.nd.waitall() # inference on dev data @@ -630,8 +652,6 @@ def evaluate(loader_dev, metric, segment): metric.reset() step_loss = 0 tic = time.time() - label_list = [] - out_list = [] for batch_id, seqs in enumerate(loader_dev): input_ids, valid_length, segment_ids, label = seqs input_ids = input_ids.as_in_context(ctx) @@ -642,12 +662,10 @@ def evaluate(loader_dev, metric, segment): else: out = model(input_ids, segment_ids.as_in_context(ctx), valid_length) - label_list.append(label.as_in_context(mx.cpu(0))) - out_list.append(out.as_in_context(mx.cpu(0))) - ls = loss_function(out, label).mean() + ls = loss_function(out, label).mean() step_loss += ls.asscalar() - if do_regression: + if not do_regression: label = label.reshape((-1)) metric.update([label], [out]) if (batch_id + 1) % (args.log_interval) == 0: diff --git a/scripts/bert/finetune_squad.py b/scripts/bert/finetune_squad.py index afc57bb86d..54b2a964fd 100644 --- a/scripts/bert/finetune_squad.py +++ b/scripts/bert/finetune_squad.py @@ -112,7 +112,10 @@ type=int, default=3, help='number of epochs, default is 3') - +parser.add_argument('--training_steps', + type=int, + help='training steps, epochs will be ignored ' + 'if trainin_steps is specified.') parser.add_argument( '--batch_size', type=int, @@ -221,7 +224,9 @@ action='store_true', help='Run the example in test mode for sanity checks') -parser.add_argument('--load_feature_from_pickle', action='store_true', help='load features from file if set') +parser.add_argument('--load_feature_from_pickle', + action='store_true', + help='load features from file if set') args = parser.parse_args() output_dir = args.output_dir @@ -373,12 +378,17 @@ def train(): log.info('Start Training') optimizer_params = {'learning_rate': lr} - trainer = mx.gluon.Trainer(net.collect_params(), optimizer, - optimizer_params, update_on_kvstore=False) - + trainer = 
mx.gluon.Trainer(net.collect_params(), + optimizer, + optimizer_params, + update_on_kvstore=False) num_train_examples = len(train_data_transform) step_size = batch_size * accumulate if accumulate else batch_size - num_train_steps = int(num_train_examples / step_size * epochs) + num_train_steps = int(num_train_examples / step_size * args.epochs) + if args.training_steps: + num_train_steps = args.training_steps + epochs = 9999 + num_warmup_steps = int(num_train_steps * warmup_ratio) step_num = 0 @@ -416,9 +426,12 @@ def set_new_lr(step_num, batch_id): epoch_tic = time.time() total_num = 0 log_num = 0 + finish_flag = False for epoch_id in range(epochs): step_loss = 0.0 tic = time.time() + if finish_flag: + break for batch_id, data in enumerate(train_dataloader): # set new lr step_num = set_new_lr(step_num, batch_id) @@ -461,6 +474,11 @@ def set_new_lr(step_num, batch_id): tic = time.time() step_loss = 0.0 log_num = 0 + + if step_num >= num_train_steps: + log.info('Finish training step: %d', step_num) + finish_flag = True + break epoch_toc = time.time() log.info('Time cost={:.2f} s, Thoughput={:.2f} samples/s'.format( epoch_toc - epoch_tic, total_num / (epoch_toc - epoch_tic))) @@ -671,7 +689,8 @@ def preprocess_dataset(tokenizer, pool = mp.Pool(num_workers) start = time.time() if not load_from_pickle: - example_trans = partial(convert_squad_examples, is_training=input_features) + example_trans = partial(convert_squad_examples, + is_training=input_features) # convert the raw dataset into raw features examples = pool.map(example_trans, dataset) raw_features = pool.map(trans, examples) @@ -699,8 +718,7 @@ def preprocess_dataset(tokenizer, example[10], # start_position, example[11])) # end_position else: - data_feature = mx.gluon.data.SimpleDataset( - list(raw_features)) + data_feature = mx.gluon.data.SimpleDataset(list(raw_features)) end = time.time() pool.close() diff --git a/scripts/language_model/data/qa.py b/scripts/language_model/data/qa.py deleted file mode 100644 
index 01e4df7afb..0000000000 --- a/scripts/language_model/data/qa.py +++ /dev/null @@ -1,690 +0,0 @@ -# Copyright 2018 The Google AI Language Team Authors and DMLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""XLNet for QA datasets.""" -import collections -import multiprocessing as mp -import unicodedata -import gc -from mxnet.gluon.data import SimpleDataset -import numpy as np -import gluonnlp as nlp -__all__ = ['SQuADTransform'] - - -class SquadExample: - """A single training/test example for SQuAD question. - - For examples without an answer, the start and end position are -1. 
- """ - def __init__(self, - qas_id, - question_text, - paragraph_text, - example_id, - orig_answer_text=None, - start_position=None, - is_impossible=False): - self.qas_id = qas_id - self.question_text = question_text - self.paragraph_text = paragraph_text - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.is_impossible = is_impossible - self.example_id = example_id - - -def convert_single_example_to_input(example): - """convert a single example into necessary features for model input""" - features = [] - for _example in example: - feature = [] - feature.append(_example.example_id) - feature.append(_example.input_ids) - feature.append(_example.segment_ids) - feature.append(_example.valid_length) - feature.append(_example.p_mask) - feature.append(_example.start_position) - feature.append(_example.end_position) - feature.append(_example.is_impossible) - feature.append(len(_example.input_ids)) - features.append(feature) - return features - - -def convert_examples_to_inputs(examples, num_workers=8): - """convert examples into necessary features for model input""" - pool = mp.Pool(num_workers) - dataset_transform = [] - for data in pool.map(convert_single_example_to_input, examples): - if data: - for _data in data: - dataset_transform.append(_data[:-1]) - dataset = SimpleDataset(dataset_transform).transform( - lambda x: (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7])) - - pool.close() - return dataset - - -def _encode_pieces(sp_model, text, sample=False): - """apply sentence pieces to raw text""" - if not sample: - pieces = sp_model.EncodeAsPieces(text) - else: - pieces = sp_model.SampleEncodeAsPieces(text, 64, 0.1) - - new_pieces = [] - for piece in pieces: - if len(piece) > 1 and piece[-1] == ',' and piece[-2].isdigit(): - cur_pieces = sp_model.EncodeAsPieces(piece[:-1].replace(u'▁', '')) - if piece[0] != u'▁' and cur_pieces[0][0] == u'▁': - if len(cur_pieces[0]) == 1: - cur_pieces = cur_pieces[1:] - else: - cur_pieces[0] = 
cur_pieces[0][1:] - cur_pieces.append(piece[-1]) - new_pieces.extend(cur_pieces) - else: - new_pieces.append(piece) - return new_pieces - - -class InputFeatures: - """A single set of features of data.""" - def __init__(self, - example_id, - qas_id, - doc_span_index, - tok_start_to_orig_index, - tok_end_to_orig_index, - token_is_max_context, - input_ids, - tokens, - valid_length, - p_mask, - segment_ids, - paragraph_text, - paragraph_len, - cls_index, - start_position=None, - end_position=None, - is_impossible=None): - self.example_id = example_id - self.qas_id = qas_id - self.doc_span_index = doc_span_index - self.tok_start_to_orig_index = tok_start_to_orig_index - self.tok_end_to_orig_index = tok_end_to_orig_index - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.tokens = tokens - self.valid_length = valid_length - self.p_mask = p_mask - self.segment_ids = segment_ids - self.paragraph_text = paragraph_text - self.paragraph_len = paragraph_len - self.cls_index = cls_index - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - -def _convert_index(index, pos, M=None, is_start=True): - """convert tokenized index to corresponding origin text index""" - if index[pos] is not None: - return index[pos] - N = len(index) - rear = pos - while rear < N - 1 and index[rear] is None: - rear += 1 - front = pos - while front > 0 and index[front] is None: - front -= 1 - assert index[front] is not None or index[rear] is not None - if index[front] is None: - if index[rear] >= 1: - if is_start: - return 0 - else: - return index[rear] - 1 - return index[rear] - if index[rear] is None: - if M is not None and index[front] < M - 1: - if is_start: - return index[front] + 1 - else: - return M - 1 - return index[front] - if is_start: - if index[rear] > index[front] + 1: - return index[front] + 1 - else: - return index[rear] - else: - if index[rear] > index[front] + 1: - return index[rear] - 1 - 
else: - return index[front] - - -def preprocess_text(inputs, lower=False, remove_space=True, - keep_accents=False): - """simple text clean""" - if remove_space: - outputs = ' '.join(inputs.strip().split()) - else: - outputs = inputs - outputs = outputs.replace('``', '"').replace('\'\'', '"') - if not keep_accents: - outputs = unicodedata.normalize('NFKD', outputs) - outputs = ''.join([c for c in outputs if not unicodedata.combining(c)]) - if lower: - outputs = outputs.lower() - - return outputs - - -class SQuADTransform: - """Dataset Transformation for XLNet-style QA. - - The transformation is processed in the following steps: - - Convert from gluonnlp.data.SQuAD's record to SquadExample. - - Tokenize the question_text in the example. - - For examples where the document is too long, - use a sliding window to split into multiple features and - record whether each token is a maximum context. - - Tokenize the split document chunks. - - Combine the token of question_text with the token - of the document and insert [CLS] and [SEP]. - - Generate the start position and end position of the answer. - - Generate valid length. - - E.g: - - Inputs: - - question_text: 'When did BBC Japan begin broadcasting?' 
- doc_tokens: ['BBC','Japan','was','a','general','entertainment','channel,', - 'which','operated','between','December','2004','and','April', - '2006.','It','ceased','operations','after','its','Japanese', - 'distributor','folded.'] - start_position: 10 - end_position: 11 - orig_answer_text: 'December 2004' - - Processed: - - tokens: ['when','did','bbc','japan','begin','broadcasting','?', - '[SEP]','bbc','japan','was','a','general','entertainment','channel', - ',','which','operated','between','december','2004','and','april', - '2006','.','it','ceased','operations','after','its','japanese', - 'distributor','folded','.','[SEP]','[CLS]'] - segment_ids: [0,0,0,0,0,0,0,0,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] - start_position: 19 - end_position: 20 - valid_length: 36 - - Because of the sliding window approach taken to scoring documents, a single - token can appear in multiple documents. - So you need to record whether each token is a maximum context. E.g. - Doc: the man went to the store and bought a gallon of milk - Span A: the man went to the - Span B: to the store and bought - Span C: and bought a gallon of - ... - - Now the word 'bought' will have two scores from spans B and C. We only - want to consider the score with "maximum context", which we define as - the *minimum* of its left and right context (the *sum* of left and - right context will always be the same, of course). - - In the example the maximum context for 'bought' would be span C since - it has 1 left context and 3 right context, while span B has 4 left context - and 0 right context. - - Parameters - ---------- - tokenizer : XLNetTokenizer. - Tokenizer for the sentences. - labels : list of int. - List of all label ids for the classification task. - max_seq_length : int, default 384 - Maximum sequence length of the sentences. - doc_stride : int, default 128 - When splitting up a long document into chunks, - how much stride to take between chunks. 
- max_query_length : int, default 64 - The maximum length of the query tokens. - is_pad : bool, default True - Whether to pad the sentences to maximum length. - is_training : bool, default True - Whether to run training. - do_lookup : bool, default True - Whether to do vocabulary lookup for convert tokens to indices. - """ - def __init__(self, - tokenizer, - vocab, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - is_pad=True, - uncased=False, - is_training=True): - self.tokenizer = tokenizer - self.vocab = vocab - self.max_seq_length = max_seq_length - self.max_query_length = max_query_length - self.doc_stride = doc_stride - self.is_pad = is_pad - self.is_training = is_training - self.uncased = uncased - - def _is_whitespace(self, c): - if c == ' ' or c == '\t' or c == '\r' or c == '\n' or ord(c) == 0x202F: - return True - return False - - def _toSquadExample(self, record): - example_id = record[0] - qas_id = record[1] - question_text = record[2] - paragraph_text = record[3] - orig_answer_text = record[4][0] if record[4] else '' - answer_offset = record[5][0] if record[5] else '' - is_impossible = record[6] if len(record) == 7 else False - - example = SquadExample(qas_id=qas_id, - question_text=question_text, - paragraph_text=paragraph_text, - example_id=example_id, - orig_answer_text=orig_answer_text, - start_position=answer_offset, - is_impossible=is_impossible) - return example - - def _transform(self, *record): - """Loads a data file into a list of `InputBatch`s.""" - - example = self._toSquadExample(record) - sp_model = nlp.data.SentencepieceTokenizer( - self.tokenizer._sentencepiece_path)._processor - max_N, max_M = 1024, 1024 - f = np.zeros((max_N, max_M), dtype=np.float32) - - query_tokens = self.tokenizer(example.question_text) - if len(query_tokens) > self.max_query_length: - query_tokens = query_tokens[0:self.max_query_length] - query_tokens = self.vocab.to_indices(query_tokens) - - paragraph_text = example.paragraph_text - para_tokens = 
_encode_pieces( - sp_model, preprocess_text(example.paragraph_text, self.uncased)) - - chartok_to_tok_index = [] - tok_start_to_chartok_index = [] - tok_end_to_chartok_index = [] - char_cnt = 0 - for i, token in enumerate(para_tokens): - chartok_to_tok_index.extend([i] * len(token)) - tok_start_to_chartok_index.append(char_cnt) - char_cnt += len(token) - tok_end_to_chartok_index.append(char_cnt - 1) - - tok_cat_text = ''.join(para_tokens).replace(u'▁', ' ') - N, M = len(paragraph_text), len(tok_cat_text) - - if N > max_N or M > max_M: - max_N = max(N, max_N) - max_M = max(M, max_M) - f = np.zeros((max_N, max_M), dtype=np.float32) - gc.collect() - - g = {} - - def _lcs_match(max_dist): - f.fill(0) - g.clear() - - ### longest common sub sequence - # f[i, j] = max(f[i - 1, j], f[i, j - 1], f[i - 1, j - 1] + match(i, j)) - for i in range(N): - - # note(zhiliny): - # unlike standard LCS, this is specifically optimized for the setting - # because the mismatch between sentence pieces and original text will - # be small - for j in range(i - max_dist, i + max_dist): - if j >= M or j < 0: - continue - if i > 0: - g[(i, j)] = 0 - f[i, j] = f[i - 1, j] - - if j > 0 and f[i, j - 1] > f[i, j]: - g[(i, j)] = 1 - f[i, j] = f[i, j - 1] - - f_prev = f[i - 1, j - 1] if i > 0 and j > 0 else 0 - if (preprocess_text(paragraph_text[i], - lower=self.uncased, - remove_space=False) == tok_cat_text[j] - and f_prev + 1 > f[i, j]): - g[(i, j)] = 2 - f[i, j] = f_prev + 1 - - max_dist = abs(N - M) + 5 - for _ in range(2): - _lcs_match(max_dist) - if f[N - 1, M - 1] > 0.8 * N: - break - max_dist *= 2 - - orig_to_chartok_index = [None] * N - chartok_to_orig_index = [None] * M - i, j = N - 1, M - 1 - while i >= 0 and j >= 0: - if (i, j) not in g: - break - if g[(i, j)] == 2: - orig_to_chartok_index[i] = j - chartok_to_orig_index[j] = i - i, j = i - 1, j - 1 - elif g[(i, j)] == 1: - j = j - 1 - else: - i = i - 1 - - if all(v is None - for v in orig_to_chartok_index) or f[N - 1, M - 1] < 0.8 * N: - 
print('MISMATCH DETECTED!') - return None - - tok_start_to_orig_index = [] - tok_end_to_orig_index = [] - for i in range(len(para_tokens)): - start_chartok_pos = tok_start_to_chartok_index[i] - end_chartok_pos = tok_end_to_chartok_index[i] - start_orig_pos = _convert_index(chartok_to_orig_index, - start_chartok_pos, - N, - is_start=True) - end_orig_pos = _convert_index(chartok_to_orig_index, - end_chartok_pos, - N, - is_start=False) - - tok_start_to_orig_index.append(start_orig_pos) - tok_end_to_orig_index.append(end_orig_pos) - - if not self.is_training: - tok_start_position = tok_end_position = None - - if self.is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - - if self.is_training and not example.is_impossible: - start_position = example.start_position - end_position = start_position + len(example.orig_answer_text) - 1 - - start_chartok_pos = _convert_index(orig_to_chartok_index, - start_position, - is_start=True) - tok_start_position = chartok_to_tok_index[start_chartok_pos] - - end_chartok_pos = _convert_index(orig_to_chartok_index, - end_position, - is_start=False) - tok_end_position = chartok_to_tok_index[end_chartok_pos] - assert tok_start_position <= tok_end_position - - def _piece_to_id(x): - return sp_model.PieceToId(x) - - all_doc_tokens = list(map(_piece_to_id, para_tokens)) - - # The -3 accounts for [CLS], [SEP] and [SEP] - max_tokens_for_doc = self.max_seq_length - len(query_tokens) - 3 - - # We can have documents that are longer than the maximum sequence length. - # To deal with this we do a sliding window approach, where we take chunks - # of the up to our max length with a stride of `doc_stride`. 
- _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - 'DocSpan', ['start', 'length']) - doc_spans = [] - features = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, self.doc_stride) - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - token_is_max_context = {} - segment_ids = [] - p_mask = [] - - cur_tok_start_to_orig_index = [] - cur_tok_end_to_orig_index = [] - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - - cur_tok_start_to_orig_index.append( - tok_start_to_orig_index[split_token_index]) - cur_tok_end_to_orig_index.append( - tok_end_to_orig_index[split_token_index]) - - is_max_context = _check_is_max_context(doc_spans, - doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(0) - p_mask.append(0) - - paragraph_len = len(tokens) - - #add sep token - tokens.append(4) - segment_ids.append(0) - p_mask.append(1) - - # note(zhiliny): we put P before Q - # because during pretraining, B is always shorter than A - for token in query_tokens: - tokens.append(token) - segment_ids.append(1) - p_mask.append(1) - #add sep token - tokens.append(4) - segment_ids.append(1) - p_mask.append(1) - - #add cls token - tokens.append(3) - segment_ids.append(2) - p_mask.append(0) - - input_ids = tokens - - # The mask has 0 for real tokens and 1 for padding tokens. Only real - # tokens are attended to. - valid_length = len(input_ids) - # Zero-pad up to the sequence length. 
- cls_index = len(input_ids) - 1 - while len(input_ids) < self.max_seq_length: - padding_length = self.max_seq_length - valid_length - input_ids = input_ids + [0] * padding_length - segment_ids = segment_ids + [3] * padding_length - p_mask = p_mask + [1] * padding_length - - assert len(input_ids) == self.max_seq_length - assert len(segment_ids) == self.max_seq_length - assert len(p_mask) == self.max_seq_length - - span_is_impossible = example.is_impossible - start_position = None - end_position = None - if self.is_training and not span_is_impossible: - # For training, if our document chunk does not contain an annotation - # we throw it out, since there is nothing to predict. - doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start - and tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - # continue - start_position = 0 - end_position = 0 - span_is_impossible = True - else: - # note(zhiliny): we put P before Q, so doc_offset should be zero. 
- # doc_offset = len(query_tokens) + 2 - doc_offset = 0 - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - - if self.is_training and span_is_impossible: - start_position = cls_index - end_position = cls_index - - if example.example_id < 20: - print('*** Example ***') - print('qas_id: %s' % (example.qas_id)) - print('example_index: %s' % (example.example_id)) - print('doc_span_index: %s' % (doc_span_index)) - print('tok_start_to_orig_index: %s' % - ' '.join([str(x) for x in cur_tok_start_to_orig_index])) - print('tok_end_to_orig_index: %s' % - ' '.join([str(x) for x in cur_tok_end_to_orig_index])) - print('token_is_max_context: %s' % ' '.join([ - '%d:%s' % (x, y) - for (x, y) in token_is_max_context.items() - ])) - print('input_ids: %s' % ' '.join([str(x) for x in input_ids])) - print('p_mask: %s' % ' '.join([str(x) for x in p_mask])) - print('segment_ids: %s' % - ' '.join([str(x) for x in segment_ids])) - - if self.is_training and span_is_impossible: - print('impossible example span') - - if self.is_training and not span_is_impossible: - pieces = [ - sp_model.IdToPiece(token) - for token in tokens[start_position :(end_position + 1)] - ] - answer_text = sp_model.DecodePieces(pieces) - print('start_position: %d' % - (start_position)) - print('end_position: %d' % (end_position)) - print('answer: %s' % (answer_text)) - - # note(zhiliny): With multi processing, - # the example_index is actually the index within the current process - # therefore we use example_index=None to avoid being used in the future. - # The current code does not use example_index of training data. 
- # if self.is_training: - # feat_example_index = None - # else: - # feat_example_index = example.example_id - - feature = InputFeatures( - example_id=example.example_id, - qas_id=example.qas_id, - doc_span_index=doc_span_index, - tok_start_to_orig_index=cur_tok_start_to_orig_index, - tok_end_to_orig_index=cur_tok_end_to_orig_index, - token_is_max_context=token_is_max_context, - tokens=tokens, - input_ids=input_ids, - valid_length=valid_length, - p_mask=p_mask, - segment_ids=segment_ids, - paragraph_text=example.paragraph_text, - paragraph_len=paragraph_len, - cls_index=cls_index, - start_position=start_position, - end_position=end_position, - is_impossible=span_is_impossible) - features.append(feature) - - return features - - def __call__(self, record, evaluate=False): - examples = self._transform(*record) - if not examples: - return None - features = [] - - for _example in examples: - feature = [] - feature.append(_example.example_id) - feature.append(_example.input_ids) - feature.append(_example.segment_ids) - feature.append(_example.valid_length) - feature.append(_example.p_mask) - feature.append(_example.start_position) - feature.append(_example.end_position) - feature.append(_example.is_impossible) - feature.append(len(_example.input_ids)) - features.append(feature) - - return features - - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. 
We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + \ - 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index diff --git a/scripts/language_model/run_glue.py b/scripts/language_model/run_glue.py index 4cd444e00b..1d56435e98 100644 --- a/scripts/language_model/run_glue.py +++ b/scripts/language_model/run_glue.py @@ -9,6 +9,7 @@ import logging import warnings import sys +import pickle from functools import partial import numpy as np import mxnet as mx @@ -43,75 +44,150 @@ description='XLNet fine-tune examples for classification/regression tasks.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) +# Training config parser.add_argument('--epochs', type=int, default=3, help='number of epochs.') +parser.add_argument('--training_steps', + type=int, + help='If specified, epochs will be ignored.') +parser.add_argument( + '--batch_size', + type=int, + default=128, + help='Batch size. Number of examples per gpu in a minibatch.') -parser.add_argument('--batch_size', type=int, default=128, - help='Batch size. 
Number of examples per gpu in a minibatch.') +parser.add_argument( + '--accumulate', + type=int, + default=None, + help= + 'The number of batches for gradients accumulation to simulate large batch size. ' + 'Default is None') -parser.add_argument('--dev_batch_size', type=int, default=32, +parser.add_argument('--dev_batch_size', + type=int, + default=32, help='Batch size for dev set and test set') -parser.add_argument('--lr', type=float, default=3e-5, help='Initial learning rate') +parser.add_argument('--dropout', type=float, default=0.1, help='dropout') +parser.add_argument('--attention_dropout', + type=float, + default=0.1, + help='attention dropout') +parser.add_argument('--log_interval', + type=int, + default=10, + help='report interval') +parser.add_argument( + '--early_stop', + type=int, + default=None, + help='Whether to perform early stopping based on the metric on dev set. ' + 'The provided value is the patience. ') -parser.add_argument('--epsilon', type=float, default=1e-6, +# Optimizer config +parser.add_argument('--optimizer', type=str, default='Adam', help='') +parser.add_argument('--lr', + type=float, + default=3e-5, + help='Initial learning rate') +parser.add_argument('--lr_decay', + type=str, + choices=['linear'], + default='linear', + help='lr schedule') +parser.add_argument('--epsilon', + type=float, + default=1e-6, help='Small value to avoid division by 0') -parser.add_argument('--warmup_ratio', type=float, default=0, - help='ratio of warmup steps used in NOAM\'s stepsize schedule') -parser.add_argument('--log_interval', type=int, default=10, help='report interval') -parser.add_argument('--max_len', type=int, default=128, help='Maximum length of the sentence pairs') +parser.add_argument( + '--warmup_ratio', + type=float, + default=0, + help='ratio of warmup steps used in NOAM\'s stepsize schedule') + +# task spesific & data preprocessing +parser.add_argument('--gpu', + type=int, + default=None, + help='Number of gpus for finetuning.') 
+parser.add_argument('--task_name', + default='MRPC', + type=str, + help='The name of the task to fine-tune.') parser.add_argument( - '--pad', default=True, action='store_true', + '--model_name', + type=str, + default='xlnet_cased_l12_h768_a12', + choices=['xlnet_cased_l24_h1024_a16', 'xlnet_cased_l12_h768_a12'], + help='The name of pre-trained XLNet model to fine-tune') + +parser.add_argument('--dataset', + type=str, + default='126gb', + help='The dataset BERT pre-trained with.') +parser.add_argument('--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') +parser.add_argument( + '--pad', + default=True, + action='store_true', help='Whether to pad to maximum length when preparing data batches. ' 'Have to be true currently due to left padding') -parser.add_argument('--seed', type=int, default=2, help='Random seed') - parser.add_argument( - '--accumulate', type=int, default=None, - help='The number of batches for gradients accumulation to simulate large batch size. ' - 'Default is None') -parser.add_argument('--gpu', type=int, default=None, help='Number of gpus for finetuning.') -parser.add_argument('--cpu', type=int, default=None, help='Number of cpus for finetuning.') -parser.add_argument('--task_name', default='MRPC', type=str, - help='The name of the task to fine-tune.') + '--only_inference', + action='store_true', + help= + 'If set, we skip training and only perform inference on dev and test data.' 
+) -parser.add_argument('--model_name', type=str, default='xlnet_cased_l12_h768_a12', - choices=['xlnet_cased_l24_h1024_a16', 'xlnet_cased_l12_h768_a12'], - help='The name of pre-trained XLNet model to fine-tune') - -parser.add_argument('--dataset', type=str, default='126gb', - help='The dataset BERT pre-trained with.') - -parser.add_argument('--output_dir', type=str, default='./output_dir', - help='The output directory where the model params will be written.') +# Initializing config +parser.add_argument('--seed', type=int, default=2, help='Random seed') +# I/O config parser.add_argument( - '--only_inference', action='store_true', - help='If set, we skip training and only perform inference on dev and test data.') - + '--output_dir', + type=str, + default='./output_dir', + help='The output directory where the model params will be written.') parser.add_argument( - '--model_parameters', type=str, default=None, + '--model_parameters', + type=str, + default=None, help='A parameter file for the model that is loaded into the model' ' before training/inference. It is different from the parameter' ' file written after the model is trained.') -parser.add_argument( - '--early_stop', type=int, default=None, - help='Whether to perform early stopping based on the metric on dev set. ' - 'The provided value is the patience. 
') - -parser.add_argument('--dropout', type=float, default=0.1, help='dropout') -parser.add_argument('--attention_dropout', type=float, default=0.1, help='attention dropout') -parser.add_argument('--lr_decay', type=str, default='linear', help='lr decay') args = parser.parse_args() -def split_and_load(arrs, ctx): +def split_array(arr, num_of_splits): + """split an array into equal pieces""" + # TODO Replace this function with gluon.utils.split_data() once targeting MXNet 1.7 + size = arr.shape[0] + if size < num_of_splits: + return [arr[i:i + 1] for i in range(size)] + slice_len, rest = divmod(size, num_of_splits) + div_points = [0] + [(slice_len * index + min(index, rest) + slice_len + + (index < rest)) for index in range(num_of_splits)] + slices = [ + arr[div_points[i]:div_points[i + 1]] for i in range(num_of_splits) + ] + return slices + + +def split_and_load(arrs, _ctxs): """split and load arrays to a list of contexts""" + # TODO Replace split_array() with gluon.utils.split_data() once targeting MXNet 1.7 assert isinstance(arrs, (list, tuple)) # split and load - loaded_arrs = [mx.gluon.utils.split_and_load(arr, ctx, even_split=False) for arr in arrs] + loaded_arrs = [[ + i.as_in_context(ctx) + for i, ctx in zip(split_array(arr, len(_ctxs)), _ctxs) + ] for arr in arrs] return zip(*loaded_arrs) @@ -148,10 +224,9 @@ def convert_examples_to_features(example, # truncate to the truncate_length, tokens_trun = truncate_seqs_equal(tokens_raw, truncate_length) # concate the sequences with special tokens, cls_token is added to the end in XlNet - special_tokens = [[sep_token]] * len(tokens_trun) - special_tokens[-1].append(cls_token) - tokens, segment_ids, _ = concat_sequences(tokens_trun, - special_tokens) + special_tokens = [[sep_token]] * len(tokens_trun) + [[cls_token]] + #special_tokens.append([cls_token]) + tokens, segment_ids, _ = concat_sequences(tokens_trun, special_tokens) # convert the token to ids input_ids = vocab[tokens] valid_length = len(input_ids) @@ 
-161,34 +236,48 @@ def convert_examples_to_features(example, return input_ids, valid_length, segment_ids -def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, - vocab): +def preprocess_data(tokenizer, + task, + batch_size, + dev_batch_size, + max_len, + vocab, + load_from_pickle=False): #pylint: disable=redefined-outer-name """Train/eval Data preparation function.""" label_dtype = 'int32' if task.class_labels else 'float32' truncate_length = max_len - 3 if task.is_pair else max_len - 2 - trans = partial( - convert_examples_to_features, - tokenizer=tokenizer, - truncate_length=truncate_length, - cls_token=vocab.cls_token, - sep_token=vocab.sep_token, - class_labels=task.class_labels, - label_alias=task.label_alias, - vocab=vocab) + trans = partial(convert_examples_to_features, + tokenizer=tokenizer, + truncate_length=truncate_length, + cls_token=vocab.cls_token, + sep_token=vocab.sep_token, + class_labels=task.class_labels, + label_alias=task.label_alias, + vocab=vocab) # data train # task.dataset_train returns (segment_name, dataset) - train_tsv = task.dataset_train()[1] - data_train = mx.gluon.data.SimpleDataset(list(map(trans, train_tsv))) + filename = 'xlnet_' + args.task_name + '_feature.train' + train_feautre_path = os.path.join(args.output_dir, filename) + if not load_from_pickle: + train_tsv = task.dataset_train()[1] + data_train = list(map(trans, train_tsv)) + with open(train_feautre_path, 'wb') as file: + pickle.dump(data_train, file) + else: + with open(train_feautre_path, 'rb') as file: + data_train = pickle.load(file) + data_train = mx.gluon.data.SimpleDataset(data_train) data_train_len = data_train.transform( lambda _, valid_length, segment_ids, label: valid_length, lazy=False) + # bucket sampler for training pad_val = vocab[vocab.padding_token] batchify_fn = nlp.data.batchify.Tuple( nlp.data.batchify.Pad(axis=0, pad_val=pad_val), # input nlp.data.batchify.Stack(), # length - nlp.data.batchify.Pad(axis=0, pad_val=0), # segment + 
nlp.data.batchify.Pad(axis=0, pad_val=4), # segment nlp.data.batchify.Stack(label_dtype)) # label batch_sampler = nlp.data.sampler.FixedBucketSampler(data_train_len, batch_size=batch_size, @@ -218,16 +307,16 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, test_batchify_fn = nlp.data.batchify.Tuple( nlp.data.batchify.Pad(axis=0, pad_val=pad_val), nlp.data.batchify.Stack(), nlp.data.batchify.Pad(axis=0, pad_val=0)) + # transform for data test - test_trans = partial( - convert_examples_to_features, - tokenizer=tokenizer, - truncate_length=max_len, - cls_token=vocab.cls_token, - sep_token=vocab.sep_token, - class_labels=None, - is_test=True, - vocab=vocab) + test_trans = partial(convert_examples_to_features, + tokenizer=tokenizer, + truncate_length=max_len, + cls_token=vocab.cls_token, + sep_token=vocab.sep_token, + class_labels=None, + is_test=True, + vocab=vocab) # data test. For MNLI, more than one test set is available test_tsv = task.dataset_test() @@ -243,6 +332,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, loader_test_list.append((segment, loader_test)) return loader_train, loader_dev_list, loader_test_list, len(data_train) + logger = logging.getLogger() logger.setLevel(logging.INFO) logging.captureWarnings(True) @@ -250,7 +340,8 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, handler.setLevel(logging.INFO) handler2 = logging.StreamHandler() handler2.setLevel(logging.INFO) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') handler.setFormatter(formatter) handler2.setFormatter(formatter) logger.addHandler(handler) @@ -273,7 +364,6 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, task = tasks[args.task_name] - # model and loss if args.only_inference and not args.model_parameters: warnings.warn('model_parameters is not set. 
' @@ -303,7 +393,9 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, num_classes = len(task.class_labels) loss_function = gluon.loss.SoftmaxCELoss() # reuse the XLnetClassifier class with num_classes=1 for regression -model = XLNetClassifier(xlnet_base, units=xlnet_base._net._units, dropout=0.1, +model = XLNetClassifier(xlnet_base, + units=xlnet_base._net._units, + dropout=0.1, num_classes=num_classes) num_ctxes = len(ctxs) @@ -318,7 +410,10 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, if args.model_parameters: logging.info('loading model params from %s', args.model_parameters) - nlp.utils.load_parameters(model, args.model_parameters, ctx=ctxs, cast_dtype=True) + nlp.utils.load_parameters(model, + args.model_parameters, + ctx=ctxs, + cast_dtype=True) nlp.utils.mkdir(output_dir) @@ -326,10 +421,10 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, model.hybridize(static_alloc=True) loss_function.hybridize(static_alloc=True) -# Get the loader. 
logging.info('processing dataset...') train_data, dev_data_list, test_data_list, num_train_examples = preprocess_data( - tokenizer, task, args.batch_size, args.dev_batch_size, args.max_len, vocab) + tokenizer, task, args.batch_size, args.dev_batch_size, args.max_len, vocab, + args.pad) def test(loader_test, segment): @@ -379,15 +474,18 @@ def log_metric(metric, is_training=True): metric_nm, metric_val = metric.get() if not isinstance(metric_nm, list): metric_nm, metric_val = [metric_nm], [metric_val] - logging_str = prefix + ' metrics:' + ','.join([i + ':%.4f' for i in metric_nm]) + logging_str = prefix + ' metrics:' + ','.join( + [i + ':%.4f' for i in metric_nm]) logging.info(logging_str, *metric_val) return metric_nm, metric_val -def log_train(batch_id, batch_num, step_loss, _log_interval, epoch_id, learning_rate): + +def log_train(batch_id, batch_num, step_loss, _log_interval, epoch_id, + learning_rate): """Generate and print out the log message for training. """ train_str = '[Epoch %d Batch %d/%d] loss=%.4f, lr=%.7f' - logging.info(train_str, epoch_id + 1, batch_id + 1, batch_num, step_loss / _log_interval, - learning_rate) + logging.info(train_str, epoch_id + 1, batch_id + 1, batch_num, + step_loss / _log_interval, learning_rate) def log_eval(batch_id, batch_num, step_loss, _log_interval): @@ -399,14 +497,27 @@ def log_eval(batch_id, batch_num, step_loss, _log_interval): def train(metric): """Training function.""" if not args.only_inference: - logging.info('Now we are doing XLNet classification training on %s!', ctxs) + logging.info('Now we are doing XLNet classification training on %s!', + ctxs) all_model_params = model.collect_params() - optimizer_params = {'learning_rate': args.lr, 'epsilon': args.epsilon, 'wd': 0} - trainer = gluon.Trainer(all_model_params, 'adam', optimizer_params, update_on_kvstore=False) + optimizer_params = { + 'learning_rate': args.lr, + 'epsilon': args.epsilon, + 'wd': 0 + } + trainer = gluon.Trainer(all_model_params, + 
args.optimizer, + optimizer_params, + update_on_kvstore=False) step_size = args.batch_size * args.accumulate if args.accumulate else args.batch_size num_train_steps = int(num_train_examples / step_size * args.epochs) + epoch_number = args.epochs + if args.training_steps: + num_train_steps = args.training_steps + epoch_number = 9999 + logging.info('training steps=%d', num_train_steps) warmup_ratio = args.warmup_ratio num_warmup_steps = int(num_train_steps * warmup_ratio) step_num = 0 @@ -427,10 +538,13 @@ def train(metric): patience = args.early_stop tic = time.time() - for epoch_id in range(args.epochs): + finish_flag = False + for epoch_id in range(epoch_number): if args.early_stop and patience == 0: logging.info('Early stopping at epoch %d', epoch_id) break + if finish_flag: + break if not args.only_inference: metric.reset() step_loss = 0 @@ -443,7 +557,8 @@ def train(metric): new_lr = args.lr * step_num / num_warmup_steps elif args.lr_decay == 'linear': non_warmup_steps = step_num - num_warmup_steps - offset = non_warmup_steps / (num_train_steps - num_warmup_steps) + offset = non_warmup_steps / (num_train_steps - + num_warmup_steps) new_lr = max(0, args.lr - offset * args.lr) trainer.set_learning_rate(new_lr) batch_loss = [] @@ -452,12 +567,17 @@ def train(metric): data_list = list(split_and_load(seqs, ctxs)) for splited_data in data_list: input_ids, valid_length, segment_ids, label = splited_data - out = model(input_ids, segment_ids, valid_length=valid_length) + out = model(input_ids, + segment_ids, + valid_length=valid_length) ls = loss_function(out, label).mean() / len(ctxs) - ls.backward() batch_loss.append(ls) + if args.accumulate: + ls = ls / args.accumulate + ls.backward() # update - if not args.accumulate or (batch_id + 1) % args.accumulate == 0: + if not args.accumulate or (batch_id + + 1) % args.accumulate == 0: trainer.allreduce_grads() nlp.utils.clip_grad_global_norm(params, 1) trainer.update(args.accumulate if args.accumulate else 1, @@ -466,12 
+586,23 @@ def train(metric): if args.accumulate and args.accumulate > 1: # set grad to zero for gradient accumulation all_model_params.zero_grad() + if batch_id == 0 and epoch_id == 0: + toc = time.time() + logging.info( + 'Time cost for the first forward-backward =%.2fs', + toc - tic) batch_loss = sum([ls.asscalar() for ls in batch_loss]) step_loss += batch_loss if (batch_id + 1) % (args.log_interval) == 0: - log_train(batch_id, len(train_data), step_loss, args.log_interval, - epoch_id, trainer.learning_rate) + log_train(batch_id, len(train_data), step_loss, + args.log_interval, epoch_id, + trainer.learning_rate) step_loss = 0 + if step_num >= num_train_steps: + logging.info('Finish training step: %d', step_num) + finish_flag = True + break + mx.nd.waitall() # inference on dev data @@ -487,7 +618,8 @@ def train(metric): if not args.only_inference: # save params - ckpt_name = 'model_xlnet_{0}_{1}.params'.format(args.task_name, epoch_id) + ckpt_name = 'model_xlnet_{0}_{1}.params'.format( + args.task_name, epoch_id) params_saved = os.path.join(output_dir, ckpt_name) nlp.utils.save_parameters(model, params_saved) logging.info('params saved in: %s', params_saved) @@ -500,10 +632,12 @@ def train(metric): # assuming higher score stands for better model quality metric_history.sort(key=lambda x: x[2][0], reverse=True) epoch_id, metric_nm, metric_val = metric_history[0] - ckpt_name = 'model_xlnet_{0}_{1}.params'.format(args.task_name, epoch_id) + ckpt_name = 'model_xlnet_{0}_{1}.params'.format( + args.task_name, epoch_id) params_saved = os.path.join(output_dir, ckpt_name) nlp.utils.load_parameters(model, params_saved) - metric_str = 'Best model at epoch {}. Validation metrics:'.format(epoch_id + 1) + metric_str = 'Best model at epoch {}. 
Validation metrics:'.format( + epoch_id + 1) metric_str += ','.join([i + ':%.4f' for i in metric_nm]) logging.info(metric_str, *metric_val) @@ -527,11 +661,12 @@ def evaluate(loader_dev, metric, segment): data_list = list(split_and_load(seqs, ctxs)) for splited_data in data_list: input_ids, valid_length, segment_ids, label = splited_data - label = label.reshape((-1)) out = model(input_ids, segment_ids, valid_length=valid_length) + batch_loss.append(loss_function(out, label).mean() / len(ctxs)) + if not do_regression: + label = label.reshape((-1)) out_list.append(out.as_in_context(mx.cpu(0))) label_list.append(label.as_in_context(mx.cpu(0))) - batch_loss.append(loss_function(out, label).mean() / len(ctxs)) batch_loss = sum([ls.asscalar() for ls in batch_loss]) step_loss += batch_loss diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 65a3021050..59e1a27ec5 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -1,137 +1,220 @@ """ Question Answering with XLNet """ +# pylint:disable=redefined-outer-name,logging-format-interpolation + import os import time import argparse import random import logging import warnings -import copy import json import collections import pickle +import sys +import itertools +import multiprocessing as mp +from functools import partial import numpy as np import mxnet as mx import gluonnlp as nlp from gluonnlp.data import SQuAD from model.qa import XLNetForQA -from data.new_qa import SQuADTransform, preprocess_dataset, convert_examples_to_inputs from transformer import model from xlnet_qa_evaluate import predict_extended from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad -os.environ['MXNET_USE_FUSION'] = '0' -log = logging.getLogger('gluonnlp') -log.setLevel(logging.DEBUG) -formatter = logging.Formatter(fmt='%(levelname)s:%(name)s:%(asctime)s %(message)s', - datefmt='%H:%M:%S') - -parser = argparse.ArgumentParser(description='XLNet QA example.' 
- 'We fine-tune the XLNet model on SQuAD dataset.') - -parser.add_argument('--only_predict', action='store_true', help='Whether to predict only.') - -parser.add_argument('--model_parameters', type=str, default=None, help='Model parameter file') - -parser.add_argument('--model', type=str, default='xlnet_cased_l12_h768_a12', - help='The name of pre-trained XLNet model to fine-tune') - -parser.add_argument('--dataset', type=str, default='126gb', - help='The dataset BERT pre-trained with.') +path = sys.path[0] +sys.path.append(path + '/../bert/data') +#pylint: disable=wrong-import-position +from preprocessing_utils import concat_sequences_extended, get_doc_spans, \ + check_is_max_context, convert_squad_examples, _lcs_match, _convert_index -parser.add_argument('--predict_file', default='./data/dev-v2.0.json', type=str, - help='SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json') - -parser.add_argument('--uncased', action='store_true', - help='if set, inputs are converted to lower case.') +parser = argparse.ArgumentParser( + description='XLNet QA example.' + 'We fine-tune the XLNet model on SQuAD dataset.') +# I/O configuration +parser.add_argument( + '--sentencepiece', + type=str, + default=None, + help= + 'Path to the sentencepiece .model file for both tokenization and vocab.') +parser.add_argument( + '--pretrained_xlnet_parameters', + type=str, + default=None, + help='Pre-trained bert model parameter file. 
default is None') +parser.add_argument( + '--raw', + action='store_true', + help='Whether do data preprocessing or load from pickled file') +parser.add_argument('--dev_dataset_file', + default='./output_dir/out.dev', + type=str, + help='Path to dev data features') +parser.add_argument('--train_dataset_file', + default='./output_dir/out.train', + type=str, + help='Path to train data features') +parser.add_argument('--model_parameters', + type=str, + default=None, + help='Model parameter file') parser.add_argument( - '--output_dir', type=str, default='./output_dir', + '--output_dir', + type=str, + default='./output_dir', help='The output directory where the model params will be written.' ' default is ./output_dir') -parser.add_argument('--epochs', type=int, default=3, help='number of epochs, default is 3') - -parser.add_argument('--batch_size', type=int, default=32, - help='Batch size. Number of examples per gpu in a minibatch. default is 32') +# Training configuration +parser.add_argument('--seed', type=int, default=3, help='Random seed') +parser.add_argument('--version_2', + action='store_true', + help='Whether use SQuAD v2.0 dataset') +parser.add_argument( + '--model', + type=str, + default='xlnet_cased_l12_h768_a12', + choices=['xlnet_cased_l24_h1024_a16', 'xlnet_cased_l12_h768_a12'], + help='The name of pre-trained XLNet model to fine-tune') +parser.add_argument( + '--dataset', + type=str, + default='126gb', + choices=['126gb'], + help='The dataset BERT pre-trained with. Currently only 126gb is available' +) +parser.add_argument( + '--uncased', + action='store_true', + help= + 'if set, inputs are converted to lower case. Up to 01/04/2020, all released models are cased' +) +parser.add_argument( + '--gpu', + type=int, + default=None, + help='Number of gpus to use for finetuning. CPU is used if not set.') +parser.add_argument('--log_interval', + type=int, + default=10, + help='report interval. 
default is 10') +parser.add_argument('--debug', + action='store_true', + help='Run the example in test mode for sanity checks') +parser.add_argument('--only_predict', + action='store_true', + help='Whether to predict only.') + +# Hyperparameters +parser.add_argument('--epochs', + type=int, + default=3, + help='number of epochs, default is 3') +parser.add_argument('--training_steps', + type=int, + help='training steps. Note that epochs will be ignored ' + 'if training steps are set') -parser.add_argument('--test_batch_size', type=int, default=24, +parser.add_argument( + '--batch_size', + type=int, + default=32, + help='Batch size. Number of examples per gpu in a minibatch. default is 32' +) + +parser.add_argument('--test_batch_size', + type=int, + default=24, help='Test batch size. default is 24') -parser.add_argument('--optimizer', type=str, default='bertadam', +parser.add_argument('--optimizer', + type=str, + default='bertadam', help='optimization algorithm. default is bertadam') parser.add_argument( - '--accumulate', type=int, default=None, help='The number of batches for ' + '--accumulate', + type=int, + default=None, + help='The number of batches for ' 'gradients accumulation to simulate large batch size. Default is None') -parser.add_argument('--lr', type=float, default=3e-5, help='Initial learning rate. default is 5e-5') +parser.add_argument('--lr', + type=float, + default=3e-5, + help='Initial learning rate. default is 5e-5') parser.add_argument( - '--warmup_ratio', type=float, default=0, + '--warmup_ratio', + type=float, + default=0, help='ratio of warmup steps that linearly increase learning rate from ' '0 to target learning rate. 
default is 0') +parser.add_argument('--layerwise_decay', + type=float, + default=0.75, + help='Layer-wise lr decay') +parser.add_argument('--wd', type=float, default=0.01, help='weight decay') +parser.add_argument('--dropout', type=float, default=0.1, help='dropout') +parser.add_argument('--attention_dropout', + type=float, + default=0.1, + help='attention dropout') -parser.add_argument('--log_interval', type=int, default=10, help='report interval. default is 10') - +# Data pre/post processing parser.add_argument( - '--max_seq_length', type=int, default=512, + '--max_seq_length', + type=int, + default=512, help='The maximum total input sequence length after WordPiece tokenization.' 'Sequences longer than this will be truncated, and sequences shorter ' 'than this will be padded. default is 512') parser.add_argument( - '--doc_stride', type=int, default=128, + '--doc_stride', + type=int, + default=128, help='When splitting up a long document into chunks, how much stride to ' 'take between chunks. default is 128') parser.add_argument( - '--max_query_length', type=int, default=64, + '--max_query_length', + type=int, + default=64, help='The maximum number of tokens for the question. Questions longer than ' 'this will be truncated to this length. default is 64') +parser.add_argument('--start_top_n', + type=int, + default=5, + help='Number of start-position candidates') +parser.add_argument('--end_top_n', + type=int, + default=5, + help='Number of end-position candidates corresponding ' + 'to a start position') parser.add_argument( - '--n_best_size', type=int, default=20, - help='The total number of n-best predictions to generate in the ' - 'nbest_predictions.json output file. default is 20') - -parser.add_argument( - '--max_answer_length', type=int, default=64, + '--max_answer_length', + type=int, + default=64, help='The maximum length of an answer that can be generated. This is needed ' 'because the start and end predictions are not conditioned on one another.' 
' default is 64') -parser.add_argument('--version_2', action='store_true', - help='SQuAD examples whether contain some that do not have an answer.') - parser.add_argument( - '--null_score_diff_threshold', type=float, default=0.0, - help='If null_score - best_non_null is greater than the threshold predict null.' - 'Typical values are between -1.0 and -5.0. default is 0.0') - -parser.add_argument('--gpu', type=int, default=None, - help='Number of gpus to use for finetuning. CPU is used if not set.') - -parser.add_argument('--sentencepiece', type=str, default=None, - help='Path to the sentencepiece .model file for both tokenization and vocab.') - -parser.add_argument('--debug', action='store_true', - help='Run the example in test mode for sanity checks') -parser.add_argument('--pretrained_xlnet_parameters', type=str, default=None, - help='Pre-trained bert model parameter file. default is None') - -parser.add_argument('--layerwise_decay', type=float, default=0.75, help='Layer-wise lr decay') -parser.add_argument('--wd', type=float, default=0.01, help='adam weight decay') -parser.add_argument('--seed', type=int, default=29, help='Random seed') -parser.add_argument('--start_top_n', type=int, default=5, help='to be added') -parser.add_argument('--end_top_n', type=int, default=5, help='to be added') -parser.add_argument('--dropout', type=float, default=0.1, help='dropout') -parser.add_argument('--attention_dropout', type=float, default=0.1, help='attention dropout') -parser.add_argument('--training_steps', type=int, help='training steps') -parser.add_argument('--raw', action='store_true', help='if do data preprocessing or load from pickled file') -parser.add_argument('--dev_dataset_file', default='./output_dir/out.dev', type=str, help='location of dev dataset') -parser.add_argument('--train_dataset_file', default='./output_dir/out.train', type=str, help='location of train dataset') + '--null_score_diff_threshold', + type=float, + default=0.0, + help= + 'If null_score - 
best_non_null is greater than the threshold predict null.' + 'Typical values are between -1.0 and -5.0. default is 0.0. ' + 'Note that a best value can be automatically found by the evaluation script' +) args = parser.parse_args() @@ -143,6 +226,11 @@ if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) +# set the logger +log = logging.getLogger('gluonnlp') +log.setLevel(logging.DEBUG) +formatter = logging.Formatter( + fmt='%(levelname)s:%(name)s:%(asctime)s %(message)s', datefmt='%H:%M:%S') fh = logging.FileHandler(os.path.join(args.output_dir, 'finetune_squad.log')) fh.setLevel(logging.INFO) fh.setFormatter(formatter) @@ -166,10 +254,9 @@ log.info('Using gradient accumulation. Effective batch size = %d', args.accumulate * args.batch_size) if args.max_seq_length <= args.max_query_length + 3: - raise ValueError('The max_seq_length (%d) must be greater than max_query_length ' - '(%d) + 3' % (args.max_seq_length, args.max_query_length)) - -# vocabulary and tokenizer + raise ValueError( + 'The max_seq_length (%d) must be greater than max_query_length ' + '(%d) + 3' % (args.max_seq_length, args.max_query_length)) get_pretrained = True @@ -183,39 +270,49 @@ 'attention_dropout': args.attention_dropout } +# model, vocabulary and tokenizer xlnet_base, vocab, tokenizer = model.get_model(**get_model_params) -num_layers = len(xlnet_base._net.transformer_cells) -for (i, layer_parameters) in enumerate(xlnet_base._net.transformer_cells): - layer_params = layer_parameters.collect_params() - for key, value in layer_params.items(): - value.lr_mult = args.layerwise_decay**(num_layers - i - 1) - batchify_fn = nlp.data.batchify.Tuple( - nlp.data.batchify.Stack(), - nlp.data.batchify.Stack(), # Already padded in data transform - nlp.data.batchify.Stack(), # Already padded in data transform - nlp.data.batchify.Stack('float32'), - nlp.data.batchify.Stack('float32'), - nlp.data.batchify.Stack('float32'), - nlp.data.batchify.Stack('float32'), - 
nlp.data.batchify.Stack('float32')) + nlp.data.batchify.Stack('int32'), # example_id + nlp.data.batchify.Pad(axis=0, + pad_val=vocab[vocab.padding_token], + dtype='int32'), # input_ids + nlp.data.batchify.Pad(axis=0, pad_val=4, dtype='int32'), # segment_ids + nlp.data.batchify.Stack('float32'), # valid_length + nlp.data.batchify.Pad(axis=0, pad_val=1), # p_mask + nlp.data.batchify.Stack('float32'), # start_position + nlp.data.batchify.Stack('float32'), # end_position + nlp.data.batchify.Stack('float32')) # is_impossible if pretrained_xlnet_parameters: # only load XLnetModel parameters - nlp.utils.load_parameters(xlnet_base, pretrained_xlnet_parameters, ctx=ctx, ignore_extra=True, + nlp.utils.load_parameters(xlnet_base, + pretrained_xlnet_parameters, + ctx=ctx, + ignore_extra=True, cast_dtype=True) -net = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, +net = XLNetForQA(xlnet_base=xlnet_base, + start_top_n=args.start_top_n, + end_top_n=args.end_top_n, version_2=args.version_2) -net_eval = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, - version_2=args.version_2, is_eval=True, params=net.collect_params()) + +net_eval = XLNetForQA(xlnet_base=xlnet_base, + start_top_n=args.start_top_n, + end_top_n=args.end_top_n, + version_2=args.version_2, + is_eval=True, + params=net.collect_params()) initializer = mx.init.Normal(0.02) if args.model_parameters: # load complete XLNetForQA parameters - nlp.utils.load_parameters(net, args.model_parameters, ctx=ctx, cast_dtype=True) + nlp.utils.load_parameters(net, + args.model_parameters, + ctx=ctx, + cast_dtype=True) else: net.start_logits.initialize(init=initializer, ctx=ctx) net.end_logits.initialize(init=initializer, ctx=ctx) @@ -225,28 +322,267 @@ net.hybridize(static_alloc=True) net_eval.hybridize(static_alloc=True) +SquadXLNetFeautre = collections.namedtuple('SquadXLNetFeautre', [ + 'example_id', 'qas_id', 'valid_length', 'tokens', + 
'tok_start_to_orig_index', 'tok_end_to_orig_index', 'token_is_max_context', + 'input_ids', 'p_mask', 'segment_ids', 'start_position', 'end_position', + 'paragraph_text', 'paragraph_len', 'is_impossible' +]) + + +def convert_examples_to_features(example, + tokenizer=None, + cls_token=None, + sep_token=None, + vocab=None, + max_seq_length=384, + doc_stride=128, + max_query_length=64, + is_training=True): + """convert the examples to the XLNet features""" + query_tokenized = tokenizer(example.question_text)[:max_query_length] + #tokenize paragraph and get start/end position of the answer in tokenized paragraph + paragraph_tokenized = tokenizer(example.paragraph_text) + + chartok_to_tok_index = [] + tok_start_to_chartok_index = [] + tok_end_to_chartok_index = [] + char_cnt = 0 + for i, token in enumerate(paragraph_tokenized): + chartok_to_tok_index.extend([i] * len(token)) + tok_start_to_chartok_index.append(char_cnt) + char_cnt += len(token) + tok_end_to_chartok_index.append(char_cnt - 1) + + tok_cat_text = ''.join(paragraph_tokenized).replace(u'▁', ' ') + + # XLNet takes a more complicated strategy to match the origin text + # and the tokenized tokens + n, m = len(example.paragraph_text), len(tok_cat_text) + max_dist = abs(n - m) + 5 + for _ in range(2): + f, g = _lcs_match(max_dist, example.paragraph_text, tok_cat_text) + if f[n - 1, m - 1] > 0.8 * n: + break + max_dist *= 2 + + orig_to_chartok_index = [None] * n + chartok_to_orig_index = [None] * m + i, j = n - 1, m - 1 + while i >= 0 and j >= 0: + if (i, j) not in g: + break + if g[(i, j)] == 2: + orig_to_chartok_index[i] = j + chartok_to_orig_index[j] = i + i, j = i - 1, j - 1 + elif g[(i, j)] == 1: + j = j - 1 + else: + i = i - 1 + + # get start/end mapping + tok_start_to_orig_index = [] + tok_end_to_orig_index = [] + for i in range(len(paragraph_tokenized)): + start_chartok_pos = tok_start_to_chartok_index[i] + end_chartok_pos = tok_end_to_chartok_index[i] + start_orig_pos = 
_convert_index(chartok_to_orig_index, + start_chartok_pos, + n, + is_start=True) + end_orig_pos = _convert_index(chartok_to_orig_index, + end_chartok_pos, + m, + is_start=False) + + tok_start_to_orig_index.append(start_orig_pos) + tok_end_to_orig_index.append(end_orig_pos) + + tok_start_position, tok_end_position = -1, -1 + + # get mapped start/end position + if is_training and not example.is_impossible: + start_chartok_pos = _convert_index(orig_to_chartok_index, + example.start_position, + is_start=True) + tok_start_position = chartok_to_tok_index[start_chartok_pos] + + end_chartok_pos = _convert_index(orig_to_chartok_index, + example.end_position, + is_start=False) + tok_end_position = chartok_to_tok_index[end_chartok_pos] + assert tok_start_position <= tok_end_position + + # get doc spans using sliding window + doc_spans, doc_spans_indices = get_doc_spans( + paragraph_tokenized, max_seq_length - len(query_tokenized) - 2, + doc_stride) + + # record whether the tokens in a docspan have max context + token_is_max_context = [{ + p: check_is_max_context(doc_spans_indices, i, + p + doc_spans_indices[i][0]) + for p in range(len(doc_span)) + } for (i, doc_span) in enumerate(doc_spans)] + + # get token -> origin text mapping + cur_tok_start_to_orig_index = [[ + tok_start_to_orig_index[p + st] for p in range(len(doc_span)) + ] for doc_span, (st, ed) in zip(doc_spans, doc_spans_indices)] + cur_tok_end_to_orig_index = [[ + tok_end_to_orig_index[p + st] for p in range(len(doc_span)) + ] for doc_span, (st, ed) in zip(doc_spans, doc_spans_indices)] + + # get sequence features: tokens, segment_ids, p_masks + seq_features = [ + concat_sequences_extended( + [doc_span, query_tokenized], [[sep_token]] * 2 + [[cls_token]], + [[0] * len(doc_span), [1] * len(query_tokenized)], [[1], [1], [0]]) + for doc_span in doc_spans + ] + + if example.is_impossible: + positions = [(len(seq_feature[0]) - 1, len(seq_feature[0]) - 1) + for seq_feature in seq_features] + else: + positions = 
[(tok_start_position - st, tok_end_position - st) + for (st, _) in doc_spans_indices] + features = [ + SquadXLNetFeautre(example_id=example.example_id, + qas_id=example.qas_id, + tok_start_to_orig_index=t2st, + tok_end_to_orig_index=t2ed, + valid_length=len(tokens), + tokens=tokens, + token_is_max_context=is_max, + input_ids=vocab[tokens], + p_mask=p_mask, + segment_ids=segment_ids, + start_position=start, + end_position=end, + paragraph_text=example.paragraph_text, + paragraph_len=len(tokens), + is_impossible=example.is_impossible) + for (tokens, segment_ids, p_mask), (start, end), is_max, t2st, t2ed in + zip(seq_features, positions, token_is_max_context, + cur_tok_start_to_orig_index, cur_tok_end_to_orig_index) + ] + return features + + +def preprocess_dataset(tokenizer, + dataset, + vocab=None, + max_seq_length=384, + doc_stride=128, + max_query_length=64, + num_workers=4, + load_from_pickle=False, + feature_file=None, + is_training=True): + """Loads a dataset into features""" + vocab = tokenizer.vocab if vocab is None else vocab + trans = partial(convert_examples_to_features, + tokenizer=tokenizer, + cls_token=vocab.cls_token, + sep_token=vocab.sep_token, + vocab=vocab, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length) + pool = mp.Pool(num_workers) + start = time.time() + if not load_from_pickle: + example_trans = partial(convert_squad_examples, + is_training=is_training) + # convert the raw dataset into raw features + examples = pool.map(example_trans, dataset) + raw_features = pool.map(trans, examples) + if feature_file: + with open(feature_file, 'wb') as file: + pickle.dump(list(raw_features), file) + else: + assert feature_file, 'feature file should be provided.' + with open(feature_file, 'rb') as file: + raw_features = pickle.load(file) + + end = time.time() + pool.close() + log.info('Done! 
Transform dataset costs %.2f seconds.', (end - start)) + return raw_features + + +def convert_full_features_to_input_features(raw_features): + """convert the full features into the input features""" + data_features = mx.gluon.data.SimpleDataset( + list(itertools.chain.from_iterable(raw_features))) + + data_features = data_features.transform(lambda *example: ( + example[0], # example_id + example[7], # inputs_id + example[9], # segment_ids + example[2], # valid_length, + example[8], # p_mask + example[10], # start_position, + example[11], # end_position + example[14])) # is_impossible + return data_features + def split_array(arr, num_of_splits): - """split an array into a number of splits""" + """split an array into equal pieces""" + # TODO Replace this function with gluon.utils.split_data() once targeting MXNet 1.7 size = arr.shape[0] if size < num_of_splits: return [arr[i:i + 1] for i in range(size)] slice_len, rest = divmod(size, num_of_splits) - div_points = [0] + [(slice_len * index + min(index, rest) + slice_len + (index < rest)) - for index in range(num_of_splits)] - slices = [arr[div_points[i]:div_points[i + 1]] for i in range(num_of_splits)] + div_points = [0] + [(slice_len * index + min(index, rest) + slice_len + + (index < rest)) for index in range(num_of_splits)] + slices = [ + arr[div_points[i]:div_points[i + 1]] for i in range(num_of_splits) + ] return slices -def split_and_load(arrs, ctxs): +def split_and_load(arrs, _ctxs): """split and load arrays to a list of contexts""" + # TODO Replace split_array() with gluon.utils.split_data() once targeting MXNet 1.7 assert isinstance(arrs, (list, tuple)) # split and load - loaded_arrs = [[i.as_in_context(ctx) for i, ctx in zip(split_array(arr, len(ctxs)), ctxs)] - for arr in arrs] + loaded_arrs = [[ + i.as_in_context(ctx) + for i, ctx in zip(split_array(arr, len(_ctxs)), _ctxs) + ] for arr in arrs] return zip(*loaded_arrs) +def _apply_gradient_decay(): + """apply layer-wise gradient decay. 
+ + Note that the description in origin paper about layer-wise learning rate decay + is inaccurate. According to their implementation, they are actually performing + layer-wise gradient decay. Gradient decay and learning rate decay could be the + same by using standard SGD, but different by using Adaptive optimizer(e.g., Adam). + """ + parameter_not_included = [ + 'seg_emb', 'query_key_bias', 'query_emb_bias', 'query_seg_bias' + ] + num_layers = len(xlnet_base._net.transformer_cells) + for (i, layer_parameters) in enumerate(xlnet_base._net.transformer_cells): + layer_params = layer_parameters.collect_params() + for key, value in layer_params.items(): + skip = False + for pn in parameter_not_included: + if pn in key: + skip = True + if skip: + continue + if value.grad_req != 'null': + for arr in value.list_grad(): + arr *= args.layerwise_decay**(num_layers - i - 1) + + def train(): """Training function.""" segment = 'train' if not args.debug else 'dev' @@ -256,41 +592,54 @@ def train(): else: train_data = SQuAD(segment, version='1.1') if args.debug: - sampled_data = [train_data[i] for i in range(100)] + sampled_data = [train_data[i] for i in range(10)] train_data = mx.gluon.data.SimpleDataset(sampled_data) log.info('Number of records in Train data: %s', len(train_data)) - if args.raw: - train_data_transform = preprocess_dataset( - train_data, - SQuADTransform(copy.copy(tokenizer), vocab, max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, max_query_length=args.max_query_length, - is_pad=True, is_training=True), dataset_file=args.train_dataset_file) - else: - train_data_transform = preprocess_dataset(raw=False, dataset_file=args.train_dataset_file) - log.info('The number of examples after preprocessing: %s', len(train_data_transform)) - - train_dataloader = mx.gluon.data.DataLoader(train_data_transform, batchify_fn=batchify_fn, - batch_size=args.batch_size, num_workers=4, + train_data_features = preprocess_dataset( + tokenizer, + train_data, + 
vocab=vocab, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + max_query_length=args.max_query_length, + load_from_pickle=not args.raw, + feature_file=args.train_dataset_file) + + train_data_input = convert_full_features_to_input_features( + train_data_features) + + log.info('The number of examples after preprocessing: %s', + len(train_data_input)) + + train_dataloader = mx.gluon.data.DataLoader(train_data_input, + batchify_fn=batchify_fn, + batch_size=args.batch_size, + num_workers=4, shuffle=True) optimizer_params = {'learning_rate': args.lr, 'wd': args.wd} try: - trainer = mx.gluon.Trainer(net.collect_params(), args.optimizer, optimizer_params, + trainer = mx.gluon.Trainer(net.collect_params(), + args.optimizer, + optimizer_params, update_on_kvstore=False) except ValueError as _: - warnings.warn('AdamW optimizer is not found. Please consider upgrading to ' - 'mxnet>=1.5.0. Now the original Adam optimizer is used instead.') - trainer = mx.gluon.Trainer(net.collect_params(), 'bertadam', optimizer_params, + warnings.warn( + 'AdamW optimizer is not found. Please consider upgrading to ' + 'mxnet>=1.5.0. 
Now the original Adam optimizer is used instead.') + trainer = mx.gluon.Trainer(net.collect_params(), + 'bertadam', + optimizer_params, update_on_kvstore=False) - num_train_examples = len(train_data_transform) + num_train_examples = len(train_data_input) step_size = args.batch_size * args.accumulate if args.accumulate else args.batch_size num_train_steps = int(num_train_examples / step_size * args.epochs) epoch_number = args.epochs if args.training_steps: num_train_steps = args.training_steps - epoch_number = 999 + epoch_number = 100000 log.info('training steps=%d', num_train_steps) num_warmup_steps = int(num_train_steps * args.warmup_ratio) @@ -369,12 +718,17 @@ def set_new_lr(step_num, batch_id): if not args.accumulate or (batch_id + 1) % args.accumulate == 0: trainer.allreduce_grads() nlp.utils.clip_grad_global_norm(params, 1) + _apply_gradient_decay() trainer.update(1, ignore_stale_grad=True) + if args.version_2: - step_loss_sep_tmp = np.array([[span_ls.mean().asscalar(), cls_ls.mean().asscalar()] for span_ls, cls_ls in batch_loss_sep]) + step_loss_sep_tmp = np.array( + [[span_ls.mean().asscalar(), + cls_ls.mean().asscalar()] + for span_ls, cls_ls in batch_loss_sep]) step_loss_sep_tmp = list(np.sum(step_loss_sep_tmp, axis=0)) - step_loss_span += step_loss_sep_tmp[0] - step_loss_cls += step_loss_sep_tmp[1] + step_loss_span += step_loss_sep_tmp[0] / len(ctx) + step_loss_cls += step_loss_sep_tmp[1] / len(ctx) step_loss += sum([ls.asscalar() for ls in batch_loss]) if (batch_id + 1) % log_interval == 0: @@ -394,7 +748,9 @@ def set_new_lr(step_num, batch_id): if args.accumulate: step_loss_span = step_loss_span / args.accumulate step_loss_cls = step_loss_cls / args.accumulate - log.info('span_loss: %.4f, cls_loss: %.4f', step_loss_span / log_interval, step_loss_cls / log_interval) + log.info('span_loss: %.4f, cls_loss: %.4f', + step_loss_span / log_interval, + step_loss_cls / log_interval) tic = time.time() step_loss = 0.0 @@ -406,20 +762,21 @@ def 
set_new_lr(step_num, batch_id): finish_flag = True break epoch_toc = time.time() - log.info('Time cost=%.2f s, Thoughput=%.2f samples/s', epoch_toc - epoch_tic, - total_num / (epoch_toc - epoch_tic)) + log.info('Time cost=%.2f s, Thoughput=%.2f samples/s', + epoch_toc - epoch_tic, total_num / (epoch_toc - epoch_tic)) ckpt_name = 'model_xlnet_squad_{0}.params'.format(epoch_id + 1) params_saved = os.path.join(args.output_dir, ckpt_name) nlp.utils.save_parameters(net, params_saved) log.info('params saved in: %s', params_saved) -RawResultExtended = collections.namedtuple( - 'RawResultExtended', - ['start_top_log_probs', 'start_top_index', 'end_top_log_probs', 'end_top_index', 'cls_logits']) +RawResultExtended = collections.namedtuple('RawResultExtended', [ + 'start_top_log_probs', 'start_top_index', 'end_top_log_probs', + 'end_top_index', 'cls_logits' +]) -def evaluate(prefix='p'): +def evaluate(prefix=''): """Evaluate the model on validation dataset. """ log.info('Loading dev data...') @@ -436,26 +793,26 @@ def evaluate(prefix='p'): dev_data = mx.gluon.data.SimpleDataset(sampled_data) log.info('Number of records in dev data: %d', len(dev_data)) - - if args.raw: - dev_dataset = dev_data.transform( - SQuADTransform(copy.copy(tokenizer), vocab, max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, max_query_length=args.max_query_length, - is_pad=True, is_training=False)._transform, lazy=False) - with open(args.dev_dataset_file, 'wb') as file: - pickle.dump(list(dev_dataset), file) - else: - with open(args.dev_dataset_file , 'rb') as file: - dev_dataset = pickle.load(file) - dev_dataset = mx.gluon.data.SimpleDataset(dev_dataset) - - dev_data_transform = convert_examples_to_inputs(dev_dataset) - - log.info('The number of examples after preprocessing: %d', len(dev_data_transform)) - - dev_dataloader = mx.gluon.data.DataLoader(dev_data_transform, batchify_fn=batchify_fn, - num_workers=4, batch_size=args.test_batch_size, - shuffle=False, last_batch='keep') + 
dev_data_features = preprocess_dataset( + tokenizer, + dev_data, + vocab=vocab, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + max_query_length=args.max_query_length, + load_from_pickle=not args.raw, + feature_file=args.dev_dataset_file) + + dev_data_input = convert_full_features_to_input_features(dev_data_features) + log.info('The number of examples after preprocessing: %d', + len(dev_data_input)) + + dev_dataloader = mx.gluon.data.DataLoader(dev_data_input, + batchify_fn=batchify_fn, + num_workers=4, + batch_size=args.test_batch_size, + shuffle=False, + last_batch='keep') log.info('start prediction') @@ -468,7 +825,10 @@ def evaluate(prefix='p'): for splited_data in data_list: example_ids, inputs, token_types, valid_length, p_mask, _, _, _ = splited_data total_num += len(inputs) - outputs = net_eval(inputs, token_types, valid_length, p_mask=p_mask) + outputs = net_eval(inputs, + token_types, + valid_length, + p_mask=p_mask) example_ids = example_ids.asnumpy().tolist() for c, example_ids in enumerate(example_ids): result = RawResultExtended( @@ -483,32 +843,35 @@ def evaluate(prefix='p'): log.info('Batch: %d/%d', batch_id + 1, len(dev_dataloader)) epoch_toc = time.time() - log.info('Time cost=%2f s, Thoughput=%.2f samples/s', epoch_toc - epoch_tic, - total_num / (epoch_toc - epoch_tic)) + log.info('Time cost=%2f s, Thoughput=%.2f samples/s', + epoch_toc - epoch_tic, total_num / (epoch_toc - epoch_tic)) log.info('Get prediction results...') all_predictions = collections.OrderedDict() all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() - for features in dev_dataset: + for features in dev_data_features: results = all_results[features[0].example_id] example_qas_id = features[0].qas_id score_diff, best_non_null_entry, nbest_json = predict_extended( - features=features, results=results, - sp_model=nlp.data.SentencepieceTokenizer(tokenizer._sentencepiece_path)._processor, + features=features, + results=results, 
n_best_size=args.n_best_size, - max_answer_length=args.max_answer_length, start_n_top=args.start_top_n, + max_answer_length=args.max_answer_length, + start_n_top=args.start_top_n, end_n_top=args.end_top_n) scores_diff_json[example_qas_id] = score_diff all_predictions[example_qas_id] = best_non_null_entry all_nbest_json[example_qas_id] = nbest_json - output_prediction_file = os.path.join(args.output_dir, 'predictions_{}.json'.format(prefix)) - output_nbest_file = os.path.join(args.output_dir, 'nbest_predictions_{}.json'.format(prefix)) + output_prediction_file = os.path.join(args.output_dir, + 'predictions_{}.json'.format(prefix)) + output_nbest_file = os.path.join( + args.output_dir, 'nbest_predictions_{}.json'.format(prefix)) if args.version_2: - output_null_log_odds_file = os.path.join(args.output_dir, - 'null_odds_{}.json'.format(prefix)) + output_null_log_odds_file = os.path.join( + args.output_dir, 'null_odds_{}.json'.format(prefix)) else: output_null_log_odds_file = None @@ -521,12 +884,17 @@ def evaluate(prefix='p'): writer.write(json.dumps(scores_diff_json, indent=4) + '\n') if args.version_2: - evaluate_options = EVAL_OPTS(data_file=dev_data_path, pred_file=output_prediction_file, - na_prob_file=output_null_log_odds_file, - na_prob_thresh=args.null_score_diff_threshold) + evaluate_options = EVAL_OPTS( + data_file=dev_data_path, + pred_file=output_prediction_file, + na_prob_file=output_null_log_odds_file, + na_prob_thresh=args.null_score_diff_threshold) else: - evaluate_options = EVAL_OPTS(data_file=dev_data_path, pred_file=output_prediction_file, - na_prob_file=None, na_prob_thresh=args.null_score_diff_threshold) + evaluate_options = EVAL_OPTS( + data_file=dev_data_path, + pred_file=output_prediction_file, + na_prob_file=None, + na_prob_thresh=args.null_score_diff_threshold) results = evaluate_on_squad(evaluate_options) return results diff --git a/scripts/language_model/xlnet_qa_evaluate.py b/scripts/language_model/xlnet_qa_evaluate.py new file mode 100644 
index 0000000000..3421192d1a --- /dev/null +++ b/scripts/language_model/xlnet_qa_evaluate.py @@ -0,0 +1,152 @@ +# Copyright 2018 The Google AI Language Team Authors, Allenai and DMLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""XLNet SQuAD evaluate.""" + +from collections import namedtuple, OrderedDict + +from mxnet import nd + +_PrelimPrediction = namedtuple( # pylint: disable=invalid-name + 'PrelimPrediction', [ + 'feature_id', 'start_index', 'end_index', 'start_log_prob', + 'end_log_prob' + ]) + +_NbestPrediction = namedtuple( # pylint: disable=invalid-name + 'NbestPrediction', ['text', 'start_log_prob', 'end_log_prob']) + + +def predict_extended(features, + results, + n_best_size, + max_answer_length=64, + start_n_top=5, + end_n_top=5): + """Get prediction results for XLNet. + + Parameters + ---------- + features : list of SQuADFeature + List of squad features for the example. + results : list of data.qa.PredResult + List of model predictions for span start and span end. + tokenizer: callable + Tokenizer function. + max_answer_length: int, default 64 + Maximum length of the answer tokens. + null_score_diff_threshold: float, default 0.0 + If null_score - best_non_null is greater than the threshold predict null. + n_best_size: int, default 10 + The total number of n-best predictions. + version_2: bool, default False + If true, the SQuAD examples contain some that do not have an answer. + + Returns + ------- + prediction: str + The final prediction. 
+ nbest : list of (str, float) + n-best predictions with their probabilities. + """ + + prelim_predictions = [] + score_null = 1000000 # large and positive + for features_id, (result, feature) in enumerate(zip(results, features)): + cur_null_score = result.cls_logits[0] + score_null = min(score_null, cur_null_score) + for i in range(start_n_top): + for j in range(end_n_top): + start_log_prob = result.start_top_log_probs[i] + start_index = int(result.start_top_index[i]) + j_index = j * end_n_top + i + end_log_prob = result.end_top_log_probs[j_index] + end_index = int(result.end_top_index[j_index]) + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= feature.paragraph_len - 1: + continue + if end_index >= feature.paragraph_len - 1: + continue + + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction(feature_id=features_id, + start_index=start_index, + end_index=end_index, + start_log_prob=start_log_prob, + end_log_prob=end_log_prob)) + + prelim_predictions = sorted(prelim_predictions, + key=lambda x: + (x.start_log_prob + x.end_log_prob), + reverse=True) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_id] + tok_start_to_orig_index = feature.tok_start_to_orig_index + tok_end_to_orig_index = feature.tok_end_to_orig_index + start_orig_pos = tok_start_to_orig_index[pred.start_index] + end_orig_pos = tok_end_to_orig_index[pred.end_index] + + paragraph_text = feature.paragraph_text + final_text = paragraph_text[start_orig_pos:end_orig_pos + 1].strip() + if final_text in seen_predictions: + continue + seen_predictions[final_text] = True + nbest.append( + 
_NbestPrediction(text=final_text, + start_log_prob=pred.start_log_prob, + end_log_prob=pred.end_log_prob)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. + if not nbest: + nbest.append( + _NbestPrediction(text='', start_log_prob=-1e6, end_log_prob=-1e6)) + + assert len(nbest) >= 1 + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_log_prob + entry.end_log_prob) + if not best_non_null_entry: + best_non_null_entry = entry + probs = nd.softmax(nd.array(total_scores)).asnumpy() + + nbest_json = [] + + for (i, entry) in enumerate(nbest): + output = OrderedDict() + output['text'] = entry.text + output['probability'] = float(probs[i]) + output['start_log_prob'] = float(entry.start_log_prob) + output['end_log_prob'] = float(entry.end_log_prob) + nbest_json.append(output) + + assert len(nbest_json) >= 1 + assert best_non_null_entry is not None + score_diff = score_null + return score_diff, best_non_null_entry.text, nbest_json From ee03b7c702ce66254172dc0d27a5cabf65ac8bc2 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 15 Jan 2020 17:28:36 +0800 Subject: [PATCH 29/59] add glue logger --- scripts/bert/finetune_classifier.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/bert/finetune_classifier.py b/scripts/bert/finetune_classifier.py index f6924eb364..d02122bdb9 100644 --- a/scripts/bert/finetune_classifier.py +++ b/scripts/bert/finetune_classifier.py @@ -74,7 +74,7 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--optimizer', - type='str', + type=str, default='bertadam', help='The optimizer to be used for training') parser.add_argument('--epochs', type=int, default=3, help='number of epochs.') @@ -192,8 +192,19 @@ args = parser.parse_args() -logging.getLogger().setLevel(logging.INFO) +log = logging.getLogger() +log.setLevel(logging.INFO) 
logging.captureWarnings(True) +fh = logging.FileHandler('log_{0}.txt'.format(args.task_name)) +formatter = logging.Formatter( + fmt='%(levelname)s:%(name)s:%(asctime)s %(message)s', datefmt='%H:%M:%S') +fh.setLevel(logging.INFO) +fh.setFormatter(formatter) +console = logging.StreamHandler() +console.setLevel(logging.INFO) +console.setFormatter(formatter) +log.addHandler(console) +log.addHandler(fh) logging.info(args) batch_size = args.batch_size From 26cac4b83cc78384f4ed3c1c1327c9f641934ef3 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 15 Jan 2020 19:29:31 +0800 Subject: [PATCH 30/59] fix --- scripts/bert/data/preprocessing_utils.py | 47 ++++++++++++++++++++++-- scripts/language_model/run_glue.py | 15 ++------ scripts/language_model/run_squad.py | 8 ++-- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index 9c47bd32e1..9794607866 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -71,7 +71,11 @@ def concat_sequences(seqs, separators, separator_mask=None): it will be: [1, 2, 3, 4, 7, 5, 6] - seqs : list of sequences or a single sequence + seqs : list + sequences or a single sequence + + separator_mask : int + The mask value for separator Returns ------- @@ -81,7 +85,7 @@ def concat_sequences(seqs, separators, separator_mask=None): """ assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 if not separator_mask: - separator_mask = [] + separator_mask = 1 concat = sum(( seq + sep for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), @@ -100,7 +104,44 @@ def concat_sequences_extended(seqs, separators, seq_p_mask, separator_mask=None): - """TBA""" + """ + Insert special tokens for sequence list or a single sequence. + Note that different from concat_sequence(), one can specific mask for sequence and + mask for separator on element level. 
+ For sequence pairs, the input is a list of 2 strings: + text_a, text_b. + Inputs: + text_a: 'is this jacksonville ?' + text_b: 'no it is not' + separator: [[SEP], [SEP], [CLS]] + seq_p_mask: [[1, 1, 1, 1], [0, 0, 0, 0, 0]] + separator_mask: [[1], [1], [0]] + Processed: + tokens: 'is this jacksonville ? [SEP] no it is not . [SEP] [CLS]' + segment_ids: 0 0 0 0 0 1 1 1 1 1 1 2 + p_mask: 1 1 1 1 1 0 0 0 0 0 1 0 + valid_length: 11 + + Parameters + ---------- + separator : list + The special tokens to be appended to each sequence. For example: + + seqs : list + sequences or a single sequence + + seq_p_mask : list + mask value for each element in seqs. Must have the same shape with seqs + + separator_mask : list + The mask value for separator + + Returns + ------- + np.array: input token ids in 'int32', shape (batch_size, seq_length) + np.array: segment ids in 'int32', shape (batch_size, seq_length) + np.array: mask for special tokens + """ assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 assert len(seq_p_mask) == len(seqs), 'sequence position mask ' \ 'should have the same length with sequences.' 
diff --git a/scripts/language_model/run_glue.py b/scripts/language_model/run_glue.py index 1d56435e98..b2ad13244d 100644 --- a/scripts/language_model/run_glue.py +++ b/scripts/language_model/run_glue.py @@ -241,8 +241,7 @@ def preprocess_data(tokenizer, batch_size, dev_batch_size, max_len, - vocab, - load_from_pickle=False): + vocab): #pylint: disable=redefined-outer-name """Train/eval Data preparation function.""" label_dtype = 'int32' if task.class_labels else 'float32' @@ -258,16 +257,8 @@ def preprocess_data(tokenizer, # data train # task.dataset_train returns (segment_name, dataset) - filename = 'xlnet_' + args.task_name + '_feature.train' - train_feautre_path = os.path.join(args.output_dir, filename) - if not load_from_pickle: - train_tsv = task.dataset_train()[1] - data_train = list(map(trans, train_tsv)) - with open(train_feautre_path, 'wb') as file: - pickle.dump(data_train, file) - else: - with open(train_feautre_path, 'rb') as file: - data_train = pickle.load(file) + train_tsv = task.dataset_train()[1] + data_train = list(map(trans, train_tsv)) data_train = mx.gluon.data.SimpleDataset(data_train) data_train_len = data_train.transform( lambda _, valid_length, segment_ids, label: valid_length, lazy=False) diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 59e1a27ec5..7a1b727dd0 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -48,8 +48,8 @@ default=None, help='Pre-trained bert model parameter file. 
default is None') parser.add_argument( - '--raw', - action='store_true', + '--load_pickle', + action='store_false', help='Whether do data preprocessing or load from pickled file') parser.add_argument('--dev_dataset_file', default='./output_dir/out.dev', @@ -603,7 +603,7 @@ def train(): max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, - load_from_pickle=not args.raw, + load_from_pickle=args.load_pickle, feature_file=args.train_dataset_file) train_data_input = convert_full_features_to_input_features( @@ -800,7 +800,7 @@ def evaluate(prefix=''): max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, - load_from_pickle=not args.raw, + load_from_pickle=args.load_pickle, feature_file=args.dev_dataset_file) dev_data_input = convert_full_features_to_input_features(dev_data_features) From 883f2c695a5f2c0b17317fe2d4bbc5dbd0bae1f8 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 15 Jan 2020 19:42:21 +0800 Subject: [PATCH 31/59] pylint --- scripts/language_model/run_glue.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/language_model/run_glue.py b/scripts/language_model/run_glue.py index b2ad13244d..325eb04d55 100644 --- a/scripts/language_model/run_glue.py +++ b/scripts/language_model/run_glue.py @@ -9,7 +9,6 @@ import logging import warnings import sys -import pickle from functools import partial import numpy as np import mxnet as mx @@ -414,8 +413,7 @@ def preprocess_data(tokenizer, logging.info('processing dataset...') train_data, dev_data_list, test_data_list, num_train_examples = preprocess_data( - tokenizer, task, args.batch_size, args.dev_batch_size, args.max_len, vocab, - args.pad) + tokenizer, task, args.batch_size, args.dev_batch_size, args.max_len, vocab) def test(loader_test, segment): From 64d2d033d3f0ad720f0f3b830b21465faeb47a7c Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 15 Jan 2020 22:24:03 +0800 Subject: [PATCH 32/59] fix --- 
scripts/bert/finetune_squad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bert/finetune_squad.py b/scripts/bert/finetune_squad.py index 54b2a964fd..be3e520262 100644 --- a/scripts/bert/finetune_squad.py +++ b/scripts/bert/finetune_squad.py @@ -255,7 +255,6 @@ 'BertForQA model parameters.') lower = args.uncased -epochs = args.epochs batch_size = args.batch_size test_batch_size = args.test_batch_size lr = args.lr @@ -385,6 +384,7 @@ def train(): num_train_examples = len(train_data_transform) step_size = batch_size * accumulate if accumulate else batch_size num_train_steps = int(num_train_examples / step_size * args.epochs) + epochs = args.epochs if args.training_steps: num_train_steps = args.training_steps epochs = 9999 From ddbf8c82f6743602fda7079641242949484ff666 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 17 Jan 2020 15:07:57 +0800 Subject: [PATCH 33/59] fix hyperparameter --- scripts/bert/index.rst | 2 +- scripts/language_model/run_squad.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/bert/index.rst b/scripts/bert/index.rst index 279e9a7092..9146810330 100644 --- a/scripts/bert/index.rst +++ b/scripts/bert/index.rst @@ -148,7 +148,7 @@ Question Answering on SQuAD +=========+=========================================================================================================================================+==========================================================================================================================================+==========================================================================================================================================+ | Model | bert_12_768_12 | bert_24_1024_16 | bert_24_1024_16 | 
+---------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| F1 / EM | 88.53 / 80.98 | 90.97 / 84.05 | 77.96 / 81.02 | +| F1 / EM | 88.58 / 81.26 | 90.97 / 84.05 | 78.14 / 81.26 | +---------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ | Log | `log `__ | `log `__ | `log `__ | +---------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 7a1b727dd0..51f94828a0 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -49,7 +49,7 @@ help='Pre-trained bert model parameter file. 
default is None') parser.add_argument( '--load_pickle', - action='store_false', + action='store_true', help='Whether do data preprocessing or load from pickled file') parser.add_argument('--dev_dataset_file', default='./output_dir/out.dev', From 3a92050b748f6b7e636e213ee0fd0d284e6ed449 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 17 Jan 2020 17:04:03 +0800 Subject: [PATCH 34/59] fix --- scripts/bert/data/preprocessing_utils.py | 5 +- scripts/language_model/model/qa.py | 100 ++++++++++++++++---- scripts/language_model/run_squad.py | 25 ++--- scripts/language_model/transformer/model.py | 1 - 4 files changed, 99 insertions(+), 32 deletions(-) diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index 9794607866..f7c7a1782e 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -311,7 +311,7 @@ def check_is_max_context(doc_spans, cur_span_index, position): SquadExample = collections.namedtuple('SquadExample', [ 'qas_id', 'question_text', 'paragraph_text', 'doc_tokens', 'example_id', - 'orig_answer_text', 'start_position', 'end_position', 'is_impossible' + 'orig_answer_text', 'start_position', 'end_position', 'start_offset', 'end_offset', 'is_impossible' ]) @@ -350,7 +350,6 @@ def convert_squad_examples(record, is_training): answer_offset] if not is_impossible else -1 end_position = char_to_word_offset[answer_offset + answer_length - 1] if not is_impossible else -1 - example = SquadExample(qas_id=qas_id, question_text=question_text, paragraph_text=paragraph_text, @@ -359,6 +358,8 @@ def convert_squad_examples(record, is_training): orig_answer_text=orig_answer_text, start_position=start_position, end_position=end_position, + start_offset=answer_offset, + end_offset=answer_offset + len(orig_answer_text) - 1, is_impossible=is_impossible) return example diff --git a/scripts/language_model/model/qa.py b/scripts/language_model/model/qa.py index 38934701e1..1e8fcc98d1 100644 --- 
a/scripts/language_model/model/qa.py +++ b/scripts/language_model/model/qa.py @@ -5,7 +5,7 @@ class PoolerStartLogits(HybridBlock): - """ Compute SQuAD start_logits from sequence hidden states. """ + """ Compute SQuAD start_logits from sequence hidden states.""" def __init__(self, prefix=None, params=None): super(PoolerStartLogits, self).__init__(prefix=prefix, params=params) self.dense = nn.Dense(1, flatten=False) @@ -15,12 +15,19 @@ def __call__(self, hidden_states, p_masks=None): return super(PoolerStartLogits, self).__call__(hidden_states, p_masks) def hybrid_forward(self, F, hidden_states, p_mask): - # pylint: disable=arguments-differ - """ Args: - **p_mask**: (`optional`) ``torch.FloatTensor`` of shape `(batch_size, seq_len)` - invalid position mask such as query and special symbols (PAD, SEP, CLS) - 1.0 means token should be masked. + """Get start logits from the model output. + + Parameters + ---------- + hidden_states : NDArray, shape (batch_size, seq_length, hidden_size) + p_mask : NDArray or None, shape(batch_size, seq_length) + + Returns + ------- + x : NDarray, shape(batch_size, seq_length) + Masked start logits. """ + # pylint: disable=arguments-differ x = self.dense(hidden_states).squeeze(-1) if p_mask is not None: x = x * (1 - p_mask) - 1e30 * p_mask @@ -50,6 +57,22 @@ def __call__(self, def forward(self, hidden_states, start_states, start_positions, p_mask): # pylint: disable=arguments-differ + """Get end logits from the model output and start states or start positions. + + Parameters + ---------- + hidden_states : NDArray, shape (batch_size, seq_length, hidden_size) + start_states : NDArray, shape (batch_size, seq_length, start_n_top, hidden_size) + Used during inference + start_positions : NDArray, shape (batch_size) + Ground-truth start positions used during training. + p_mask : NDArray or None, shape(batch_size, seq_length) + + Returns + ------- + x : NDarray, shape(batch_size, seq_length) + Masked end logits. 
+ """ F = mx.ndarray if not self._eval: start_states = F.gather_nd( @@ -58,7 +81,7 @@ def forward(self, hidden_states, start_states, start_positions, p_mask): F.contrib.arange_like(hidden_states, axis=0).expand_dims(1), start_positions.expand_dims( - 1)).transpose()) #shape(bsz, hsz) + 1)).transpose()) # shape(bsz, hsz) start_states = start_states.expand_dims(1) start_states = F.broadcast_like( start_states, hidden_states) # shape (bsz, slen, hsz) @@ -95,17 +118,31 @@ def __call__(self, hidden_states, start_states=None, cls_index=None): # pylint: disable=arguments-differ return super(XLNetPoolerAnswerClass, self).__call__(hidden_states, start_states, cls_index) - # pylint: disable=unused-argument - def forward(self, sequence, start_states, cls_index): + def forward(self, hidden_states, start_states, cls_index): # pylint: disable=arguments-differ - # get the cls_token's state, currently the last state + """Get answerability logits from the model output and start states. + + Parameters + ---------- + hidden_states : NDArray, shape (batch_size, seq_length, hidden_size) + start_states : NDArray, shape (batch_size, hidden_size) + Typically weighted average hidden_states along second dimension. + cls_index : NDArray, shape (batch_size) + Index of [CLS] token in sequence. + + Returns + ------- + x : NDarray, shape(batch_size,) + CLS logits. 
+ """ F = mx.ndarray - index = F.contrib.arange_like(sequence, axis=0, - ctx=sequence.context).expand_dims(1) + index = F.contrib.arange_like(hidden_states, + axis=0, + ctx=hidden_states.context).expand_dims(1) valid_length_rs = cls_index.reshape((-1, 1)) - 1 gather_index = F.concat(index, valid_length_rs).T - cls_token_state = F.gather_nd(sequence, gather_index) + cls_token_state = F.gather_nd(hidden_states, gather_index) x = self.dense_0(F.concat(start_states, cls_token_state, dim=-1)) x = self._dropout(x) @@ -118,7 +155,15 @@ class XLNetForQA(Block): Parameters ---------- - bert: XLNet base + xlnet_base: XLNet Block + start_top_n : int + Number of start position candidates during inference. + end_top_n : int + Number of end position candidates for each start position during inference. + version_2 : Bool + model for squad2.0 includes an extra answer class to predict answerability. + is_eval : Bool + If set to True, do inference. prefix : str or None See document of `mx.gluon.Block`. params : ParameterDict or None @@ -155,7 +200,7 @@ def __call__(self, p_mask=None, is_impossible=None, mems=None): - #pylint: disable=arguments-differ, dangerous-default-value + #pylint: disable=arguments-differ """Generate the unnormalized score for the given the input sequences.""" valid_length = [] if valid_length is None else valid_length return super(XLNetForQA, @@ -201,11 +246,32 @@ def forward(self, inputs, token_types, valid_length, p_mask, label, first sentence or the second one. valid_length : NDArray or None, shape (batch_size,) Valid length of the sequence. This is used to mask the padded tokens. + p_mask : NDArray or None, shape (batch_size, seq_length) + We do not want special tokens(e.g., [SEP], [PAD]) and question tokens to be + included in answer. Set to 1 to mask the token. + label : NDArray, shape (batch_size, 1) + Ground-truth label(start/end position) for loss computation. 
+ is_impossible : NDArray or None, shape (batch_size ,1) + Ground-truth label(is impossible) for loss computation. Set to None for squad1. + mems : NDArray + We do not use memory(a Transformer XL component) during finetuning. Returns ------- - outputs : NDArray - Shape (batch_size, seq_length, 2) + For training we have: + total_loss : list of NDArray + For squad1, we will only have one span loss of Shape (batch_size, ) + For squad2, we will have a span loss (batch_size, ) and a cls_loss (batch_size, ) + total_loss_sum : NDArray + For squad1, it equals to span_loss + For squad2, it equals to span_loss + cls_loss + + For inference we have: + start_top_log_probs : NDArray, shape (batch_size, start_n_top, ) + start_top_index : NDArray, shape (batch_size, start_n_top) + end_top_log_probs : NDArray, shape (batch_size, start_n_top * end_n_top) + end_top_index : NDArray, shape (batch_size, start_n_top * end_n_top) + cls_logits : NDArray or None, shape (batch_size, ) """ if isinstance(valid_length, list) and len(valid_length) == 0: valid_length = None diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 51f94828a0..9185098c0d 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -278,7 +278,7 @@ nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token], dtype='int32'), # input_ids - nlp.data.batchify.Pad(axis=0, pad_val=4, dtype='int32'), # segment_ids + nlp.data.batchify.Pad(axis=0, pad_val=3, dtype='int32'), # segment_ids nlp.data.batchify.Stack('float32'), # valid_length nlp.data.batchify.Pad(axis=0, pad_val=1), # p_mask nlp.data.batchify.Stack('float32'), # start_position @@ -292,10 +292,12 @@ ctx=ctx, ignore_extra=True, cast_dtype=True) - + +units = xlnet_base._net._units net = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, + units=units, version_2=args.version_2) net_eval = XLNetForQA(xlnet_base=xlnet_base, @@ -400,16 +402,16 @@ def 
convert_examples_to_features(example, tok_end_to_orig_index.append(end_orig_pos) tok_start_position, tok_end_position = -1, -1 - # get mapped start/end position if is_training and not example.is_impossible: start_chartok_pos = _convert_index(orig_to_chartok_index, - example.start_position, + example.start_offset, is_start=True) + tok_start_position = chartok_to_tok_index[start_chartok_pos] end_chartok_pos = _convert_index(orig_to_chartok_index, - example.end_position, + example.end_offset, is_start=False) tok_end_position = chartok_to_tok_index[end_chartok_pos] assert tok_start_position <= tok_end_position @@ -448,6 +450,7 @@ def convert_examples_to_features(example, else: positions = [(tok_start_position - st, tok_end_position - st) for (st, _) in doc_spans_indices] + features = [ SquadXLNetFeautre(example_id=example.example_id, qas_id=example.qas_id, @@ -477,7 +480,7 @@ def preprocess_dataset(tokenizer, max_seq_length=384, doc_stride=128, max_query_length=64, - num_workers=4, + num_workers=16, load_from_pickle=False, feature_file=None, is_training=True): @@ -498,10 +501,10 @@ def preprocess_dataset(tokenizer, is_training=is_training) # convert the raw dataset into raw features examples = pool.map(example_trans, dataset) - raw_features = pool.map(trans, examples) + raw_features = list(map(trans, examples)) #pool.map(trans, examples) if feature_file: with open(feature_file, 'wb') as file: - pickle.dump(list(raw_features), file) + pickle.dump(raw_features, file) else: assert feature_file, 'feature file should be provided.' 
with open(feature_file, 'rb') as file: @@ -517,7 +520,6 @@ def convert_full_features_to_input_features(raw_features): """convert the full features into the input features""" data_features = mx.gluon.data.SimpleDataset( list(itertools.chain.from_iterable(raw_features))) - data_features = data_features.transform(lambda *example: ( example[0], # example_id example[7], # inputs_id @@ -585,14 +587,14 @@ def _apply_gradient_decay(): def train(): """Training function.""" - segment = 'train' if not args.debug else 'dev' + segment = 'train' log.info('Loading %s data...', segment) if args.version_2: train_data = SQuAD(segment, version='2.0') else: train_data = SQuAD(segment, version='1.1') if args.debug: - sampled_data = [train_data[i] for i in range(10)] + sampled_data = [train_data[i] for i in range(100)] train_data = mx.gluon.data.SimpleDataset(sampled_data) log.info('Number of records in Train data: %s', len(train_data)) @@ -608,7 +610,6 @@ def train(): train_data_input = convert_full_features_to_input_features( train_data_features) - log.info('The number of examples after preprocessing: %s', len(train_data_input)) diff --git a/scripts/language_model/transformer/model.py b/scripts/language_model/transformer/model.py index 5a3dc6fe26..de4d7dbbe6 100644 --- a/scripts/language_model/transformer/model.py +++ b/scripts/language_model/transformer/model.py @@ -55,7 +55,6 @@ def get_model(name, **kwargs): 'xlnet_cased_l12_h768_a12': xlnet_cased_l12_h768_a12, 'xlnet_cased_l24_h1024_a16': xlnet_cased_l24_h1024_a16 } - print(name, kwargs) name = name.lower() if name not in models: raise ValueError('Model %s is not supported. 
Available options are\n\t%s' % From 5ce18477270abf60b3c56d044501db7ac56248c4 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 17 Jan 2020 17:08:09 +0800 Subject: [PATCH 35/59] add multi workers for data preprocessing --- scripts/language_model/run_squad.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 9185098c0d..574887bd92 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -205,7 +205,8 @@ help='The maximum length of an answer that can be generated. This is needed ' 'because the start and end predictions are not conditioned on one another.' ' default is 64') - +parser.add_argument('--num_workers', type=int, default=4, + help='Number of workers used for data preprocessing') parser.add_argument( '--null_score_diff_threshold', type=float, @@ -604,6 +605,7 @@ def train(): vocab=vocab, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, + num_workers=args.num_workers, max_query_length=args.max_query_length, load_from_pickle=args.load_pickle, feature_file=args.train_dataset_file) @@ -800,6 +802,7 @@ def evaluate(prefix=''): vocab=vocab, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, + num_workers=args.num_workers, max_query_length=args.max_query_length, load_from_pickle=args.load_pickle, feature_file=args.dev_dataset_file) From d2c0b815044893876a96b58551903ef58666cd18 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 17 Jan 2020 17:20:58 +0800 Subject: [PATCH 36/59] fix pylint --- scripts/bert/data/preprocessing_utils.py | 4 +++- scripts/language_model/run_squad.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index f7c7a1782e..8211945021 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -311,7 +311,8 @@ def check_is_max_context(doc_spans, 
cur_span_index, position): SquadExample = collections.namedtuple('SquadExample', [ 'qas_id', 'question_text', 'paragraph_text', 'doc_tokens', 'example_id', - 'orig_answer_text', 'start_position', 'end_position', 'start_offset', 'end_offset', 'is_impossible' + 'orig_answer_text', 'start_position', 'end_position', 'start_offset', 'end_offset', + 'is_impossible' ]) @@ -350,6 +351,7 @@ def convert_squad_examples(record, is_training): answer_offset] if not is_impossible else -1 end_position = char_to_word_offset[answer_offset + answer_length - 1] if not is_impossible else -1 + answer_offset = -1 if is_impossible else answer_offset example = SquadExample(qas_id=qas_id, question_text=question_text, paragraph_text=paragraph_text, diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 574887bd92..db797989c7 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -293,7 +293,7 @@ ctx=ctx, ignore_extra=True, cast_dtype=True) - + units = xlnet_base._net._units net = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, @@ -408,7 +408,6 @@ def convert_examples_to_features(example, start_chartok_pos = _convert_index(orig_to_chartok_index, example.start_offset, is_start=True) - tok_start_position = chartok_to_tok_index[start_chartok_pos] end_chartok_pos = _convert_index(orig_to_chartok_index, From 8d3e9a03c77a44e25551b12f755b3801719dd86b Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 18 Jan 2020 00:24:43 +0800 Subject: [PATCH 37/59] fix start position --- scripts/language_model/run_squad.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index db797989c7..0337721b69 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -29,7 +29,8 @@ sys.path.append(path + '/../bert/data') #pylint: disable=wrong-import-position from preprocessing_utils import 
concat_sequences_extended, get_doc_spans, \ - check_is_max_context, convert_squad_examples, _lcs_match, _convert_index + check_is_max_context, convert_squad_examples, _lcs_match, _convert_index, \ + align_position2doc_spans parser = argparse.ArgumentParser( description='XLNet QA example.' @@ -418,7 +419,7 @@ def convert_examples_to_features(example, # get doc spans using sliding window doc_spans, doc_spans_indices = get_doc_spans( - paragraph_tokenized, max_seq_length - len(query_tokenized) - 2, + paragraph_tokenized, max_seq_length - len(query_tokenized) - 3, doc_stride) # record whether the tokens in a docspan have max context @@ -444,12 +445,19 @@ def convert_examples_to_features(example, for doc_span in doc_spans ] - if example.is_impossible: + # get the start/end positions aligned to doc spans. If is_impossible or position out of span + # set position to cls_index, i.e., last token in the sequence. + if not example.is_impossible: + positions = [ + align_position2doc_spans([tok_start_position, tok_end_position], + doc_idx, + offset=0, + default_value=len(seq[0]) - 1) + for (doc_idx, seq) in zip(doc_spans_indices, seq_features) + ] + else: positions = [(len(seq_feature[0]) - 1, len(seq_feature[0]) - 1) for seq_feature in seq_features] - else: - positions = [(tok_start_position - st, tok_end_position - st) - for (st, _) in doc_spans_indices] features = [ SquadXLNetFeautre(example_id=example.example_id, From 264697afb8dfd1185e8c122d66bbe4d5c523d4ed Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 18 Jan 2020 00:43:09 +0800 Subject: [PATCH 38/59] fix loss log --- scripts/language_model/run_squad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 0337721b69..99c0d00305 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -719,10 +719,10 @@ def set_new_lr(step_num, batch_id): p_mask=p_mask, # pylint: disable=line-too-long 
is_impossible=is_impossible) ls = out.mean() / len(ctx) - if args.accumulate: - ls = ls / args.accumulate batch_loss_sep.append(out_sep) batch_loss.append(ls) + if args.accumulate: + ls = ls / args.accumulate ls.backward() # update if not args.accumulate or (batch_id + 1) % args.accumulate == 0: From 94c3df5e1e99c0b34c41ca0fd7fe9f6d89fc2b76 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 19 Jan 2020 00:56:46 +0800 Subject: [PATCH 39/59] add n_best_size --- scripts/language_model/run_squad.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 99c0d00305..3e06139ed6 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -199,6 +199,10 @@ default=5, help='Number of end-position candidates corresponding ' 'to a start position') +parser.add_argument('--n_best_size', + type=int, + default=5, + help='top N results written to file') parser.add_argument( '--max_answer_length', type=int, From 75758d9edba329aa807854b2acd28b1e05053f0f Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 19 Jan 2020 01:03:39 +0800 Subject: [PATCH 40/59] fix units --- scripts/language_model/run_squad.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 3e06139ed6..b51b747b80 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -310,6 +310,7 @@ start_top_n=args.start_top_n, end_top_n=args.end_top_n, version_2=args.version_2, + units=units, is_eval=True, params=net.collect_params()) From dae69fa93de406a6b2a0a0b4a9556fabf8e56ee6 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 19 Jan 2020 14:48:48 +0800 Subject: [PATCH 41/59] fix seq loss --- scripts/language_model/run_squad.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index b51b747b80..5eec3edb1b 100644 --- 
a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -760,9 +760,6 @@ def set_new_lr(step_num, batch_id): log_num / (toc - tic)) if args.version_2: - if args.accumulate: - step_loss_span = step_loss_span / args.accumulate - step_loss_cls = step_loss_cls / args.accumulate log.info('span_loss: %.4f, cls_loss: %.4f', step_loss_span / log_interval, step_loss_cls / log_interval) From 9079a39e127e9410d17aef7bc5c5ffb46233fde5 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 20 Jan 2020 18:05:56 +0800 Subject: [PATCH 42/59] add & fix --- scripts/bert/data/preprocessing_utils.py | 242 ++++++++++++----------- scripts/bert/index.rst | 55 ++++-- scripts/language_model/index.rst | 17 ++ scripts/language_model/run_squad.py | 9 +- 4 files changed, 182 insertions(+), 141 deletions(-) diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index 8211945021..2f76e7f233 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -1,10 +1,8 @@ """Utility classes and functions for data processing""" __all__ = [ - 'truncate_seqs_equal', 'concat_sequences', 'tokenize_and_align_positions', - 'get_doc_spans', 'align_position2doc_spans', 'improve_answer_span', - 'check_is_max_context', 'convert_squad_examples' -] + 'truncate_seqs_equal', 'concat_sequences', 'tokenize_and_align_positions', 'get_doc_spans', + 'align_position2doc_spans', 'improve_answer_span', 'check_is_max_context'] import collections import itertools @@ -37,104 +35,60 @@ def truncate_seqs_equal(seqs, max_len): max_len -= minval else: # Truncate all lens.data[~lens.mask] = [ - quotient + 1 if i < remainder else quotient - for i in range(lens.count()) + quotient + 1 if i < remainder else quotient for i in range(lens.count()) ] break seqs = [seq[:length] for (seq, length) in zip(seqs, lens.data.tolist())] return seqs -def concat_sequences(seqs, separators, separator_mask=None): - """ - Insert special tokens for 
sequence list or a single sequence. - For sequence pairs, the input is a list of 2 strings: - text_a, text_b. - Inputs: - text_a: 'is this jacksonville ?' - text_b: 'no it is not' - separator: [[SEP], [SEP]] - - Processed: - tokens: 'is this jacksonville ? [SEP] no it is not . [SEP]' - segment_ids: 0 0 0 0 0 1 1 1 1 1 1 - p_mask: 0 0 0 0 1 0 0 0 0 0 1 - valid_length: 11 +def concat_sequences(seqs, separators, seq_mask=0, separator_mask=1): + """Concatenate sequences in a list into a single sequence, using specified separators. - Parameters - ---------- - separator : list - The special tokens to be appended to each sequence. For example: - Given: - seqs: [[1, 2], [3, 4], [5, 6]] - separator: [[], 7] - it will be: - [1, 2, 3, 4, 7, 5, 6] - - seqs : list - sequences or a single sequence + Example 1: + seqs: [['is', 'this' ,'jacksonville', '?'], ['no' ,'it' ,'is' ,'not', '.']] + separator: [[SEP], [SEP], [CLS]] + seq_mask: 0 + separator_mask: 1 + Returns: + tokens: is this jacksonville ? [SEP] no it is not . [SEP] [CLS] + segment_ids: 0 0 0 0 0 1 1 1 1 1 1 2 + p_mask: 0 0 0 0 1 0 0 0 0 0 1 1 - separator_mask : int - The mask value for separator + Example 2: + separator_mask can also be a list. 
+ seqs: [['is', 'this' ,'jacksonville', '?'], ['no' ,'it' ,'is' ,'not', '.']] + separator: [[SEP], [SEP], [CLS]] + seq_mask: 0 + separator_mask: [[1], [1], [0]] - Returns - ------- - np.array: input token ids in 'int32', shape (batch_size, seq_length) - np.array: segment ids in 'int32', shape (batch_size, seq_length) - np.array: mask for special tokens - """ - assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 - if not separator_mask: - separator_mask = 1 - concat = sum(( - seq + sep - for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), - []) - segment_ids = sum( - ([i] * (len(seq) + len(sep)) for i, (sep, seq) in enumerate( - itertools.zip_longest(separators, seqs, fillvalue=[]))), []) - p_mask = sum(( - [0] * len(seq) + [separator_mask] * len(sep) - for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), - []) - return concat, segment_ids, p_mask + Returns: + tokens: 'is this jacksonville ? [SEP] no it is not . [SEP] [CLS]' + segment_ids: 0 0 0 0 0 1 1 1 1 1 1 2 + p_mask: 0 0 0 0 1 0 0 0 0 0 1 0 + Example 3: + seq_mask can also be a list. + seqs: [['is', 'this' ,'jacksonville', '?'], ['no' ,'it' ,'is' ,'not', '.']] + separator: [[SEP], [SEP], [CLS]] + seq_mask: [[1, 1, 1, 1], [0, 0, 0, 0, 0]] + separator_mask: [[1], [1], [0]] -def concat_sequences_extended(seqs, - separators, - seq_p_mask, - separator_mask=None): - """ - Insert special tokens for sequence list or a single sequence. - Note that different from concat_sequence(), one can specific mask for sequence and - mask for separator on element level. - For sequence pairs, the input is a list of 2 strings: - text_a, text_b. - Inputs: - text_a: 'is this jacksonville ?' - text_b: 'no it is not' - separator: [[SEP], [SEP], [CLS]] - seq_p_mask: [[1, 1, 1, 1], [0, 0, 0, 0, 0]] - separator_mask: [[1], [1], [0]] - Processed: + Returns: tokens: 'is this jacksonville ? [SEP] no it is not . 
[SEP] [CLS]' segment_ids: 0 0 0 0 0 1 1 1 1 1 1 2 p_mask: 1 1 1 1 1 0 0 0 0 0 1 0 - valid_length: 11 Parameters ---------- - separator : list - The special tokens to be appended to each sequence. For example: - seqs : list - sequences or a single sequence - - seq_p_mask : list - mask value for each element in seqs. Must have the same shape with seqs - - separator_mask : list - The mask value for separator + sequences to be concatenated + separator : list + The special tokens to separate sequences. + seq_mask : int or list + A single mask value for all sequence items or a list of values for each item in sequences + separator_mask : int or list + A single mask value for all separators or a list of values for each separator Returns ------- @@ -143,26 +97,45 @@ def concat_sequences_extended(seqs, np.array: mask for special tokens """ assert isinstance(seqs, collections.abc.Iterable) and len(seqs) > 0 - assert len(seq_p_mask) == len(seqs), 'sequence position mask ' \ - 'should have the same length with sequences.' 
- if not separator_mask: - separator_mask = [] - concat = sum(( - seq + sep - for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), + assert isinstance(seq_mask, (list, int)) + assert isinstance(separator_mask, (list, int)) + concat = sum((seq + sep for sep, seq in itertools.zip_longest(separators, seqs, fillvalue=[])), []) segment_ids = sum( - ([i] * (len(seq) + len(sep)) for i, (sep, seq) in enumerate( - itertools.zip_longest(separators, seqs, fillvalue=[]))), []) - p_mask = sum( - (s_mask + mask for sep, seq, s_mask, mask in itertools.zip_longest( - separators, seqs, seq_p_mask, separator_mask, fillvalue=[])), []) + ([i] * (len(seq) + len(sep)) + for i, (sep, seq) in enumerate(itertools.zip_longest(separators, seqs, fillvalue=[]))), + []) + if isinstance(seq_mask, int): + seq_mask = [[seq_mask] * len(seq) for seq in seqs] + if isinstance(separator_mask, int): + separator_mask = [[separator_mask] * len(sep) for sep in separators] + + p_mask = sum((s_mask + mask for sep, seq, s_mask, mask in itertools.zip_longest( + separators, seqs, seq_mask, separator_mask, fillvalue=[])), []) return concat, segment_ids, p_mask -def tokenize_and_align_positions(origin_text, start_position, end_position, - tokenizer): - """Tokenize the text and align the origin positions to the corresponding position""" +def tokenize_and_align_positions(origin_text, start_position, end_position, tokenizer): + """Tokenize the text and align the origin positions to the corresponding position. + + Parameters + ---------- + origin_text : list + list of tokens to be tokenized. + start_position : int + Start position in the origin_text + end_position : int + End position in the origin_text + tokenizer : callable function, e.g., BERTTokenizer. 
+ + Returns + ------- + int: Aligned start position + int: Aligned end position + list: tokenized text + list: map from the origin index to the tokenized sequence index + list: map from tokenized sequence index to the origin index + """ orig_to_tok_index = [] tok_to_orig_index = [] tokenized_text = [] @@ -179,9 +152,9 @@ def tokenize_and_align_positions(origin_text, start_position, end_position, def get_doc_spans(full_doc, max_length, doc_stride): - """A simple function that applying a sliding window on the doc and get doc spans + """Obtain document spans by sliding a window across the document - Parameters + Parameters ---------- full_doc: list The origin doc text @@ -200,36 +173,49 @@ while start_offset < len(full_doc): length = min(max_length, len(full_doc) - start_offset) end_offset = start_offset + length - doc_spans.append( - (full_doc[start_offset:end_offset], (start_offset, end_offset))) + doc_spans.append((full_doc[start_offset:end_offset], (start_offset, end_offset))) if start_offset + length == len(full_doc): break start_offset += min(length, doc_stride) return list(zip(*doc_spans)) -def align_position2doc_spans(positions, - doc_spans_indices, - offset=0, - default_value=-1, +def align_position2doc_spans(positions, doc_spans_indices, offset=0, default_value=-1, all_in_span=True): - """Align the origin positions to the corresponding position in doc spans""" + """Align original positions to the corresponding document span positions + + Parameters + ---------- + positions: list or int + A single or a list of positions to be aligned + doc_spans_indices: list or tuple + (start_position, end_position) + offset: int + Offset of aligned positions. Sometimes the doc spans would be added + after a question text, in this case, the new position should add + len(question_text) + default_value: int + The default value to return if the positions are not in the doc span. 
+ all_in_span: bool + If set to True, then as long as one position is out of span, all positions + would be set to default_value. + Returns + ------- + list: a list of aligned positions + """ if not isinstance(positions, list): positions = [positions] doc_start, doc_end = doc_spans_indices - if all_in_span and not all( - [p in range(doc_start, doc_end) for p in positions]): + if all_in_span and not all([p in range(doc_start, doc_end) for p in positions]): return [default_value] * len(positions) new_positions = [ - p - doc_start + - offset if p in range(doc_start, doc_end) else default_value + p - doc_start + offset if p in range(doc_start, doc_end) else default_value for p in positions ] return new_positions -def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): +def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text): """Returns tokenized answer spans that better match the annotated answer. The SQuAD annotations are character based. We first project them to @@ -254,6 +240,21 @@ def improve_answer_span(doc_tokens, input_start, input_end, tokenizer, the word "Japanese". Since our WordPiece tokenizer does not split "Japanese", we just use "Japanese" as the annotation. This is fairly rare in SQuAD, but does happen. + + Parameters + ---------- + doc_tokens: list + A list of doc tokens + input_start: int + start position of the answer + input_end: int + end position of the answer + tokenizer: callable function + orig_answer_text: str + origin answer text. + Returns + ------- + tuple: a tuple of improved start position and end position """ tok_answer_text = ' '.join(tokenizer(orig_answer_text)) @@ -287,6 +288,18 @@ def check_is_max_context(doc_spans, cur_span_index, position): and 0 right context. Note that position is the absolute position in the origin text. + + Parameters + ---------- + doc_spans: list + A list of doc spans + cur_span_index: int + The index of doc span to be checked in doc_spans. 
+ position: int + Position of the token to be checked. + Returns + ------- + bool: True if the token has 'max context'. """ best_score = None best_span_index = None @@ -309,6 +322,7 @@ def check_is_max_context(doc_spans, cur_span_index, position): return cur_span_index == best_span_index + SquadExample = collections.namedtuple('SquadExample', [ 'qas_id', 'question_text', 'paragraph_text', 'doc_tokens', 'example_id', 'orig_answer_text', 'start_position', 'end_position', 'start_offset', 'end_offset', diff --git a/scripts/bert/index.rst b/scripts/bert/index.rst index dec9e80111..0e131fd1c8 100644 --- a/scripts/bert/index.rst +++ b/scripts/bert/index.rst @@ -114,15 +114,27 @@ Results using `bert_12_768_12`: .. editing URL for the following table: https://tinyurl.com/y4n8q84w -+---------------------+--------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------+ -| Dataset | MRPC | RTE | SST-2 | MNLI-M/MM | XNLI (Chinese) | 
-+=====================+==============================================================================================================+=============================================================================================================+=============================================================================================================+==============================================================================================================+==============================================================================================================+ -| Validation Accuracy | 88.7% | 70.8% | 93% | 84.55%, 84.66% | 78.27% | -+---------------------+--------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------+ -| Log | `log `__ | `log `__ | `log `__ | `log `__ | `log `__ | -+---------------------+--------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------+ -| Command | `command `__ | `command `__ | `command `__ | `command `__ | `command `__ | 
-+---------------------+--------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------+ ++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|Task Name |Metrics |Results on Dev Set |log |command | ++=================+=====================+=======================+============================================================================================================================================+=================================================================================================================================================================+ +| CoLA |Matthew Corr. 
|60.32 |`log `__ |`command `__ | ++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| SST-2 |Accuracy |93.46 |`log `__ |`command `__ | ++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| MRPC |Accuracy/F1 |88.73/91.96 |`log `__ |`command `__ | ++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| STS-B |Pearson Corr. 
|90.34 |`log `__ |`command `__ | ++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| QQP |Accuracy |91 |`log `__ |`command `__ | ++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| MNLI |Accuracy(m/mm) |84.29/85.07 |`log `__ |`command `__ | ++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| XNLI (Chinese) |Accuracy |78.43 |`log `__ |`command `__ | ++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| RTE |Accuracy |74 |`log `__ |`command `__ | 
++-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + Results using `roberta_12_768_12`: @@ -143,18 +155,19 @@ Results using `roberta_12_768_12`: Question Answering on SQuAD ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+---------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| Dataset | SQuAD 1.1 | SQuAD 1.1 | SQuAD 2.0 | -+=========+=========================================================================================================================================+==========================================================================================================================================+==========================================================================================================================================+ -| Model | bert_12_768_12 | bert_24_1024_16 | bert_24_1024_16 | -+---------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| F1 / EM | 88.58 / 81.26 | 90.97 / 84.05 | 78.14 / 81.26 | 
-+---------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| Log | `log `__ | `log `__ | `log `__ | -+---------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| Command | `command `__ | `command `__ | `command `__ | -+---------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ - ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Dataset | SQuAD 1.1 | SQuAD 1.1 | SQuAD 2.0 | 
++===========+=========================================================================================================================================+==========================================================================================================================================+==========================================================================================================================================+ +| Model | bert_12_768_12 | bert_24_1024_16 | bert_24_1024_16 | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| F1 / EM | 88.58 / 81.26 | 90.97 / 84.22 | 77.96 / 81.02 | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Log | `log `__ | `log `__ | `log `__ | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Command | `command `__ | `command `__ | `command `__ | 
++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Prediction| `predictions.json `__ | `predictions.json `__ | `predictions.json `__ | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ For all model settings above, we set learing rate = 3e-5 and optimizer = adam. Note that the BERT model is memory-consuming. If you have limited GPU memory, you can use the following command to accumulate gradient to achieve the same result with a large batch size by setting *accumulate* and *batch_size* arguments accordingly. diff --git a/scripts/language_model/index.rst b/scripts/language_model/index.rst index f6a5c4908c..7fd327e640 100644 --- a/scripts/language_model/index.rst +++ b/scripts/language_model/index.rst @@ -264,5 +264,22 @@ We followed the hyperparameters reported by the paper authors. 
| RTE |Accuracy |84.12 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +Question Answering on SQuAD +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Dataset | SQuAD 1.1 | SQuAD 1.1 | SQuAD 2.0 | SQuAD 2.0 | ++===========+=========================================================================================================================================+==========================================================================================================================================+==========================================================================================================================================+==========================================================================================================================================+ +| Model | xlnet_12_768_12 | xlnet_24_1024_16 | xlnet_12_768_12 | xlnet_24_1024_16 | 
++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| F1 / EM | 88.58 / 81.26 | 90.97 / 84.22 | 77.96 / 81.02 | TBA | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Log | `log `__ | `log `__ | `log `__ | TBA | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Command | `command `__ | `command `__ | `command `__ | TBA | 
++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +| Prediction| `predictions.json `__ | `predictions.json `__ | `predictions.json `__ | TBA | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ +For xlnet_24_1024_16, we used the hyperparameters reported by the paper authors. 
diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 5eec3edb1b..1770a19535 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -303,13 +303,11 @@ net = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, - units=units, - version_2=args.version_2) + units=units) net_eval = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, - version_2=args.version_2, units=units, is_eval=True, params=net.collect_params()) @@ -325,8 +323,7 @@ else: net.start_logits.initialize(init=initializer, ctx=ctx) net.end_logits.initialize(init=initializer, ctx=ctx) - if args.version_2: - net.answer_class.initialize(init=initializer, ctx=ctx) + net.answer_class.initialize(init=initializer, ctx=ctx) net.hybridize(static_alloc=True) net_eval.hybridize(static_alloc=True) @@ -479,7 +476,7 @@ def convert_examples_to_features(example, end_position=end, paragraph_text=example.paragraph_text, paragraph_len=len(tokens), - is_impossible=example.is_impossible) + is_impossible=(start == len(tokens) - 1)) for (tokens, segment_ids, p_mask), (start, end), is_max, t2st, t2ed in zip(seq_features, positions, token_is_max_context, cur_tok_start_to_orig_index, cur_tok_end_to_orig_index) From 619da751691618ccb75907c5fd259b23e4db270e Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 20 Jan 2020 18:41:26 +0800 Subject: [PATCH 43/59] fix is_impossible --- scripts/language_model/run_squad.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index 1770a19535..f1ab8d9cf4 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -28,7 +28,7 @@ path = sys.path[0] sys.path.append(path + '/../bert/data') #pylint: disable=wrong-import-position -from preprocessing_utils import concat_sequences_extended, get_doc_spans, \ +from 
preprocessing_utils import concat_sequences, get_doc_spans, \ check_is_max_context, convert_squad_examples, _lcs_match, _convert_index, \ align_position2doc_spans @@ -441,7 +441,7 @@ def convert_examples_to_features(example, # get sequence features: tokens, segment_ids, p_masks seq_features = [ - concat_sequences_extended( + concat_sequences( [doc_span, query_tokenized], [[sep_token]] * 2 + [[cls_token]], [[0] * len(doc_span), [1] * len(query_tokenized)], [[1], [1], [0]]) for doc_span in doc_spans @@ -708,9 +708,8 @@ def set_new_lr(step_num, batch_id): batch_loss_sep = [] with mx.autograd.record(): for splited_data in data_list: - _, inputs, token_types, valid_length, p_mask, start_label, end_label, _is_impossible = splited_data # pylint: disable=line-too-long + _, inputs, token_types, valid_length, p_mask, start_label, end_label, is_impossible = splited_data # pylint: disable=line-too-long valid_length = valid_length.astype('float32') - is_impossible = _is_impossible if args.version_2 else None log_num += len(inputs) total_num += len(inputs) out_sep, out = net( @@ -846,8 +845,7 @@ def evaluate(prefix=''): start_top_index=outputs[1][c].asnumpy().tolist(), end_top_log_probs=outputs[2][c].asnumpy().tolist(), end_top_index=outputs[3][c].asnumpy().tolist(), - cls_logits=outputs[4][c].asnumpy().tolist() - if outputs[4] is not None else [-1e30]) + cls_logits=outputs[4][c].asnumpy().tolist()) all_results[example_ids].append(result) if batch_id % args.log_interval == 0: log.info('Batch: %d/%d', batch_id + 1, len(dev_dataloader)) From 918162e482ae3397b4dbd32787df369907df8067 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 20 Jan 2020 18:42:18 +0800 Subject: [PATCH 44/59] dic_string --- scripts/bert/data/preprocessing_utils.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index 2f76e7f233..b4765d9a33 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ 
b/scripts/bert/data/preprocessing_utils.py @@ -331,7 +331,7 @@ def check_is_max_context(doc_spans, cur_span_index, position): def convert_squad_examples(record, is_training): - """read a single entry of gluonnlp.data.SQuAD and convert it to an example""" + """read a single entry of gluonnlp.data.SQuAD and convert it to an example.""" example_id = record[0] qas_id = record[1] question_text = record[2] @@ -380,9 +380,9 @@ def convert_squad_examples(record, is_training): return example -def preprocess_text(inputs, lower=False, remove_space=True, +def _preprocess_text(inputs, lower=False, remove_space=True, keep_accents=False): - """Simple text preprocess""" + """Remove space, convert to lower case, keep accents""" if remove_space: outputs = ' '.join(inputs.strip().split()) else: @@ -398,7 +398,7 @@ def preprocess_text(inputs, lower=False, remove_space=True, def _convert_index(index, pos, M=None, is_start=True): - """Working together with _lcs_match(), convert the token index to context index""" + """Working best with _lcs_match(), convert the token index to origin text index""" if index[pos] is not None: return index[pos] N = len(index) @@ -436,7 +436,9 @@ def _convert_index(index, pos, M=None, is_start=True): def _lcs_match(max_dist, seq1, seq2, lower=False): - """unlike standard LCS, this is specifically optimized for the setting + """Longest common sequence match. 
+ + unlike standard LCS, this is specifically optimized for the setting because the mismatch between sentence pieces and original text will be small """ f = np.zeros((max(len(seq1), 1024), max(len(seq2), 1024)), @@ -456,7 +458,7 @@ def _lcs_match(max_dist, seq1, seq2, lower=False): f[i, j] = f[i, j - 1] f_prev = f[i - 1, j - 1] if i > 0 and j > 0 else 0 - if (preprocess_text(token, lower=lower, + if (_preprocess_text(token, lower=lower, remove_space=False) == seq2[j] and f_prev + 1 > f[i, j]): g[(i, j)] = 2 From 07fed44a2015f5e675db714b7e1c8efc129d0a5c Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 20 Jan 2020 18:42:50 +0800 Subject: [PATCH 45/59] fix correspodining qa model --- scripts/language_model/model/qa.py | 42 ++++++++++++------------------ 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/scripts/language_model/model/qa.py b/scripts/language_model/model/qa.py index 1e8fcc98d1..42ab647392 100644 --- a/scripts/language_model/model/qa.py +++ b/scripts/language_model/model/qa.py @@ -160,8 +160,6 @@ class XLNetForQA(Block): Number of start position candidates during inference. end_top_n : int Number of end position candidates for each start position during inference. - version_2 : Bool - model for squad2.0 includes an extra answer class to predict answerability. is_eval : Bool If set to True, do inference. 
prefix : str or None @@ -173,7 +171,6 @@ def __init__(self, xlnet_base, start_top_n=None, end_top_n=None, - version_2=False, is_eval=False, units=768, prefix=None, @@ -186,11 +183,9 @@ def __init__(self, self.loss = loss.SoftmaxCELoss() self.start_logits = PoolerStartLogits() self.end_logits = PoolerEndLogits(units=units, is_eval=is_eval) - self.version2 = version_2 self.eval = is_eval - if version_2: - self.answer_class = XLNetPoolerAnswerClass(units=units) - self.cls_loss = loss.SigmoidBinaryCrossEntropyLoss() + self.answer_class = XLNetPoolerAnswerClass(units=units) + self.cls_loss = loss.SigmoidBinaryCrossEntropyLoss() def __call__(self, inputs, @@ -282,7 +277,7 @@ def forward(self, inputs, token_types, valid_length, p_mask, label, p_masks=p_mask) # shape (bsz, slen) bsz, slen, hsz = output.shape if not self.eval: - #training + # training start_positions, end_positions = label end_logit = self.end_logits(output, start_positions=start_positions, @@ -290,19 +285,19 @@ def forward(self, inputs, token_types, valid_length, p_mask, label, span_loss = (self.loss(start_logits, start_positions) + self.loss(end_logit, end_positions)) / 2 - cls_loss = None total_loss = [span_loss] - if self.version2: - start_log_probs = mx.nd.softmax(start_logits, axis=-1) - start_states = mx.nd.batch_dot(output, - start_log_probs.expand_dims(-1), - transpose_a=True).squeeze(-1) - - cls_logits = self.answer_class(output, start_states, - valid_length) - cls_loss = self.cls_loss(cls_logits, is_impossible) - total_loss.append(0.5 * cls_loss) - total_loss_sum = span_loss + 0.5 * cls_loss if cls_loss is not None else span_loss + + # get cls loss + start_log_probs = mx.nd.softmax(start_logits, axis=-1) + start_states = mx.nd.batch_dot(output, + start_log_probs.expand_dims(-1), + transpose_a=True).squeeze(-1) + + cls_logits = self.answer_class(output, start_states, + valid_length) + cls_loss = self.cls_loss(cls_logits, is_impossible) + total_loss.append(0.5 * cls_loss) + total_loss_sum = 
span_loss + 0.5 * cls_loss return total_loss, total_loss_sum else: #inference @@ -348,11 +343,8 @@ def forward(self, inputs, token_types, valid_length, p_mask, label, start_states = mx.nd.batch_dot(output, start_probs.expand_dims(-1), transpose_a=True).squeeze(-1) - - cls_logits = None - if self.version2: - cls_logits = self.answer_class(output, start_states, - valid_length) + cls_logits = self.answer_class(output, start_states, + valid_length) outputs = (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits) From 1243b3464453624e7f5f01548bf64b3a556bd3d7 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 20 Jan 2020 18:43:12 +0800 Subject: [PATCH 46/59] fix init --- scripts/bert/data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bert/data/__init__.py b/scripts/bert/data/__init__.py index 04d366d0cd..1b16205fff 100644 --- a/scripts/bert/data/__init__.py +++ b/scripts/bert/data/__init__.py @@ -17,4 +17,4 @@ # pylint: disable=wildcard-import """BERT data.""" -from . import qa, classification, embedding, transform, dataloader, pretrain +from . 
import classification, embedding, transform, dataloader, pretrain From 5f925f3b946241b1af0ce7dc934592342d9a04a1 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 22 Jan 2020 11:56:17 +0800 Subject: [PATCH 47/59] add squad result --- scripts/bert/data/preprocessing_utils.py | 91 +++++++++++++++++------- scripts/language_model/index.rst | 41 +++++++---- scripts/language_model/run_squad.py | 32 ++++----- 3 files changed, 104 insertions(+), 60 deletions(-) diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index b4765d9a33..2706a1e05a 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -2,7 +2,8 @@ __all__ = [ 'truncate_seqs_equal', 'concat_sequences', 'tokenize_and_align_positions', 'get_doc_spans', - 'align_position2doc_spans', 'improve_answer_span', 'check_is_max_context'] + 'align_position2doc_spans', 'improve_answer_span', 'check_is_max_context' +] import collections import itertools @@ -199,6 +200,7 @@ def align_position2doc_spans(positions, doc_spans_indices, offset=0, default_val all_in_span: bool If set to True, then as long as one position is out of span, all positions would be set to default_value. + Returns ------- list: a list of aligned positions @@ -297,6 +299,7 @@ def check_is_max_context(doc_spans, cur_span_index, position): The index of doc span to be checked in doc_spans. position: int Position of the token to be checked. + Returns ------- bool: True if the token has 'max context'. 
@@ -322,16 +325,27 @@ def check_is_max_context(doc_spans, cur_span_index, position): return cur_span_index == best_span_index - SquadExample = collections.namedtuple('SquadExample', [ - 'qas_id', 'question_text', 'paragraph_text', 'doc_tokens', 'example_id', - 'orig_answer_text', 'start_position', 'end_position', 'start_offset', 'end_offset', - 'is_impossible' + 'qas_id', 'question_text', 'paragraph_text', 'doc_tokens', 'example_id', 'orig_answer_text', + 'start_position', 'end_position', 'start_offset', 'end_offset', 'is_impossible' ]) def convert_squad_examples(record, is_training): - """read a single entry of gluonnlp.data.SQuAD and convert it to an example.""" + """read a single entry of gluonnlp.data.SQuAD and convert it to an example. + + Parameters + ---------- + record: list + An entry of gluonnlp.data.SQuAD + is_training: bool + If the example is used for training, + then a rough start/end position will be generated + + Returns + ------- + SquadExample: An instance of SquadExample + """ example_id = record[0] qas_id = record[1] question_text = record[2] @@ -361,28 +375,35 @@ def convert_squad_examples(record, is_training): start_position = -1 end_position = -1 else: - start_position = char_to_word_offset[ - answer_offset] if not is_impossible else -1 + start_position = char_to_word_offset[answer_offset] if not is_impossible else -1 end_position = char_to_word_offset[answer_offset + answer_length - 1] if not is_impossible else -1 answer_offset = -1 if is_impossible else answer_offset - example = SquadExample(qas_id=qas_id, - question_text=question_text, - paragraph_text=paragraph_text, - doc_tokens=doc_tokens, - example_id=example_id, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - start_offset=answer_offset, - end_offset=answer_offset + len(orig_answer_text) - 1, - is_impossible=is_impossible) + example = SquadExample( + qas_id=qas_id, question_text=question_text, paragraph_text=paragraph_text, + 
doc_tokens=doc_tokens, example_id=example_id, orig_answer_text=orig_answer_text, + start_position=start_position, end_position=end_position, start_offset=answer_offset, + end_offset=answer_offset + len(orig_answer_text) - 1, is_impossible=is_impossible) return example -def _preprocess_text(inputs, lower=False, remove_space=True, - keep_accents=False): - """Remove space, convert to lower case, keep accents""" +def _preprocess_text(inputs, lower=False, remove_space=True, keep_accents=False): + """Remove space, convert to lower case, keep accents. + + Parameters + ---------- + inputs: str + input string + lower: bool + If convert the input string to lower case. + remove_space: bool + If remove the spaces in the input string. + keep_accents: bool + If keep accents in the input string. + Returns + ------- + str: processed input string + """ if remove_space: outputs = ' '.join(inputs.strip().split()) else: @@ -393,7 +414,6 @@ def _preprocess_text(inputs, lower=False, remove_space=True, outputs = ''.join([c for c in outputs if not unicodedata.combining(c)]) if lower: outputs = outputs.lower() - return outputs @@ -435,13 +455,31 @@ def _convert_index(index, pos, M=None, is_start=True): return index[front] -def _lcs_match(max_dist, seq1, seq2, lower=False): +def _lcs_match(max_dist, seq1, seq2, max_seq_length=1024, lower=False): """Longest common sequence match. unlike standard LCS, this is specifically optimized for the setting because the mismatch between sentence pieces and original text will be small + + Parameters + ---------- + max_dist: int + The max distance between tokens to be considered. + seq1: list + The first sequence to be matched. + seq2: list + The second sequence to be matched. + lower: bool + If match the lower-cased tokens. + Returns + ------- + numpyArray: Token-wise lcs matrix f. Shape of ((max(len(seq1), 1024), max(len(seq2), 1024)) + Map: The dp path in matrix f. + g[(i ,j)] == 2 if token_i in seq1 matches token_j in seq2. 
+ g[(i, j)] == 1 if token_i in seq1 matches token_{j-1} in seq2. + g[(i, j)] == 0 of token_{i-1} in seq1 matches token_j in seq2. """ - f = np.zeros((max(len(seq1), 1024), max(len(seq2), 1024)), + f = np.zeros((max(len(seq1), max_seq_length), max(len(seq2), max_seq_length)), dtype=np.float32) g = {} for i, token in enumerate(seq1): @@ -458,8 +496,7 @@ def _lcs_match(max_dist, seq1, seq2, lower=False): f[i, j] = f[i, j - 1] f_prev = f[i - 1, j - 1] if i > 0 and j > 0 else 0 - if (_preprocess_text(token, lower=lower, - remove_space=False) == seq2[j] + if (_preprocess_text(token, lower=lower, remove_space=False) == seq2[j] and f_prev + 1 > f[i, j]): g[(i, j)] = 2 f[i, j] = f_prev + 1 diff --git a/scripts/language_model/index.rst b/scripts/language_model/index.rst index 7fd327e640..695362e0c3 100644 --- a/scripts/language_model/index.rst +++ b/scripts/language_model/index.rst @@ -267,19 +267,32 @@ We followed the hyperparameters reported by the paper authors. Question Answering on SQuAD ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| Dataset | SQuAD 1.1 | SQuAD 1.1 | SQuAD 2.0 | SQuAD 2.0 | 
-+===========+=========================================================================================================================================+==========================================================================================================================================+==========================================================================================================================================+==========================================================================================================================================+ -| Model | xlnet_12_768_12 | xlnet_24_1024_16 | xlnet_12_768_12 | xlnet_24_1024_16 | -+-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| F1 / EM | 88.58 / 81.26 | 90.97 / 84.22 | 77.96 / 81.02 | TBA | -+-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| Log | `log `__ | `log `__ | `log `__ | TBA | 
-+-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| Command | `command `__ | `command `__ | `command `__ | TBA | -+-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -| Prediction| `predictions.json `__ | `predictions.json `__ | `predictions.json `__ | TBA | -+-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ -For xlnet_24_1024_16, we used the hyperparameters reported by the paper authors. 
++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Dataset | SQuAD 1.1 | SQuAD 1.1 | SQuAD 2.0 | SQuAD 2.0 | ++===========+=========================================================================================================================================================+==========================================================================================================================================================+==================================================================================================================================================================================================================================================================================================================+==================================================================================================================================================================================================================================================================================================================+ +| Model | xlnet_12_768_12 | xlnet_24_1024_16 | 
xlnet_12_768_12 | xlnet_24_1024_16 | ++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| EM / F1 | 85.34 / 91.67 | 87.9 / 93.87 | 80.25 / 82.95 | 85.85 / 88.56 | ++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Log | 
`log `__ | `log `__ | `log `__ | `log `__ | ++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Command | `command `__ | `command `__ | `command `__ | `command `__ | ++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 
Prediction| `predictions.json `__ | `predictions.json `__ | `predictions.json `__ `null_odds.json `__ | `predictions.json `__ `null_odds.json `__ | ++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +For xlnet_24_1024_16, we used hyperparameters reported by the paper authors. + + +To get the score of the dev data, you need to download the evaluate script (`evaluate-2.0.py `_). +You can either put the evaluate script under the same folder with run_squad.py to let our script run it automatically, +or run it manually by yourself. To run the evaluate script, you can use the following commands: + +SQuAD1.1: +.. code-block:: console + + $ python evaluate-v2.0.py dev-v2.0.json predictions.json +SQuAD2.0: +.. 
code-block:: console + $ python evaluate-v2.0.py --data_file dev-v2.0.json --pred_file predictions.json --na-prob-file null_odds.json \ No newline at end of file diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index f1ab8d9cf4..dd984ff684 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -14,6 +14,7 @@ import pickle import sys import itertools +import subprocess import multiprocessing as mp from functools import partial import numpy as np @@ -23,7 +24,6 @@ from model.qa import XLNetForQA from transformer import model from xlnet_qa_evaluate import predict_extended -from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad path = sys.path[0] sys.path.append(path + '/../bert/data') @@ -784,7 +784,7 @@ def set_new_lr(step_num, batch_id): ]) -def evaluate(prefix=''): +def evaluate(): """Evaluate the model on validation dataset. """ log.info('Loading dev data...') @@ -874,12 +874,12 @@ def evaluate(prefix=''): all_nbest_json[example_qas_id] = nbest_json output_prediction_file = os.path.join(args.output_dir, - 'predictions_{}.json'.format(prefix)) + 'predictions.json') output_nbest_file = os.path.join( - args.output_dir, 'nbest_predictions_{}.json'.format(prefix)) + args.output_dir, 'nbest_predictions.json') if args.version_2: output_null_log_odds_file = os.path.join( - args.output_dir, 'null_odds_{}.json'.format(prefix)) + args.output_dir, 'null_odds.json') else: output_null_log_odds_file = None @@ -891,21 +891,15 @@ def evaluate(prefix=''): with open(output_null_log_odds_file, 'w') as writer: writer.write(json.dumps(scores_diff_json, indent=4) + '\n') - if args.version_2: - evaluate_options = EVAL_OPTS( - data_file=dev_data_path, - pred_file=output_prediction_file, - na_prob_file=output_null_log_odds_file, - na_prob_thresh=args.null_score_diff_threshold) + if os.path.exists(sys.path[0] + '/evaluate-v2.0.py'): + arguments = ['--data_file', dev_data_path, '--pred_file', 
output_prediction_file, + '--na_prob_thresh', str(args.null_score_diff_threshold)] + if args.version_2: + arguments += ['--na_prob_file', output_null_log_odds_file] + subprocess.call([sys.executable, sys.path[0] + '/evaluate-v2.0.py'] + arguments) else: - evaluate_options = EVAL_OPTS( - data_file=dev_data_path, - pred_file=output_prediction_file, - na_prob_file=None, - na_prob_thresh=args.null_score_diff_threshold) - - results = evaluate_on_squad(evaluate_options) - return results + log.info('Please download evaluate-v2.0.py to get evaluation results for SQuAD. Check index.rst to see' + 'how to download evaluate-v2.0.py.') if __name__ == '__main__': From 9c13651edc3d7be80d5ebfa245573182f6b79c8f Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 22 Jan 2020 12:03:08 +0800 Subject: [PATCH 48/59] add new glue results --- scripts/language_model/index.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/language_model/index.rst b/scripts/language_model/index.rst index 695362e0c3..c2bb0d5ee5 100644 --- a/scripts/language_model/index.rst +++ b/scripts/language_model/index.rst @@ -226,21 +226,21 @@ Results using `xlnet_12_768_12`: +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ |Task Name |Metrics |Results on Dev Set |log |command | +=================+=====================+=======================+============================================================================================================================================+=================================================================================================================================================================+ -| CoLA |Matthew Corr. 
|56 |`log `__ |`command `__ | +| CoLA |Matthew Corr. |56 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| SST-2 |Accuracy |94 |`log `__ |`command `__ | +| SST-2 |Accuracy |94 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| MRPC |Accuracy/F1 |87/90 |`log `__ |`command `__ | +| MRPC |Accuracy/F1 |87/90 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| STS-B |Pearson Corr. |86 |`log `__ |`command `__ | +| STS-B |Pearson Corr. 
|86 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| QQP |Accuracy |90 |`log `__ |`command `__ | +| QQP |Accuracy |90 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| MNLI |Accuracy(m/mm) |87/86 |`log `__ |`command `__ | +| MNLI |Accuracy(m/mm) |87/86 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | QNLI |Accuracy |88 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| RTE |Accuracy |74 |`log `__ |`command `__ | +| RTE |Accuracy |74 |`log `__ |`command `__ | 
+-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ Results using `xlnet_24_1024_16`: From 8bc367dd0159483e6ae0723de4255ed6dd4ea855 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 22 Jan 2020 12:15:35 +0800 Subject: [PATCH 49/59] fix results --- scripts/language_model/index.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/language_model/index.rst b/scripts/language_model/index.rst index c2bb0d5ee5..7bd5733513 100644 --- a/scripts/language_model/index.rst +++ b/scripts/language_model/index.rst @@ -226,21 +226,21 @@ Results using `xlnet_12_768_12`: +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ |Task Name |Metrics |Results on Dev Set |log |command | +=================+=====================+=======================+============================================================================================================================================+=================================================================================================================================================================+ -| CoLA |Matthew Corr. |56 |`log `__ |`command `__ | +| CoLA |Matthew Corr. 
|59.33 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| SST-2 |Accuracy |94 |`log `__ |`command `__ | +| SST-2 |Accuracy |94.61 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| MRPC |Accuracy/F1 |87/90 |`log `__ |`command `__ | +| MRPC |Accuracy/F1 |89.22/92.20 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| STS-B |Pearson Corr. |86 |`log `__ |`command `__ | +| STS-B |Pearson Corr. 
|89.34 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| QQP |Accuracy |90 |`log `__ |`command `__ | +| QQP |Accuracy |91.31 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| MNLI |Accuracy(m/mm) |87/86 |`log `__ |`command `__ | +| MNLI |Accuracy(m/mm) |87.19/86.45 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | QNLI |Accuracy |88 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| RTE |Accuracy |74 |`log `__ |`command `__ | +| RTE |Accuracy |75.09 |`log `__ |`command `__ | 
+-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ Results using `xlnet_24_1024_16`: From a99b393248b1a277fc6621e9af73e165f1c70524 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 22 Jan 2020 13:51:05 +0800 Subject: [PATCH 50/59] fix lint and rst --- scripts/bert/index.rst | 1 + scripts/language_model/run_squad.py | 483 +++++++++------------------- 2 files changed, 152 insertions(+), 332 deletions(-) diff --git a/scripts/bert/index.rst b/scripts/bert/index.rst index 0e131fd1c8..0433314ff0 100644 --- a/scripts/bert/index.rst +++ b/scripts/bert/index.rst @@ -168,6 +168,7 @@ Question Answering on SQuAD +-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ | Prediction| `predictions.json `__ | `predictions.json `__ | `predictions.json `__ | +-----------+-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ + For all model settings above, we set learing rate = 3e-5 and optimizer = adam. Note that the BERT model is memory-consuming. 
If you have limited GPU memory, you can use the following command to accumulate gradient to achieve the same result with a large batch size by setting *accumulate* and *batch_size* arguments accordingly. diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py index dd984ff684..4b7f8dc780 100644 --- a/scripts/language_model/run_squad.py +++ b/scripts/language_model/run_squad.py @@ -32,195 +32,110 @@ check_is_max_context, convert_squad_examples, _lcs_match, _convert_index, \ align_position2doc_spans -parser = argparse.ArgumentParser( - description='XLNet QA example.' - 'We fine-tune the XLNet model on SQuAD dataset.') +parser = argparse.ArgumentParser(description='XLNet QA example.' + 'We fine-tune the XLNet model on SQuAD dataset.') # I/O configuration -parser.add_argument( - '--sentencepiece', - type=str, - default=None, - help= - 'Path to the sentencepiece .model file for both tokenization and vocab.') -parser.add_argument( - '--pretrained_xlnet_parameters', - type=str, - default=None, - help='Pre-trained bert model parameter file. default is None') -parser.add_argument( - '--load_pickle', - action='store_true', - help='Whether do data preprocessing or load from pickled file') -parser.add_argument('--dev_dataset_file', - default='./output_dir/out.dev', - type=str, +parser.add_argument('--sentencepiece', type=str, default=None, + help='Path to the sentencepiece .model file for both tokenization and vocab.') +parser.add_argument('--pretrained_xlnet_parameters', type=str, default=None, + help='Pre-trained bert model parameter file. 
default is None') +parser.add_argument('--load_pickle', action='store_true', + help='Whether do data preprocessing or load from pickled file') +parser.add_argument('--dev_dataset_file', default='./output_dir/out.dev', type=str, help='Path to dev data features') -parser.add_argument('--train_dataset_file', - default='./output_dir/out.train', - type=str, +parser.add_argument('--train_dataset_file', default='./output_dir/out.train', type=str, help='Path to train data features') -parser.add_argument('--model_parameters', - type=str, - default=None, - help='Model parameter file') +parser.add_argument('--model_parameters', type=str, default=None, help='Model parameter file') parser.add_argument( - '--output_dir', - type=str, - default='./output_dir', + '--output_dir', type=str, default='./output_dir', help='The output directory where the model params will be written.' ' default is ./output_dir') # Training configuration parser.add_argument('--seed', type=int, default=3, help='Random seed') -parser.add_argument('--version_2', - action='store_true', - help='Whether use SQuAD v2.0 dataset') -parser.add_argument( - '--model', - type=str, - default='xlnet_cased_l12_h768_a12', - choices=['xlnet_cased_l24_h1024_a16', 'xlnet_cased_l12_h768_a12'], - help='The name of pre-trained XLNet model to fine-tune') +parser.add_argument('--version_2', action='store_true', help='Whether use SQuAD v2.0 dataset') +parser.add_argument('--model', type=str, default='xlnet_cased_l12_h768_a12', + choices=['xlnet_cased_l24_h1024_a16', 'xlnet_cased_l12_h768_a12'], + help='The name of pre-trained XLNet model to fine-tune') +parser.add_argument('--dataset', type=str, default='126gb', choices=['126gb'], + help='The dataset BERT pre-trained with. Currently only 126gb is available') parser.add_argument( - '--dataset', - type=str, - default='126gb', - choices=['126gb'], - help='The dataset BERT pre-trained with. 
Currently only 126gb is available' -) -parser.add_argument( - '--uncased', - action='store_true', - help= - 'if set, inputs are converted to lower case. Up to 01/04/2020, all released models are cased' -) -parser.add_argument( - '--gpu', - type=int, - default=None, - help='Number of gpus to use for finetuning. CPU is used if not set.') -parser.add_argument('--log_interval', - type=int, - default=10, - help='report interval. default is 10') -parser.add_argument('--debug', - action='store_true', + '--uncased', action='store_true', help= + 'if set, inputs are converted to lower case. Up to 01/04/2020, all released models are cased') +parser.add_argument('--gpu', type=int, default=None, + help='Number of gpus to use for finetuning. CPU is used if not set.') +parser.add_argument('--log_interval', type=int, default=10, help='report interval. default is 10') +parser.add_argument('--debug', action='store_true', help='Run the example in test mode for sanity checks') -parser.add_argument('--only_predict', - action='store_true', - help='Whether to predict only.') +parser.add_argument('--only_predict', action='store_true', help='Whether to predict only.') # Hyperparameters -parser.add_argument('--epochs', - type=int, - default=3, - help='number of epochs, default is 3') -parser.add_argument('--training_steps', - type=int, - help='training steps. Note that epochs will be ignored ' - 'if training steps are set') - +parser.add_argument('--epochs', type=int, default=3, help='number of epochs, default is 3') parser.add_argument( - '--batch_size', - type=int, - default=32, - help='Batch size. Number of examples per gpu in a minibatch. default is 32' -) - -parser.add_argument('--test_batch_size', - type=int, - default=24, + '--training_steps', type=int, help='training steps. Note that epochs will be ignored ' + 'if training steps are set') + +parser.add_argument('--batch_size', type=int, default=32, + help='Batch size. Number of examples per gpu in a minibatch. 
default is 32') + +parser.add_argument('--test_batch_size', type=int, default=24, help='Test batch size. default is 24') -parser.add_argument('--optimizer', - type=str, - default='bertadam', +parser.add_argument('--optimizer', type=str, default='bertadam', help='optimization algorithm. default is bertadam') parser.add_argument( - '--accumulate', - type=int, - default=None, - help='The number of batches for ' + '--accumulate', type=int, default=None, help='The number of batches for ' 'gradients accumulation to simulate large batch size. Default is None') -parser.add_argument('--lr', - type=float, - default=3e-5, +parser.add_argument('--lr', type=float, default=3e-5, help='Initial learning rate. default is 5e-5') parser.add_argument( - '--warmup_ratio', - type=float, - default=0, + '--warmup_ratio', type=float, default=0, help='ratio of warmup steps that linearly increase learning rate from ' '0 to target learning rate. default is 0') -parser.add_argument('--layerwise_decay', - type=float, - default=0.75, - help='Layer-wise lr decay') +parser.add_argument('--layerwise_decay', type=float, default=0.75, help='Layer-wise lr decay') parser.add_argument('--wd', type=float, default=0.01, help='weight decay') parser.add_argument('--dropout', type=float, default=0.1, help='dropout') -parser.add_argument('--attention_dropout', - type=float, - default=0.1, - help='attention dropout') +parser.add_argument('--attention_dropout', type=float, default=0.1, help='attention dropout') # Data pre/post processing parser.add_argument( - '--max_seq_length', - type=int, - default=512, + '--max_seq_length', type=int, default=512, help='The maximum total input sequence length after WordPiece tokenization.' 'Sequences longer than this will be truncated, and sequences shorter ' 'than this will be padded. 
default is 512') parser.add_argument( - '--doc_stride', - type=int, - default=128, + '--doc_stride', type=int, default=128, help='When splitting up a long document into chunks, how much stride to ' 'take between chunks. default is 128') parser.add_argument( - '--max_query_length', - type=int, - default=64, + '--max_query_length', type=int, default=64, help='The maximum number of tokens for the question. Questions longer than ' 'this will be truncated to this length. default is 64') -parser.add_argument('--start_top_n', - type=int, - default=5, +parser.add_argument('--start_top_n', type=int, default=5, help='Number of start-position candidates') -parser.add_argument('--end_top_n', - type=int, - default=5, +parser.add_argument('--end_top_n', type=int, default=5, help='Number of end-position candidates corresponding ' 'to a start position') -parser.add_argument('--n_best_size', - type=int, - default=5, - help='top N results written to file') +parser.add_argument('--n_best_size', type=int, default=5, help='top N results written to file') parser.add_argument( - '--max_answer_length', - type=int, - default=64, + '--max_answer_length', type=int, default=64, help='The maximum length of an answer that can be generated. This is needed ' 'because the start and end predictions are not conditioned on one another.' ' default is 64') parser.add_argument('--num_workers', type=int, default=4, help='Number of workers used for data preprocessing') parser.add_argument( - '--null_score_diff_threshold', - type=float, - default=0.0, - help= - 'If null_score - best_non_null is greater than the threshold predict null.' + '--null_score_diff_threshold', type=float, default=0.0, + help='If null_score - best_non_null is greater than the threshold predict null.' 'Typical values are between -1.0 and -5.0. default is 0.0. 
' - 'Note that a best value can be automatically found by the evaluation script' -) + 'Note that a best value can be automatically found by the evaluation script') args = parser.parse_args() @@ -235,8 +150,8 @@ # set the logger log = logging.getLogger('gluonnlp') log.setLevel(logging.DEBUG) -formatter = logging.Formatter( - fmt='%(levelname)s:%(name)s:%(asctime)s %(message)s', datefmt='%H:%M:%S') +formatter = logging.Formatter(fmt='%(levelname)s:%(name)s:%(asctime)s %(message)s', + datefmt='%H:%M:%S') fh = logging.FileHandler(os.path.join(args.output_dir, 'finetune_squad.log')) fh.setLevel(logging.INFO) fh.setFormatter(formatter) @@ -260,9 +175,8 @@ log.info('Using gradient accumulation. Effective batch size = %d', args.accumulate * args.batch_size) if args.max_seq_length <= args.max_query_length + 3: - raise ValueError( - 'The max_seq_length (%d) must be greater than max_query_length ' - '(%d) + 3' % (args.max_seq_length, args.max_query_length)) + raise ValueError('The max_seq_length (%d) must be greater than max_query_length ' + '(%d) + 3' % (args.max_seq_length, args.max_query_length)) get_pretrained = True @@ -281,9 +195,7 @@ batchify_fn = nlp.data.batchify.Tuple( nlp.data.batchify.Stack('int32'), # example_id - nlp.data.batchify.Pad(axis=0, - pad_val=vocab[vocab.padding_token], - dtype='int32'), # input_ids + nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token], dtype='int32'), # input_ids nlp.data.batchify.Pad(axis=0, pad_val=3, dtype='int32'), # segment_ids nlp.data.batchify.Stack('float32'), # valid_length nlp.data.batchify.Pad(axis=0, pad_val=1), # p_mask @@ -293,33 +205,22 @@ if pretrained_xlnet_parameters: # only load XLnetModel parameters - nlp.utils.load_parameters(xlnet_base, - pretrained_xlnet_parameters, - ctx=ctx, - ignore_extra=True, + nlp.utils.load_parameters(xlnet_base, pretrained_xlnet_parameters, ctx=ctx, ignore_extra=True, cast_dtype=True) units = xlnet_base._net._units -net = XLNetForQA(xlnet_base=xlnet_base, - 
start_top_n=args.start_top_n, - end_top_n=args.end_top_n, +net = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, units=units) -net_eval = XLNetForQA(xlnet_base=xlnet_base, - start_top_n=args.start_top_n, - end_top_n=args.end_top_n, - units=units, - is_eval=True, +net_eval = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, + end_top_n=args.end_top_n, units=units, is_eval=True, params=net.collect_params()) initializer = mx.init.Normal(0.02) if args.model_parameters: # load complete XLNetForQA parameters - nlp.utils.load_parameters(net, - args.model_parameters, - ctx=ctx, - cast_dtype=True) + nlp.utils.load_parameters(net, args.model_parameters, ctx=ctx, cast_dtype=True) else: net.start_logits.initialize(init=initializer, ctx=ctx) net.end_logits.initialize(init=initializer, ctx=ctx) @@ -329,22 +230,15 @@ net_eval.hybridize(static_alloc=True) SquadXLNetFeautre = collections.namedtuple('SquadXLNetFeautre', [ - 'example_id', 'qas_id', 'valid_length', 'tokens', - 'tok_start_to_orig_index', 'tok_end_to_orig_index', 'token_is_max_context', - 'input_ids', 'p_mask', 'segment_ids', 'start_position', 'end_position', - 'paragraph_text', 'paragraph_len', 'is_impossible' + 'example_id', 'qas_id', 'valid_length', 'tokens', 'tok_start_to_orig_index', + 'tok_end_to_orig_index', 'token_is_max_context', 'input_ids', 'p_mask', 'segment_ids', + 'start_position', 'end_position', 'paragraph_text', 'paragraph_len', 'is_impossible' ]) -def convert_examples_to_features(example, - tokenizer=None, - cls_token=None, - sep_token=None, - vocab=None, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - is_training=True): +def convert_examples_to_features(example, tokenizer=None, cls_token=None, sep_token=None, + vocab=None, max_seq_length=384, doc_stride=128, + max_query_length=64, is_training=True): """convert the examples to the XLNet features""" query_tokenized = tokenizer(example.question_text)[:max_query_length] #tokenize 
paragraph and get start/end position of the answer in tokenized paragraph @@ -393,14 +287,8 @@ def convert_examples_to_features(example, for i in range(len(paragraph_tokenized)): start_chartok_pos = tok_start_to_chartok_index[i] end_chartok_pos = tok_end_to_chartok_index[i] - start_orig_pos = _convert_index(chartok_to_orig_index, - start_chartok_pos, - n, - is_start=True) - end_orig_pos = _convert_index(chartok_to_orig_index, - end_chartok_pos, - m, - is_start=False) + start_orig_pos = _convert_index(chartok_to_orig_index, start_chartok_pos, n, is_start=True) + end_orig_pos = _convert_index(chartok_to_orig_index, end_chartok_pos, m, is_start=False) tok_start_to_orig_index.append(start_orig_pos) tok_end_to_orig_index.append(end_orig_pos) @@ -408,42 +296,35 @@ def convert_examples_to_features(example, tok_start_position, tok_end_position = -1, -1 # get mapped start/end position if is_training and not example.is_impossible: - start_chartok_pos = _convert_index(orig_to_chartok_index, - example.start_offset, + start_chartok_pos = _convert_index(orig_to_chartok_index, example.start_offset, is_start=True) tok_start_position = chartok_to_tok_index[start_chartok_pos] - end_chartok_pos = _convert_index(orig_to_chartok_index, - example.end_offset, - is_start=False) + end_chartok_pos = _convert_index(orig_to_chartok_index, example.end_offset, is_start=False) tok_end_position = chartok_to_tok_index[end_chartok_pos] assert tok_start_position <= tok_end_position # get doc spans using sliding window - doc_spans, doc_spans_indices = get_doc_spans( - paragraph_tokenized, max_seq_length - len(query_tokenized) - 3, - doc_stride) + doc_spans, doc_spans_indices = get_doc_spans(paragraph_tokenized, + max_seq_length - len(query_tokenized) - 3, + doc_stride) # record whether the tokens in a docspan have max context token_is_max_context = [{ - p: check_is_max_context(doc_spans_indices, i, - p + doc_spans_indices[i][0]) + p: check_is_max_context(doc_spans_indices, i, p + 
doc_spans_indices[i][0]) for p in range(len(doc_span)) } for (i, doc_span) in enumerate(doc_spans)] # get token -> origin text mapping - cur_tok_start_to_orig_index = [[ - tok_start_to_orig_index[p + st] for p in range(len(doc_span)) - ] for doc_span, (st, ed) in zip(doc_spans, doc_spans_indices)] - cur_tok_end_to_orig_index = [[ - tok_end_to_orig_index[p + st] for p in range(len(doc_span)) - ] for doc_span, (st, ed) in zip(doc_spans, doc_spans_indices)] + cur_tok_start_to_orig_index = [[tok_start_to_orig_index[p + st] for p in range(len(doc_span))] + for doc_span, (st, ed) in zip(doc_spans, doc_spans_indices)] + cur_tok_end_to_orig_index = [[tok_end_to_orig_index[p + st] for p in range(len(doc_span))] + for doc_span, (st, ed) in zip(doc_spans, doc_spans_indices)] # get sequence features: tokens, segment_ids, p_masks seq_features = [ - concat_sequences( - [doc_span, query_tokenized], [[sep_token]] * 2 + [[cls_token]], - [[0] * len(doc_span), [1] * len(query_tokenized)], [[1], [1], [0]]) + concat_sequences([doc_span, query_tokenized], [[sep_token]] * 2 + [[cls_token]], + [[0] * len(doc_span), [1] * len(query_tokenized)], [[1], [1], [0]]) for doc_span in doc_spans ] @@ -451,9 +332,7 @@ def convert_examples_to_features(example, # set position to cls_index, i.e., last token in the sequence. 
if not example.is_impossible: positions = [ - align_position2doc_spans([tok_start_position, tok_end_position], - doc_idx, - offset=0, + align_position2doc_spans([tok_start_position, tok_end_position], doc_idx, offset=0, default_value=len(seq[0]) - 1) for (doc_idx, seq) in zip(doc_spans_indices, seq_features) ] @@ -462,56 +341,36 @@ def convert_examples_to_features(example, for seq_feature in seq_features] features = [ - SquadXLNetFeautre(example_id=example.example_id, - qas_id=example.qas_id, - tok_start_to_orig_index=t2st, - tok_end_to_orig_index=t2ed, - valid_length=len(tokens), - tokens=tokens, - token_is_max_context=is_max, - input_ids=vocab[tokens], - p_mask=p_mask, - segment_ids=segment_ids, - start_position=start, - end_position=end, - paragraph_text=example.paragraph_text, - paragraph_len=len(tokens), + SquadXLNetFeautre(example_id=example.example_id, qas_id=example.qas_id, + tok_start_to_orig_index=t2st, tok_end_to_orig_index=t2ed, + valid_length=len(tokens), tokens=tokens, token_is_max_context=is_max, + input_ids=vocab[tokens], p_mask=p_mask, segment_ids=segment_ids, + start_position=start, end_position=end, + paragraph_text=example.paragraph_text, paragraph_len=len(tokens), is_impossible=(start == len(tokens) - 1)) - for (tokens, segment_ids, p_mask), (start, end), is_max, t2st, t2ed in - zip(seq_features, positions, token_is_max_context, - cur_tok_start_to_orig_index, cur_tok_end_to_orig_index) + for (tokens, segment_ids, p_mask), ( + start, + end), is_max, t2st, t2ed in zip(seq_features, positions, token_is_max_context, + cur_tok_start_to_orig_index, cur_tok_end_to_orig_index) ] return features -def preprocess_dataset(tokenizer, - dataset, - vocab=None, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - num_workers=16, - load_from_pickle=False, - feature_file=None, - is_training=True): +def preprocess_dataset(tokenizer, dataset, vocab=None, max_seq_length=384, doc_stride=128, + max_query_length=64, num_workers=16, load_from_pickle=False, 
+ feature_file=None, is_training=True): """Loads a dataset into features""" vocab = tokenizer.vocab if vocab is None else vocab - trans = partial(convert_examples_to_features, - tokenizer=tokenizer, - cls_token=vocab.cls_token, - sep_token=vocab.sep_token, - vocab=vocab, - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length) + trans = partial(convert_examples_to_features, tokenizer=tokenizer, cls_token=vocab.cls_token, + sep_token=vocab.sep_token, vocab=vocab, max_seq_length=max_seq_length, + doc_stride=doc_stride, max_query_length=max_query_length) pool = mp.Pool(num_workers) start = time.time() if not load_from_pickle: - example_trans = partial(convert_squad_examples, - is_training=is_training) + example_trans = partial(convert_squad_examples, is_training=is_training) # convert the raw dataset into raw features examples = pool.map(example_trans, dataset) - raw_features = list(map(trans, examples)) #pool.map(trans, examples) + raw_features = list(map(trans, examples)) #pool.map(trans, examples) if feature_file: with open(feature_file, 'wb') as file: pickle.dump(raw_features, file) @@ -528,8 +387,7 @@ def preprocess_dataset(tokenizer, def convert_full_features_to_input_features(raw_features): """convert the full features into the input features""" - data_features = mx.gluon.data.SimpleDataset( - list(itertools.chain.from_iterable(raw_features))) + data_features = mx.gluon.data.SimpleDataset(list(itertools.chain.from_iterable(raw_features))) data_features = data_features.transform(lambda *example: ( example[0], # example_id example[7], # inputs_id @@ -549,11 +407,9 @@ def split_array(arr, num_of_splits): if size < num_of_splits: return [arr[i:i + 1] for i in range(size)] slice_len, rest = divmod(size, num_of_splits) - div_points = [0] + [(slice_len * index + min(index, rest) + slice_len + - (index < rest)) for index in range(num_of_splits)] - slices = [ - arr[div_points[i]:div_points[i + 1]] for i in range(num_of_splits) - ] + 
div_points = [0] + [(slice_len * index + min(index, rest) + slice_len + (index < rest)) + for index in range(num_of_splits)] + slices = [arr[div_points[i]:div_points[i + 1]] for i in range(num_of_splits)] return slices @@ -562,10 +418,8 @@ def split_and_load(arrs, _ctxs): # TODO Replace split_array() with gluon.utils.split_data() once targeting MXNet 1.7 assert isinstance(arrs, (list, tuple)) # split and load - loaded_arrs = [[ - i.as_in_context(ctx) - for i, ctx in zip(split_array(arr, len(_ctxs)), _ctxs) - ] for arr in arrs] + loaded_arrs = [[i.as_in_context(ctx) for i, ctx in zip(split_array(arr, len(_ctxs)), _ctxs)] + for arr in arrs] return zip(*loaded_arrs) @@ -577,9 +431,7 @@ def _apply_gradient_decay(): layer-wise gradient decay. Gradient decay and learning rate decay could be the same by using standard SGD, but different by using Adaptive optimizer(e.g., Adam). """ - parameter_not_included = [ - 'seg_emb', 'query_key_bias', 'query_emb_bias', 'query_seg_bias' - ] + parameter_not_included = ['seg_emb', 'query_key_bias', 'query_emb_bias', 'query_seg_bias'] num_layers = len(xlnet_base._net.transformer_cells) for (i, layer_parameters) in enumerate(xlnet_base._net.transformer_cells): layer_params = layer_parameters.collect_params() @@ -609,40 +461,26 @@ def train(): log.info('Number of records in Train data: %s', len(train_data)) train_data_features = preprocess_dataset( - tokenizer, - train_data, - vocab=vocab, - max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, - num_workers=args.num_workers, - max_query_length=args.max_query_length, - load_from_pickle=args.load_pickle, + tokenizer, train_data, vocab=vocab, max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, num_workers=args.num_workers, + max_query_length=args.max_query_length, load_from_pickle=args.load_pickle, feature_file=args.train_dataset_file) - train_data_input = convert_full_features_to_input_features( - train_data_features) - log.info('The number of examples after 
preprocessing: %s', - len(train_data_input)) + train_data_input = convert_full_features_to_input_features(train_data_features) + log.info('The number of examples after preprocessing: %s', len(train_data_input)) - train_dataloader = mx.gluon.data.DataLoader(train_data_input, - batchify_fn=batchify_fn, - batch_size=args.batch_size, - num_workers=4, + train_dataloader = mx.gluon.data.DataLoader(train_data_input, batchify_fn=batchify_fn, + batch_size=args.batch_size, num_workers=4, shuffle=True) optimizer_params = {'learning_rate': args.lr, 'wd': args.wd} try: - trainer = mx.gluon.Trainer(net.collect_params(), - args.optimizer, - optimizer_params, + trainer = mx.gluon.Trainer(net.collect_params(), args.optimizer, optimizer_params, update_on_kvstore=False) except ValueError as _: - warnings.warn( - 'AdamW optimizer is not found. Please consider upgrading to ' - 'mxnet>=1.5.0. Now the original Adam optimizer is used instead.') - trainer = mx.gluon.Trainer(net.collect_params(), - 'bertadam', - optimizer_params, + warnings.warn('AdamW optimizer is not found. Please consider upgrading to ' + 'mxnet>=1.5.0. 
Now the original Adam optimizer is used instead.') + trainer = mx.gluon.Trainer(net.collect_params(), 'bertadam', optimizer_params, update_on_kvstore=False) num_train_examples = len(train_data_input) @@ -735,8 +573,7 @@ def set_new_lr(step_num, batch_id): if args.version_2: step_loss_sep_tmp = np.array( [[span_ls.mean().asscalar(), - cls_ls.mean().asscalar()] - for span_ls, cls_ls in batch_loss_sep]) + cls_ls.mean().asscalar()] for span_ls, cls_ls in batch_loss_sep]) step_loss_sep_tmp = list(np.sum(step_loss_sep_tmp, axis=0)) step_loss_span += step_loss_sep_tmp[0] / len(ctx) step_loss_cls += step_loss_sep_tmp[1] / len(ctx) @@ -756,8 +593,7 @@ def set_new_lr(step_num, batch_id): log_num / (toc - tic)) if args.version_2: - log.info('span_loss: %.4f, cls_loss: %.4f', - step_loss_span / log_interval, + log.info('span_loss: %.4f, cls_loss: %.4f', step_loss_span / log_interval, step_loss_cls / log_interval) tic = time.time() @@ -770,18 +606,17 @@ def set_new_lr(step_num, batch_id): finish_flag = True break epoch_toc = time.time() - log.info('Time cost=%.2f s, Thoughput=%.2f samples/s', - epoch_toc - epoch_tic, total_num / (epoch_toc - epoch_tic)) + log.info('Time cost=%.2f s, Thoughput=%.2f samples/s', epoch_toc - epoch_tic, + total_num / (epoch_toc - epoch_tic)) ckpt_name = 'model_xlnet_squad_{0}.params'.format(epoch_id + 1) params_saved = os.path.join(args.output_dir, ckpt_name) nlp.utils.save_parameters(net, params_saved) log.info('params saved in: %s', params_saved) -RawResultExtended = collections.namedtuple('RawResultExtended', [ - 'start_top_log_probs', 'start_top_index', 'end_top_log_probs', - 'end_top_index', 'cls_logits' -]) +RawResultExtended = collections.namedtuple( + 'RawResultExtended', + ['start_top_log_probs', 'start_top_index', 'end_top_log_probs', 'end_top_index', 'cls_logits']) def evaluate(): @@ -802,26 +637,17 @@ def evaluate(): log.info('Number of records in dev data: %d', len(dev_data)) dev_data_features = preprocess_dataset( - tokenizer, - 
dev_data, - vocab=vocab, - max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, - num_workers=args.num_workers, - max_query_length=args.max_query_length, - load_from_pickle=args.load_pickle, + tokenizer, dev_data, vocab=vocab, max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, num_workers=args.num_workers, + max_query_length=args.max_query_length, load_from_pickle=args.load_pickle, feature_file=args.dev_dataset_file) dev_data_input = convert_full_features_to_input_features(dev_data_features) - log.info('The number of examples after preprocessing: %d', - len(dev_data_input)) + log.info('The number of examples after preprocessing: %d', len(dev_data_input)) - dev_dataloader = mx.gluon.data.DataLoader(dev_data_input, - batchify_fn=batchify_fn, - num_workers=4, - batch_size=args.test_batch_size, - shuffle=False, - last_batch='keep') + dev_dataloader = mx.gluon.data.DataLoader(dev_data_input, batchify_fn=batchify_fn, + num_workers=4, batch_size=args.test_batch_size, + shuffle=False, last_batch='keep') log.info('start prediction') @@ -834,25 +660,21 @@ def evaluate(): for splited_data in data_list: example_ids, inputs, token_types, valid_length, p_mask, _, _, _ = splited_data total_num += len(inputs) - outputs = net_eval(inputs, - token_types, - valid_length, - p_mask=p_mask) + outputs = net_eval(inputs, token_types, valid_length, p_mask=p_mask) example_ids = example_ids.asnumpy().tolist() for c, example_ids in enumerate(example_ids): - result = RawResultExtended( - start_top_log_probs=outputs[0][c].asnumpy().tolist(), - start_top_index=outputs[1][c].asnumpy().tolist(), - end_top_log_probs=outputs[2][c].asnumpy().tolist(), - end_top_index=outputs[3][c].asnumpy().tolist(), - cls_logits=outputs[4][c].asnumpy().tolist()) + result = RawResultExtended(start_top_log_probs=outputs[0][c].asnumpy().tolist(), + start_top_index=outputs[1][c].asnumpy().tolist(), + end_top_log_probs=outputs[2][c].asnumpy().tolist(), + 
end_top_index=outputs[3][c].asnumpy().tolist(), + cls_logits=outputs[4][c].asnumpy().tolist()) all_results[example_ids].append(result) if batch_id % args.log_interval == 0: log.info('Batch: %d/%d', batch_id + 1, len(dev_dataloader)) epoch_toc = time.time() - log.info('Time cost=%2f s, Thoughput=%.2f samples/s', - epoch_toc - epoch_tic, total_num / (epoch_toc - epoch_tic)) + log.info('Time cost=%2f s, Thoughput=%.2f samples/s', epoch_toc - epoch_tic, + total_num / (epoch_toc - epoch_tic)) log.info('Get prediction results...') @@ -863,23 +685,17 @@ def evaluate(): results = all_results[features[0].example_id] example_qas_id = features[0].qas_id score_diff, best_non_null_entry, nbest_json = predict_extended( - features=features, - results=results, - n_best_size=args.n_best_size, - max_answer_length=args.max_answer_length, - start_n_top=args.start_top_n, + features=features, results=results, n_best_size=args.n_best_size, + max_answer_length=args.max_answer_length, start_n_top=args.start_top_n, end_n_top=args.end_top_n) scores_diff_json[example_qas_id] = score_diff all_predictions[example_qas_id] = best_non_null_entry all_nbest_json[example_qas_id] = nbest_json - output_prediction_file = os.path.join(args.output_dir, - 'predictions.json') - output_nbest_file = os.path.join( - args.output_dir, 'nbest_predictions.json') + output_prediction_file = os.path.join(args.output_dir, 'predictions.json') + output_nbest_file = os.path.join(args.output_dir, 'nbest_predictions.json') if args.version_2: - output_null_log_odds_file = os.path.join( - args.output_dir, 'null_odds.json') + output_null_log_odds_file = os.path.join(args.output_dir, 'null_odds.json') else: output_null_log_odds_file = None @@ -892,15 +708,18 @@ def evaluate(): writer.write(json.dumps(scores_diff_json, indent=4) + '\n') if os.path.exists(sys.path[0] + '/evaluate-v2.0.py'): - arguments = ['--data_file', dev_data_path, '--pred_file', output_prediction_file, - '--na_prob_thresh', 
str(args.null_score_diff_threshold)] + arguments = [ + '--data_file', dev_data_path, '--pred_file', output_prediction_file, + '--na_prob_thresh', + str(args.null_score_diff_threshold) + ] if args.version_2: arguments += ['--na_prob_file', output_null_log_odds_file] subprocess.call([sys.executable, sys.path[0] + '/evaluate-v2.0.py'] + arguments) else: - log.info('Please download evaluate-v2.0.py to get evaluation results for SQuAD. Check index.rst to see' - 'how to download evaluate-v2.0.py.') - + log.info( + 'Please download evaluate-v2.0.py to get evaluation results for SQuAD. ' + 'Check index.rst to see how to download evaluate-v2.0.py.') if __name__ == '__main__': if not args.only_predict: From 3c210eb499db27aefbc686ece0e649e8bb991426 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 22 Jan 2020 15:20:02 +0800 Subject: [PATCH 51/59] fix doc --- scripts/language_model/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/language_model/index.rst b/scripts/language_model/index.rst index 7bd5733513..6fb8fad65d 100644 --- a/scripts/language_model/index.rst +++ b/scripts/language_model/index.rst @@ -280,6 +280,7 @@ Question Answering on SQuAD 
+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Prediction| `predictions.json `__ | `predictions.json `__ | `predictions.json `__ `null_odds.json `__ | `predictions.json `__ `null_odds.json `__ | 
+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + For xlnet_24_1024_16, we used hyperparameters reported by the paper authors. From bead7d20933463d9e7862326b4f5b2f1fc63fb90 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 23 Jan 2020 15:23:44 +0800 Subject: [PATCH 52/59] remove xlnet squad part --- scripts/language_model/index.rst | 34 - scripts/language_model/model/qa.py | 351 ---------- scripts/language_model/run_squad.py | 729 -------------------- scripts/language_model/xlnet_qa_evaluate.py | 152 ---- 4 files changed, 1266 deletions(-) delete mode 100644 scripts/language_model/model/qa.py delete mode 100644 scripts/language_model/run_squad.py delete mode 100644 scripts/language_model/xlnet_qa_evaluate.py diff --git a/scripts/language_model/index.rst b/scripts/language_model/index.rst index dfc80ac442..7f1a61b6a8 100644 --- a/scripts/language_model/index.rst +++ b/scripts/language_model/index.rst @@ -263,37 +263,3 @@ We followed the hyperparameters reported by the paper authors. 
+-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | RTE |Accuracy |84.12 |`log `__ |`command `__ | +-----------------+---------------------+-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Question Answering on SQuAD -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Dataset | SQuAD 1.1 | SQuAD 1.1 | SQuAD 2.0 | SQuAD 2.0 | 
-+===========+=========================================================================================================================================================+==========================================================================================================================================================+==================================================================================================================================================================================================================================================================================================================+==================================================================================================================================================================================================================================================================================================================+ -| Model | xlnet_12_768_12 | xlnet_24_1024_16 | xlnet_12_768_12 | xlnet_24_1024_16 | -+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| EM / F1 | 85.34 / 91.67 | 87.9 / 
93.87 | 80.25 / 82.95 | 85.85 / 88.56 | -+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Log | `log `__ | `log `__ | `log `__ | `log `__ | -+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Command | `command `__ | 
`command `__ | `command `__ | `command `__ | -+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Prediction| `predictions.json `__ | `predictions.json `__ | `predictions.json `__ `null_odds.json `__ | `predictions.json `__ `null_odds.json `__ | 
-+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -For xlnet_24_1024_16, we used hyperparameters reported by the paper authors. - - -To get the score of the dev data, you need to download the evaluate script (`evaluate-2.0.py `_). -You can either put the evaluate script under the same folder with run_squad.py to let our script run it automatically, -or run it manually by yourself. To run the evaluate script, you can use the following commands: - -SQuAD1.1: -.. code-block:: console - - $ python evaluate-v2.0.py dev-v2.0.json predictions.json - -SQuAD2.0: -.. 
code-block:: console - - $ python evaluate-v2.0.py --data_file dev-v2.0.json --pred_file predictions.json --na-prob-file null_odds.json \ No newline at end of file diff --git a/scripts/language_model/model/qa.py b/scripts/language_model/model/qa.py deleted file mode 100644 index 42ab647392..0000000000 --- a/scripts/language_model/model/qa.py +++ /dev/null @@ -1,351 +0,0 @@ -"""XLNetForQA models.""" - -import mxnet as mx -from mxnet.gluon import HybridBlock, Block, loss, nn - - -class PoolerStartLogits(HybridBlock): - """ Compute SQuAD start_logits from sequence hidden states.""" - def __init__(self, prefix=None, params=None): - super(PoolerStartLogits, self).__init__(prefix=prefix, params=params) - self.dense = nn.Dense(1, flatten=False) - - def __call__(self, hidden_states, p_masks=None): - # pylint: disable=arguments-differ - return super(PoolerStartLogits, self).__call__(hidden_states, p_masks) - - def hybrid_forward(self, F, hidden_states, p_mask): - """Get start logits from the model output. - - Parameters - ---------- - hidden_states : NDArray, shape (batch_size, seq_length, hidden_size) - p_mask : NDArray or None, shape(batch_size, seq_length) - - Returns - ------- - x : NDarray, shape(batch_size, seq_length) - Masked start logits. 
- """ - # pylint: disable=arguments-differ - x = self.dense(hidden_states).squeeze(-1) - if p_mask is not None: - x = x * (1 - p_mask) - 1e30 * p_mask - return x - - -class PoolerEndLogits(Block): - """ Compute SQuAD end_logits from sequence hidden states and start token hidden state.""" - def __init__(self, units=768, is_eval=False, prefix=None, params=None): - super(PoolerEndLogits, self).__init__(prefix=prefix, params=params) - self._eval = is_eval - self._hsz = units - with self.name_scope(): - self.dense_0 = nn.Dense(units, activation='tanh', flatten=False) - self.dense_1 = nn.Dense(1, flatten=False) - self.layernorm = nn.LayerNorm(epsilon=1e-12, in_channels=768) - - def __call__(self, - hidden_states, - start_states=None, - start_positions=None, - p_masks=None): - # pylint: disable=arguments-differ - return super(PoolerEndLogits, - self).__call__(hidden_states, start_states, - start_positions, p_masks) - - def forward(self, hidden_states, start_states, start_positions, p_mask): - # pylint: disable=arguments-differ - """Get end logits from the model output and start states or start positions. - - Parameters - ---------- - hidden_states : NDArray, shape (batch_size, seq_length, hidden_size) - start_states : NDArray, shape (batch_size, seq_length, start_n_top, hidden_size) - Used during inference - start_positions : NDArray, shape (batch_size) - Ground-truth start positions used during training. - p_mask : NDArray or None, shape(batch_size, seq_length) - - Returns - ------- - x : NDarray, shape(batch_size, seq_length) - Masked end logits. 
- """ - F = mx.ndarray - if not self._eval: - start_states = F.gather_nd( - hidden_states, - F.concat( - F.contrib.arange_like(hidden_states, - axis=0).expand_dims(1), - start_positions.expand_dims( - 1)).transpose()) # shape(bsz, hsz) - start_states = start_states.expand_dims(1) - start_states = F.broadcast_like( - start_states, hidden_states) # shape (bsz, slen, hsz) - x = self.dense_0(F.concat(hidden_states, start_states, dim=-1)) - x = self.layernorm(x) - x = self.dense_1(x).squeeze(-1) - if p_mask is not None and self._eval: - p_mask = p_mask.expand_dims(-1) - p_mask = F.broadcast_like(p_mask, x) - if p_mask is not None: - x = x * (1 - p_mask) - 1e30 * p_mask - return x - - -class XLNetPoolerAnswerClass(Block): - """ Compute SQuAD 2.0 answer class from classification and start tokens hidden states. """ - def __init__(self, units=768, dropout=0.1, prefix=None, params=None): - super(XLNetPoolerAnswerClass, self).__init__(prefix=prefix, - params=params) - with self.name_scope(): - self._units = units - self.dense_0 = nn.Dense(units, - in_units=2 * units, - activation='tanh', - use_bias=True, - flatten=False) - self.dense_1 = nn.Dense(1, - in_units=units, - use_bias=False, - flatten=False) - self._dropout = nn.Dropout(dropout) - - def __call__(self, hidden_states, start_states=None, cls_index=None): - # pylint: disable=arguments-differ - return super(XLNetPoolerAnswerClass, - self).__call__(hidden_states, start_states, cls_index) - - def forward(self, hidden_states, start_states, cls_index): - # pylint: disable=arguments-differ - """Get answerability logits from the model output and start states. - - Parameters - ---------- - hidden_states : NDArray, shape (batch_size, seq_length, hidden_size) - start_states : NDArray, shape (batch_size, hidden_size) - Typically weighted average hidden_states along second dimension. - cls_index : NDArray, shape (batch_size) - Index of [CLS] token in sequence. - - Returns - ------- - x : NDarray, shape(batch_size,) - CLS logits. 
- """ - F = mx.ndarray - index = F.contrib.arange_like(hidden_states, - axis=0, - ctx=hidden_states.context).expand_dims(1) - valid_length_rs = cls_index.reshape((-1, 1)) - 1 - gather_index = F.concat(index, valid_length_rs).T - cls_token_state = F.gather_nd(hidden_states, gather_index) - - x = self.dense_0(F.concat(start_states, cls_token_state, dim=-1)) - x = self._dropout(x) - x = self.dense_1(x).squeeze(-1) - return x - - -class XLNetForQA(Block): - """Model for SQuAD task with XLNet. - - Parameters - ---------- - xlnet_base: XLNet Block - start_top_n : int - Number of start position candidates during inference. - end_top_n : int - Number of end position candidates for each start position during inference. - is_eval : Bool - If set to True, do inference. - prefix : str or None - See document of `mx.gluon.Block`. - params : ParameterDict or None - See document of `mx.gluon.Block`. - """ - def __init__(self, - xlnet_base, - start_top_n=None, - end_top_n=None, - is_eval=False, - units=768, - prefix=None, - params=None): - super(XLNetForQA, self).__init__(prefix=prefix, params=params) - with self.name_scope(): - self.xlnet = xlnet_base - self.start_top_n = start_top_n - self.end_top_n = end_top_n - self.loss = loss.SoftmaxCELoss() - self.start_logits = PoolerStartLogits() - self.end_logits = PoolerEndLogits(units=units, is_eval=is_eval) - self.eval = is_eval - self.answer_class = XLNetPoolerAnswerClass(units=units) - self.cls_loss = loss.SigmoidBinaryCrossEntropyLoss() - - def __call__(self, - inputs, - token_types, - valid_length=None, - label=None, - p_mask=None, - is_impossible=None, - mems=None): - #pylint: disable=arguments-differ - """Generate the unnormalized score for the given the input sequences.""" - valid_length = [] if valid_length is None else valid_length - return super(XLNetForQA, - self).__call__(inputs, token_types, valid_length, p_mask, - label, is_impossible, mems) - - def _padding_mask(self, inputs, valid_length, left_pad=False): - F = 
mx.ndarray - if left_pad: - # left padding - valid_length_start = valid_length.astype('int64') - steps = F.contrib.arange_like(inputs, axis=1) + 1 - ones = F.ones_like(steps) - mask = F.broadcast_greater( - F.reshape(steps, shape=(1, -1)), - F.reshape(valid_length_start, shape=(-1, 1))) - mask = F.broadcast_mul( - F.expand_dims(mask, axis=1), - F.broadcast_mul(ones, F.reshape(ones, shape=(-1, 1)))) - else: - # right padding - valid_length = valid_length.astype(inputs.dtype) - steps = F.contrib.arange_like(inputs, axis=1) - ones = F.ones_like(steps) - mask = F.broadcast_lesser(F.reshape(steps, shape=(1, -1)), - F.reshape(valid_length, shape=(-1, 1))) - mask = F.broadcast_mul( - F.expand_dims(mask, axis=1), - F.broadcast_mul(ones, F.reshape(ones, shape=(-1, 1)))) - return mask - - def forward(self, inputs, token_types, valid_length, p_mask, label, - is_impossible, mems): - # pylint: disable=arguments-differ - """Generate the unnormalized score for the given the input sequences. - - Parameters - ---------- - inputs : NDArray, shape (batch_size, seq_length) - Input words for the sequences. - token_types : NDArray, shape (batch_size, seq_length) - Token types for the sequences, used to indicate whether the word belongs to the - first sentence or the second one. - valid_length : NDArray or None, shape (batch_size,) - Valid length of the sequence. This is used to mask the padded tokens. - p_mask : NDArray or None, shape (batch_size, seq_length) - We do not want special tokens(e.g., [SEP], [PAD]) and question tokens to be - included in answer. Set to 1 to mask the token. - label : NDArray, shape (batch_size, 1) - Ground-truth label(start/end position) for loss computation. - is_impossible : NDArray or None, shape (batch_size ,1) - Ground-truth label(is impossible) for loss computation. Set to None for squad1. - mems : NDArray - We do not use memory(a Transformer XL component) during finetuning. 
- - Returns - ------- - For training we have: - total_loss : list of NDArray - For squad1, we will only have one span loss of Shape (batch_size, ) - For squad2, we will have a span loss (batch_size, ) and a cls_loss (batch_size, ) - total_loss_sum : NDArray - For squad1, it equals to span_loss - For squad2, it equals to span_loss + cls_loss - - For inference we have: - start_top_log_probs : NDArray, shape (batch_size, start_n_top, ) - start_top_index : NDArray, shape (batch_size, start_n_top) - end_top_log_probs : NDArray, shape (batch_size, start_n_top * end_n_top) - end_top_index : NDArray, shape (batch_size, start_n_top * end_n_top) - cls_logits : NDArray or None, shape (batch_size, ) - """ - if isinstance(valid_length, list) and len(valid_length) == 0: - valid_length = None - attention_mask = self._padding_mask(inputs, - valid_length).astype('float32') - output, _ = self.xlnet(inputs, token_types, mems, attention_mask) - start_logits = self.start_logits(output, - p_masks=p_mask) # shape (bsz, slen) - bsz, slen, hsz = output.shape - if not self.eval: - # training - start_positions, end_positions = label - end_logit = self.end_logits(output, - start_positions=start_positions, - p_masks=p_mask) - span_loss = (self.loss(start_logits, start_positions) + - self.loss(end_logit, end_positions)) / 2 - - total_loss = [span_loss] - - # get cls loss - start_log_probs = mx.nd.softmax(start_logits, axis=-1) - start_states = mx.nd.batch_dot(output, - start_log_probs.expand_dims(-1), - transpose_a=True).squeeze(-1) - - cls_logits = self.answer_class(output, start_states, - valid_length) - cls_loss = self.cls_loss(cls_logits, is_impossible) - total_loss.append(0.5 * cls_loss) - total_loss_sum = span_loss + 0.5 * cls_loss - return total_loss, total_loss_sum - else: - #inference - start_log_probs = mx.nd.log_softmax(start_logits, - axis=-1) # shape (bsz, slen) - start_top_log_probs, start_top_index = mx.ndarray.topk( - start_log_probs, k=self.start_top_n, axis=-1, - 
ret_typ='both') # shape (bsz, start_n_top) - index = mx.nd.concat(*[ - mx.nd.arange(bsz, ctx=start_log_probs.context).expand_dims(1) - ] * self.start_top_n).reshape(bsz * self.start_top_n, 1) - start_top_index_rs = start_top_index.reshape((-1, 1)) - gather_index = mx.nd.concat( - index, start_top_index_rs).T #shape(2, bsz * start_n_top) - start_states = mx.nd.gather_nd(output, gather_index).reshape( - (bsz, self.start_top_n, hsz)) #shape (bsz, start_n_top, hsz) - - start_states = start_states.expand_dims(1) - start_states = mx.nd.broadcast_to( - start_states, (bsz, slen, self.start_top_n, - hsz)) # shape (bsz, slen, start_n_top, hsz) - hidden_states_expanded = output.expand_dims(2) - hidden_states_expanded = mx.ndarray.broadcast_to( - hidden_states_expanded, shape=start_states.shape - ) # shape (bsz, slen, start_n_top, hsz) - end_logits = self.end_logits( - hidden_states_expanded, - start_states=start_states, - p_masks=p_mask) # shape (bsz, slen, start_n_top) - end_log_probs = mx.nd.log_softmax( - end_logits, axis=1) # shape (bsz, slen, start_n_top) - # Note that end_top_index and end_top_log_probs have shape (bsz, END_N_TOP, start_n_top) - # So that for each start position, there are end_n_top end positions on the second dim. 
- end_top_log_probs, end_top_index = mx.ndarray.topk( - end_log_probs, k=self.end_top_n, axis=1, - ret_typ='both') # shape (bsz, end_n_top, start_n_top) - end_top_log_probs = end_top_log_probs.reshape( - (-1, self.start_top_n * self.end_top_n)) - end_top_index = end_top_index.reshape( - (-1, self.start_top_n * self.end_top_n)) - - start_probs = mx.nd.softmax(start_logits, axis=-1) - start_states = mx.nd.batch_dot(output, - start_probs.expand_dims(-1), - transpose_a=True).squeeze(-1) - cls_logits = self.answer_class(output, start_states, - valid_length) - - outputs = (start_top_log_probs, start_top_index, end_top_log_probs, - end_top_index, cls_logits) - return outputs diff --git a/scripts/language_model/run_squad.py b/scripts/language_model/run_squad.py deleted file mode 100644 index 4b7f8dc780..0000000000 --- a/scripts/language_model/run_squad.py +++ /dev/null @@ -1,729 +0,0 @@ -""" -Question Answering with XLNet -""" -# pylint:disable=redefined-outer-name,logging-format-interpolation - -import os -import time -import argparse -import random -import logging -import warnings -import json -import collections -import pickle -import sys -import itertools -import subprocess -import multiprocessing as mp -from functools import partial -import numpy as np -import mxnet as mx -import gluonnlp as nlp -from gluonnlp.data import SQuAD -from model.qa import XLNetForQA -from transformer import model -from xlnet_qa_evaluate import predict_extended - -path = sys.path[0] -sys.path.append(path + '/../bert/data') -#pylint: disable=wrong-import-position -from preprocessing_utils import concat_sequences, get_doc_spans, \ - check_is_max_context, convert_squad_examples, _lcs_match, _convert_index, \ - align_position2doc_spans - -parser = argparse.ArgumentParser(description='XLNet QA example.' 
- 'We fine-tune the XLNet model on SQuAD dataset.') - -# I/O configuration -parser.add_argument('--sentencepiece', type=str, default=None, - help='Path to the sentencepiece .model file for both tokenization and vocab.') -parser.add_argument('--pretrained_xlnet_parameters', type=str, default=None, - help='Pre-trained bert model parameter file. default is None') -parser.add_argument('--load_pickle', action='store_true', - help='Whether do data preprocessing or load from pickled file') -parser.add_argument('--dev_dataset_file', default='./output_dir/out.dev', type=str, - help='Path to dev data features') -parser.add_argument('--train_dataset_file', default='./output_dir/out.train', type=str, - help='Path to train data features') -parser.add_argument('--model_parameters', type=str, default=None, help='Model parameter file') -parser.add_argument( - '--output_dir', type=str, default='./output_dir', - help='The output directory where the model params will be written.' - ' default is ./output_dir') - -# Training configuration -parser.add_argument('--seed', type=int, default=3, help='Random seed') -parser.add_argument('--version_2', action='store_true', help='Whether use SQuAD v2.0 dataset') -parser.add_argument('--model', type=str, default='xlnet_cased_l12_h768_a12', - choices=['xlnet_cased_l24_h1024_a16', 'xlnet_cased_l12_h768_a12'], - help='The name of pre-trained XLNet model to fine-tune') -parser.add_argument('--dataset', type=str, default='126gb', choices=['126gb'], - help='The dataset BERT pre-trained with. Currently only 126gb is available') -parser.add_argument( - '--uncased', action='store_true', help= - 'if set, inputs are converted to lower case. Up to 01/04/2020, all released models are cased') -parser.add_argument('--gpu', type=int, default=None, - help='Number of gpus to use for finetuning. CPU is used if not set.') -parser.add_argument('--log_interval', type=int, default=10, help='report interval. 
default is 10') -parser.add_argument('--debug', action='store_true', - help='Run the example in test mode for sanity checks') -parser.add_argument('--only_predict', action='store_true', help='Whether to predict only.') - -# Hyperparameters -parser.add_argument('--epochs', type=int, default=3, help='number of epochs, default is 3') -parser.add_argument( - '--training_steps', type=int, help='training steps. Note that epochs will be ignored ' - 'if training steps are set') - -parser.add_argument('--batch_size', type=int, default=32, - help='Batch size. Number of examples per gpu in a minibatch. default is 32') - -parser.add_argument('--test_batch_size', type=int, default=24, - help='Test batch size. default is 24') - -parser.add_argument('--optimizer', type=str, default='bertadam', - help='optimization algorithm. default is bertadam') - -parser.add_argument( - '--accumulate', type=int, default=None, help='The number of batches for ' - 'gradients accumulation to simulate large batch size. Default is None') - -parser.add_argument('--lr', type=float, default=3e-5, - help='Initial learning rate. default is 5e-5') - -parser.add_argument( - '--warmup_ratio', type=float, default=0, - help='ratio of warmup steps that linearly increase learning rate from ' - '0 to target learning rate. default is 0') -parser.add_argument('--layerwise_decay', type=float, default=0.75, help='Layer-wise lr decay') -parser.add_argument('--wd', type=float, default=0.01, help='weight decay') -parser.add_argument('--dropout', type=float, default=0.1, help='dropout') -parser.add_argument('--attention_dropout', type=float, default=0.1, help='attention dropout') - -# Data pre/post processing -parser.add_argument( - '--max_seq_length', type=int, default=512, - help='The maximum total input sequence length after WordPiece tokenization.' - 'Sequences longer than this will be truncated, and sequences shorter ' - 'than this will be padded. 
default is 512') - -parser.add_argument( - '--doc_stride', type=int, default=128, - help='When splitting up a long document into chunks, how much stride to ' - 'take between chunks. default is 128') - -parser.add_argument( - '--max_query_length', type=int, default=64, - help='The maximum number of tokens for the question. Questions longer than ' - 'this will be truncated to this length. default is 64') - -parser.add_argument('--start_top_n', type=int, default=5, - help='Number of start-position candidates') -parser.add_argument('--end_top_n', type=int, default=5, - help='Number of end-position candidates corresponding ' - 'to a start position') -parser.add_argument('--n_best_size', type=int, default=5, help='top N results written to file') -parser.add_argument( - '--max_answer_length', type=int, default=64, - help='The maximum length of an answer that can be generated. This is needed ' - 'because the start and end predictions are not conditioned on one another.' - ' default is 64') -parser.add_argument('--num_workers', type=int, default=4, - help='Number of workers used for data preprocessing') -parser.add_argument( - '--null_score_diff_threshold', type=float, default=0.0, - help='If null_score - best_non_null is greater than the threshold predict null.' - 'Typical values are between -1.0 and -5.0. default is 0.0. 
' - 'Note that a best value can be automatically found by the evaluation script') - -args = parser.parse_args() - -# random seed -np.random.seed(args.seed) -random.seed(args.seed) -mx.random.seed(args.seed) - -if not os.path.exists(args.output_dir): - os.mkdir(args.output_dir) - -# set the logger -log = logging.getLogger('gluonnlp') -log.setLevel(logging.DEBUG) -formatter = logging.Formatter(fmt='%(levelname)s:%(name)s:%(asctime)s %(message)s', - datefmt='%H:%M:%S') -fh = logging.FileHandler(os.path.join(args.output_dir, 'finetune_squad.log')) -fh.setLevel(logging.INFO) -fh.setFormatter(formatter) -console = logging.StreamHandler() -console.setLevel(logging.INFO) -console.setFormatter(formatter) -log.addHandler(console) -log.addHandler(fh) - -log.info(args) - -pretrained_xlnet_parameters = args.pretrained_xlnet_parameters -if pretrained_xlnet_parameters and args.model_parameters: - raise ValueError('Cannot provide both pre-trained BERT parameters and ' - 'BertForQA model parameters.') - -ctx = [mx.cpu(0)] if not args.gpu else [mx.gpu(i) for i in range(args.gpu)] - -log_interval = args.log_interval * args.accumulate if args.accumulate else args.log_interval -if args.accumulate: - log.info('Using gradient accumulation. 
Effective batch size = %d', - args.accumulate * args.batch_size) -if args.max_seq_length <= args.max_query_length + 3: - raise ValueError('The max_seq_length (%d) must be greater than max_query_length ' - '(%d) + 3' % (args.max_seq_length, args.max_query_length)) - -get_pretrained = True - -get_model_params = { - 'name': args.model, - 'dataset_name': args.dataset, - 'pretrained': get_pretrained, - 'ctx': ctx, - 'use_decoder': False, - 'dropout': args.dropout, - 'attention_dropout': args.attention_dropout -} - -# model, vocabulary and tokenizer -xlnet_base, vocab, tokenizer = model.get_model(**get_model_params) - -batchify_fn = nlp.data.batchify.Tuple( - nlp.data.batchify.Stack('int32'), # example_id - nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token], dtype='int32'), # input_ids - nlp.data.batchify.Pad(axis=0, pad_val=3, dtype='int32'), # segment_ids - nlp.data.batchify.Stack('float32'), # valid_length - nlp.data.batchify.Pad(axis=0, pad_val=1), # p_mask - nlp.data.batchify.Stack('float32'), # start_position - nlp.data.batchify.Stack('float32'), # end_position - nlp.data.batchify.Stack('float32')) # is_impossible - -if pretrained_xlnet_parameters: - # only load XLnetModel parameters - nlp.utils.load_parameters(xlnet_base, pretrained_xlnet_parameters, ctx=ctx, ignore_extra=True, - cast_dtype=True) - -units = xlnet_base._net._units -net = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, end_top_n=args.end_top_n, - units=units) - -net_eval = XLNetForQA(xlnet_base=xlnet_base, start_top_n=args.start_top_n, - end_top_n=args.end_top_n, units=units, is_eval=True, - params=net.collect_params()) - -initializer = mx.init.Normal(0.02) - -if args.model_parameters: - # load complete XLNetForQA parameters - nlp.utils.load_parameters(net, args.model_parameters, ctx=ctx, cast_dtype=True) -else: - net.start_logits.initialize(init=initializer, ctx=ctx) - net.end_logits.initialize(init=initializer, ctx=ctx) - net.answer_class.initialize(init=initializer, 
ctx=ctx) - -net.hybridize(static_alloc=True) -net_eval.hybridize(static_alloc=True) - -SquadXLNetFeautre = collections.namedtuple('SquadXLNetFeautre', [ - 'example_id', 'qas_id', 'valid_length', 'tokens', 'tok_start_to_orig_index', - 'tok_end_to_orig_index', 'token_is_max_context', 'input_ids', 'p_mask', 'segment_ids', - 'start_position', 'end_position', 'paragraph_text', 'paragraph_len', 'is_impossible' -]) - - -def convert_examples_to_features(example, tokenizer=None, cls_token=None, sep_token=None, - vocab=None, max_seq_length=384, doc_stride=128, - max_query_length=64, is_training=True): - """convert the examples to the XLNet features""" - query_tokenized = tokenizer(example.question_text)[:max_query_length] - #tokenize paragraph and get start/end position of the answer in tokenized paragraph - paragraph_tokenized = tokenizer(example.paragraph_text) - - chartok_to_tok_index = [] - tok_start_to_chartok_index = [] - tok_end_to_chartok_index = [] - char_cnt = 0 - for i, token in enumerate(paragraph_tokenized): - chartok_to_tok_index.extend([i] * len(token)) - tok_start_to_chartok_index.append(char_cnt) - char_cnt += len(token) - tok_end_to_chartok_index.append(char_cnt - 1) - - tok_cat_text = ''.join(paragraph_tokenized).replace(u'▁', ' ') - - # XLNet takes a more complicated strategy to match the origin text - # and the tokenized tokens - n, m = len(example.paragraph_text), len(tok_cat_text) - max_dist = abs(n - m) + 5 - for _ in range(2): - f, g = _lcs_match(max_dist, example.paragraph_text, tok_cat_text) - if f[n - 1, m - 1] > 0.8 * n: - break - max_dist *= 2 - - orig_to_chartok_index = [None] * n - chartok_to_orig_index = [None] * m - i, j = n - 1, m - 1 - while i >= 0 and j >= 0: - if (i, j) not in g: - break - if g[(i, j)] == 2: - orig_to_chartok_index[i] = j - chartok_to_orig_index[j] = i - i, j = i - 1, j - 1 - elif g[(i, j)] == 1: - j = j - 1 - else: - i = i - 1 - - # get start/end mapping - tok_start_to_orig_index = [] - tok_end_to_orig_index = [] - for 
i in range(len(paragraph_tokenized)): - start_chartok_pos = tok_start_to_chartok_index[i] - end_chartok_pos = tok_end_to_chartok_index[i] - start_orig_pos = _convert_index(chartok_to_orig_index, start_chartok_pos, n, is_start=True) - end_orig_pos = _convert_index(chartok_to_orig_index, end_chartok_pos, m, is_start=False) - - tok_start_to_orig_index.append(start_orig_pos) - tok_end_to_orig_index.append(end_orig_pos) - - tok_start_position, tok_end_position = -1, -1 - # get mapped start/end position - if is_training and not example.is_impossible: - start_chartok_pos = _convert_index(orig_to_chartok_index, example.start_offset, - is_start=True) - tok_start_position = chartok_to_tok_index[start_chartok_pos] - - end_chartok_pos = _convert_index(orig_to_chartok_index, example.end_offset, is_start=False) - tok_end_position = chartok_to_tok_index[end_chartok_pos] - assert tok_start_position <= tok_end_position - - # get doc spans using sliding window - doc_spans, doc_spans_indices = get_doc_spans(paragraph_tokenized, - max_seq_length - len(query_tokenized) - 3, - doc_stride) - - # record whether the tokens in a docspan have max context - token_is_max_context = [{ - p: check_is_max_context(doc_spans_indices, i, p + doc_spans_indices[i][0]) - for p in range(len(doc_span)) - } for (i, doc_span) in enumerate(doc_spans)] - - # get token -> origin text mapping - cur_tok_start_to_orig_index = [[tok_start_to_orig_index[p + st] for p in range(len(doc_span))] - for doc_span, (st, ed) in zip(doc_spans, doc_spans_indices)] - cur_tok_end_to_orig_index = [[tok_end_to_orig_index[p + st] for p in range(len(doc_span))] - for doc_span, (st, ed) in zip(doc_spans, doc_spans_indices)] - - # get sequence features: tokens, segment_ids, p_masks - seq_features = [ - concat_sequences([doc_span, query_tokenized], [[sep_token]] * 2 + [[cls_token]], - [[0] * len(doc_span), [1] * len(query_tokenized)], [[1], [1], [0]]) - for doc_span in doc_spans - ] - - # get the start/end positions aligned to doc 
spans. If is_impossible or position out of span - # set position to cls_index, i.e., last token in the sequence. - if not example.is_impossible: - positions = [ - align_position2doc_spans([tok_start_position, tok_end_position], doc_idx, offset=0, - default_value=len(seq[0]) - 1) - for (doc_idx, seq) in zip(doc_spans_indices, seq_features) - ] - else: - positions = [(len(seq_feature[0]) - 1, len(seq_feature[0]) - 1) - for seq_feature in seq_features] - - features = [ - SquadXLNetFeautre(example_id=example.example_id, qas_id=example.qas_id, - tok_start_to_orig_index=t2st, tok_end_to_orig_index=t2ed, - valid_length=len(tokens), tokens=tokens, token_is_max_context=is_max, - input_ids=vocab[tokens], p_mask=p_mask, segment_ids=segment_ids, - start_position=start, end_position=end, - paragraph_text=example.paragraph_text, paragraph_len=len(tokens), - is_impossible=(start == len(tokens) - 1)) - for (tokens, segment_ids, p_mask), ( - start, - end), is_max, t2st, t2ed in zip(seq_features, positions, token_is_max_context, - cur_tok_start_to_orig_index, cur_tok_end_to_orig_index) - ] - return features - - -def preprocess_dataset(tokenizer, dataset, vocab=None, max_seq_length=384, doc_stride=128, - max_query_length=64, num_workers=16, load_from_pickle=False, - feature_file=None, is_training=True): - """Loads a dataset into features""" - vocab = tokenizer.vocab if vocab is None else vocab - trans = partial(convert_examples_to_features, tokenizer=tokenizer, cls_token=vocab.cls_token, - sep_token=vocab.sep_token, vocab=vocab, max_seq_length=max_seq_length, - doc_stride=doc_stride, max_query_length=max_query_length) - pool = mp.Pool(num_workers) - start = time.time() - if not load_from_pickle: - example_trans = partial(convert_squad_examples, is_training=is_training) - # convert the raw dataset into raw features - examples = pool.map(example_trans, dataset) - raw_features = list(map(trans, examples)) #pool.map(trans, examples) - if feature_file: - with open(feature_file, 'wb') as 
file: - pickle.dump(raw_features, file) - else: - assert feature_file, 'feature file should be provided.' - with open(feature_file, 'rb') as file: - raw_features = pickle.load(file) - - end = time.time() - pool.close() - log.info('Done! Transform dataset costs %.2f seconds.', (end - start)) - return raw_features - - -def convert_full_features_to_input_features(raw_features): - """convert the full features into the input features""" - data_features = mx.gluon.data.SimpleDataset(list(itertools.chain.from_iterable(raw_features))) - data_features = data_features.transform(lambda *example: ( - example[0], # example_id - example[7], # inputs_id - example[9], # segment_ids - example[2], # valid_length, - example[8], # p_mask - example[10], # start_position, - example[11], # end_position - example[14])) # is_impossible - return data_features - - -def split_array(arr, num_of_splits): - """split an array into equal pieces""" - # TODO Replace this function with gluon.utils.split_data() once targeting MXNet 1.7 - size = arr.shape[0] - if size < num_of_splits: - return [arr[i:i + 1] for i in range(size)] - slice_len, rest = divmod(size, num_of_splits) - div_points = [0] + [(slice_len * index + min(index, rest) + slice_len + (index < rest)) - for index in range(num_of_splits)] - slices = [arr[div_points[i]:div_points[i + 1]] for i in range(num_of_splits)] - return slices - - -def split_and_load(arrs, _ctxs): - """split and load arrays to a list of contexts""" - # TODO Replace split_array() with gluon.utils.split_data() once targeting MXNet 1.7 - assert isinstance(arrs, (list, tuple)) - # split and load - loaded_arrs = [[i.as_in_context(ctx) for i, ctx in zip(split_array(arr, len(_ctxs)), _ctxs)] - for arr in arrs] - return zip(*loaded_arrs) - - -def _apply_gradient_decay(): - """apply layer-wise gradient decay. - - Note that the description in origin paper about layer-wise learning rate decay - is inaccurate. 
According to their implementation, they are actually performing - layer-wise gradient decay. Gradient decay and learning rate decay could be the - same by using standard SGD, but different by using Adaptive optimizer(e.g., Adam). - """ - parameter_not_included = ['seg_emb', 'query_key_bias', 'query_emb_bias', 'query_seg_bias'] - num_layers = len(xlnet_base._net.transformer_cells) - for (i, layer_parameters) in enumerate(xlnet_base._net.transformer_cells): - layer_params = layer_parameters.collect_params() - for key, value in layer_params.items(): - skip = False - for pn in parameter_not_included: - if pn in key: - skip = True - if skip: - continue - if value.grad_req != 'null': - for arr in value.list_grad(): - arr *= args.layerwise_decay**(num_layers - i - 1) - - -def train(): - """Training function.""" - segment = 'train' - log.info('Loading %s data...', segment) - if args.version_2: - train_data = SQuAD(segment, version='2.0') - else: - train_data = SQuAD(segment, version='1.1') - if args.debug: - sampled_data = [train_data[i] for i in range(100)] - train_data = mx.gluon.data.SimpleDataset(sampled_data) - log.info('Number of records in Train data: %s', len(train_data)) - - train_data_features = preprocess_dataset( - tokenizer, train_data, vocab=vocab, max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, num_workers=args.num_workers, - max_query_length=args.max_query_length, load_from_pickle=args.load_pickle, - feature_file=args.train_dataset_file) - - train_data_input = convert_full_features_to_input_features(train_data_features) - log.info('The number of examples after preprocessing: %s', len(train_data_input)) - - train_dataloader = mx.gluon.data.DataLoader(train_data_input, batchify_fn=batchify_fn, - batch_size=args.batch_size, num_workers=4, - shuffle=True) - - optimizer_params = {'learning_rate': args.lr, 'wd': args.wd} - try: - trainer = mx.gluon.Trainer(net.collect_params(), args.optimizer, optimizer_params, - update_on_kvstore=False) - 
except ValueError as _: - warnings.warn('AdamW optimizer is not found. Please consider upgrading to ' - 'mxnet>=1.5.0. Now the original Adam optimizer is used instead.') - trainer = mx.gluon.Trainer(net.collect_params(), 'bertadam', optimizer_params, - update_on_kvstore=False) - - num_train_examples = len(train_data_input) - step_size = args.batch_size * args.accumulate if args.accumulate else args.batch_size - num_train_steps = int(num_train_examples / step_size * args.epochs) - epoch_number = args.epochs - if args.training_steps: - num_train_steps = args.training_steps - epoch_number = 100000 - - log.info('training steps=%d', num_train_steps) - num_warmup_steps = int(num_train_steps * args.warmup_ratio) - step_num = 0 - - def set_new_lr(step_num, batch_id): - """set new learning rate""" - # set grad to zero for gradient accumulation - if args.accumulate: - if batch_id % args.accumulate == 0: - net.collect_params().zero_grad() - step_num += 1 - else: - step_num += 1 - # learning rate schedule - # Notice that this learning rate scheduler is adapted from traditional linear learning - # rate scheduler where step_num >= num_warmup_steps, new_lr = 1 - step_num/num_train_steps - if step_num < num_warmup_steps: - new_lr = args.lr * step_num / num_warmup_steps - else: - offset = (step_num - num_warmup_steps) * args.lr / \ - (num_train_steps - num_warmup_steps) - new_lr = args.lr - offset - trainer.set_learning_rate(new_lr) - return step_num - - # Do not apply weight decay on LayerNorm and bias terms - for _, v in net.collect_params('.*beta|.*gamma|.*bias').items(): - v.wd_mult = 0.0 - # Collect differentiable parameters - params = [p for p in net.collect_params().values() if p.grad_req != 'null'] - # Set grad_req if gradient accumulation is required - if args.accumulate: - for p in params: - p.grad_req = 'add' - - epoch_tic = time.time() - total_num = 0 - log_num = 0 - finish_flag = False - for epoch_id in range(epoch_number): - step_loss = 0.0 - step_loss_span = 0 - 
step_loss_cls = 0 - tic = time.time() - if finish_flag: - break - for batch_id, data in enumerate(train_dataloader): - # set new lr - step_num = set_new_lr(step_num, batch_id) - data_list = list(split_and_load(data, ctx)) - # forward and backward - batch_loss = [] - batch_loss_sep = [] - with mx.autograd.record(): - for splited_data in data_list: - _, inputs, token_types, valid_length, p_mask, start_label, end_label, is_impossible = splited_data # pylint: disable=line-too-long - valid_length = valid_length.astype('float32') - log_num += len(inputs) - total_num += len(inputs) - out_sep, out = net( - inputs, - token_types, - valid_length, - [start_label, end_label], - p_mask=p_mask, # pylint: disable=line-too-long - is_impossible=is_impossible) - ls = out.mean() / len(ctx) - batch_loss_sep.append(out_sep) - batch_loss.append(ls) - if args.accumulate: - ls = ls / args.accumulate - ls.backward() - # update - if not args.accumulate or (batch_id + 1) % args.accumulate == 0: - trainer.allreduce_grads() - nlp.utils.clip_grad_global_norm(params, 1) - _apply_gradient_decay() - trainer.update(1, ignore_stale_grad=True) - - if args.version_2: - step_loss_sep_tmp = np.array( - [[span_ls.mean().asscalar(), - cls_ls.mean().asscalar()] for span_ls, cls_ls in batch_loss_sep]) - step_loss_sep_tmp = list(np.sum(step_loss_sep_tmp, axis=0)) - step_loss_span += step_loss_sep_tmp[0] / len(ctx) - step_loss_cls += step_loss_sep_tmp[1] / len(ctx) - - step_loss += sum([ls.asscalar() for ls in batch_loss]) - if (batch_id + 1) % log_interval == 0: - toc = time.time() - log.info( - 'Epoch: %d, Batch: %d/%d, Loss=%.4f, lr=%.7f Time cost=%.1f Thoughput=%.2f samples/s' # pylint: disable=line-too-long - , - epoch_id + 1, - batch_id + 1, - len(train_dataloader), - step_loss / log_interval, - trainer.learning_rate, - toc - tic, - log_num / (toc - tic)) - - if args.version_2: - log.info('span_loss: %.4f, cls_loss: %.4f', step_loss_span / log_interval, - step_loss_cls / log_interval) - - tic = 
time.time() - step_loss = 0.0 - step_loss_span = 0 - step_loss_cls = 0 - log_num = 0 - if step_num >= num_train_steps: - logging.info('Finish training step: %d', step_num) - finish_flag = True - break - epoch_toc = time.time() - log.info('Time cost=%.2f s, Thoughput=%.2f samples/s', epoch_toc - epoch_tic, - total_num / (epoch_toc - epoch_tic)) - ckpt_name = 'model_xlnet_squad_{0}.params'.format(epoch_id + 1) - params_saved = os.path.join(args.output_dir, ckpt_name) - nlp.utils.save_parameters(net, params_saved) - log.info('params saved in: %s', params_saved) - - -RawResultExtended = collections.namedtuple( - 'RawResultExtended', - ['start_top_log_probs', 'start_top_index', 'end_top_log_probs', 'end_top_index', 'cls_logits']) - - -def evaluate(): - """Evaluate the model on validation dataset. - """ - log.info('Loading dev data...') - if args.version_2: - dev_data = SQuAD('dev', version='2.0') - else: - dev_data = SQuAD('dev', version='1.1') - (_, _), (data_file_name, _) \ - = dev_data._data_file[dev_data._version][dev_data._segment] - dev_data_path = os.path.join(dev_data._root, data_file_name) - - if args.debug: - sampled_data = [dev_data[0], dev_data[1], dev_data[2]] - dev_data = mx.gluon.data.SimpleDataset(sampled_data) - log.info('Number of records in dev data: %d', len(dev_data)) - - dev_data_features = preprocess_dataset( - tokenizer, dev_data, vocab=vocab, max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, num_workers=args.num_workers, - max_query_length=args.max_query_length, load_from_pickle=args.load_pickle, - feature_file=args.dev_dataset_file) - - dev_data_input = convert_full_features_to_input_features(dev_data_features) - log.info('The number of examples after preprocessing: %d', len(dev_data_input)) - - dev_dataloader = mx.gluon.data.DataLoader(dev_data_input, batchify_fn=batchify_fn, - num_workers=4, batch_size=args.test_batch_size, - shuffle=False, last_batch='keep') - - log.info('start prediction') - - all_results = 
collections.defaultdict(list) - - epoch_tic = time.time() - total_num = 0 - for (batch_id, data) in enumerate(dev_dataloader): - data_list = list(split_and_load(data, ctx)) - for splited_data in data_list: - example_ids, inputs, token_types, valid_length, p_mask, _, _, _ = splited_data - total_num += len(inputs) - outputs = net_eval(inputs, token_types, valid_length, p_mask=p_mask) - example_ids = example_ids.asnumpy().tolist() - for c, example_ids in enumerate(example_ids): - result = RawResultExtended(start_top_log_probs=outputs[0][c].asnumpy().tolist(), - start_top_index=outputs[1][c].asnumpy().tolist(), - end_top_log_probs=outputs[2][c].asnumpy().tolist(), - end_top_index=outputs[3][c].asnumpy().tolist(), - cls_logits=outputs[4][c].asnumpy().tolist()) - all_results[example_ids].append(result) - if batch_id % args.log_interval == 0: - log.info('Batch: %d/%d', batch_id + 1, len(dev_dataloader)) - - epoch_toc = time.time() - log.info('Time cost=%2f s, Thoughput=%.2f samples/s', epoch_toc - epoch_tic, - total_num / (epoch_toc - epoch_tic)) - - log.info('Get prediction results...') - - all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - scores_diff_json = collections.OrderedDict() - for features in dev_data_features: - results = all_results[features[0].example_id] - example_qas_id = features[0].qas_id - score_diff, best_non_null_entry, nbest_json = predict_extended( - features=features, results=results, n_best_size=args.n_best_size, - max_answer_length=args.max_answer_length, start_n_top=args.start_top_n, - end_n_top=args.end_top_n) - scores_diff_json[example_qas_id] = score_diff - all_predictions[example_qas_id] = best_non_null_entry - all_nbest_json[example_qas_id] = nbest_json - - output_prediction_file = os.path.join(args.output_dir, 'predictions.json') - output_nbest_file = os.path.join(args.output_dir, 'nbest_predictions.json') - if args.version_2: - output_null_log_odds_file = os.path.join(args.output_dir, 
'null_odds.json') - else: - output_null_log_odds_file = None - - with open(output_prediction_file, 'w') as writer: - writer.write(json.dumps(all_predictions, indent=4) + '\n') - with open(output_nbest_file, 'w') as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + '\n') - if args.version_2: - with open(output_null_log_odds_file, 'w') as writer: - writer.write(json.dumps(scores_diff_json, indent=4) + '\n') - - if os.path.exists(sys.path[0] + '/evaluate-v2.0.py'): - arguments = [ - '--data_file', dev_data_path, '--pred_file', output_prediction_file, - '--na_prob_thresh', - str(args.null_score_diff_threshold) - ] - if args.version_2: - arguments += ['--na_prob_file', output_null_log_odds_file] - subprocess.call([sys.executable, sys.path[0] + '/evaluate-v2.0.py'] + arguments) - else: - log.info( - 'Please download evaluate-v2.0.py to get evaluation results for SQuAD. ' - 'Check index.rst to see how to download evaluate-v2.0.py.') - -if __name__ == '__main__': - if not args.only_predict: - train() - evaluate() - else: - evaluate() diff --git a/scripts/language_model/xlnet_qa_evaluate.py b/scripts/language_model/xlnet_qa_evaluate.py deleted file mode 100644 index 3421192d1a..0000000000 --- a/scripts/language_model/xlnet_qa_evaluate.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2018 The Google AI Language Team Authors, Allenai and DMLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""XLNet SQuAD evaluate.""" - -from collections import namedtuple, OrderedDict - -from mxnet import nd - -_PrelimPrediction = namedtuple( # pylint: disable=invalid-name - 'PrelimPrediction', [ - 'feature_id', 'start_index', 'end_index', 'start_log_prob', - 'end_log_prob' - ]) - -_NbestPrediction = namedtuple( # pylint: disable=invalid-name - 'NbestPrediction', ['text', 'start_log_prob', 'end_log_prob']) - - -def predict_extended(features, - results, - n_best_size, - max_answer_length=64, - start_n_top=5, - end_n_top=5): - """Get prediction results for XLNet. - - Parameters - ---------- - features : list of SQuADFeature - List of squad features for the example. - results : list of data.qa.PredResult - List of model predictions for span start and span end. - tokenizer: callable - Tokenizer function. - max_answer_length: int, default 64 - Maximum length of the answer tokens. - null_score_diff_threshold: float, default 0.0 - If null_score - best_non_null is greater than the threshold predict null. - n_best_size: int, default 10 - The total number of n-best predictions. - version_2: bool, default False - If true, the SQuAD examples contain some that do not have an answer. - - Returns - ------- - prediction: str - The final prediction. - nbest : list of (str, float) - n-best predictions with their probabilities. - """ - - prelim_predictions = [] - score_null = 1000000 # large and positive - for features_id, (result, feature) in enumerate(zip(results, features)): - cur_null_score = result.cls_logits[0] - score_null = min(score_null, cur_null_score) - for i in range(start_n_top): - for j in range(end_n_top): - start_log_prob = result.start_top_log_probs[i] - start_index = int(result.start_top_index[i]) - j_index = j * end_n_top + i - end_log_prob = result.end_top_log_probs[j_index] - end_index = int(result.end_top_index[j_index]) - # We could hypothetically create invalid predictions, e.g., predict - # that the start of the span is in the question. 
We throw out all - # invalid predictions. - if start_index >= feature.paragraph_len - 1: - continue - if end_index >= feature.paragraph_len - 1: - continue - - if not feature.token_is_max_context.get(start_index, False): - continue - if end_index < start_index: - continue - length = end_index - start_index + 1 - if length > max_answer_length: - continue - prelim_predictions.append( - _PrelimPrediction(feature_id=features_id, - start_index=start_index, - end_index=end_index, - start_log_prob=start_log_prob, - end_log_prob=end_log_prob)) - - prelim_predictions = sorted(prelim_predictions, - key=lambda x: - (x.start_log_prob + x.end_log_prob), - reverse=True) - - seen_predictions = {} - nbest = [] - for pred in prelim_predictions: - if len(nbest) >= n_best_size: - break - feature = features[pred.feature_id] - tok_start_to_orig_index = feature.tok_start_to_orig_index - tok_end_to_orig_index = feature.tok_end_to_orig_index - start_orig_pos = tok_start_to_orig_index[pred.start_index] - end_orig_pos = tok_end_to_orig_index[pred.end_index] - - paragraph_text = feature.paragraph_text - final_text = paragraph_text[start_orig_pos:end_orig_pos + 1].strip() - if final_text in seen_predictions: - continue - seen_predictions[final_text] = True - nbest.append( - _NbestPrediction(text=final_text, - start_log_prob=pred.start_log_prob, - end_log_prob=pred.end_log_prob)) - - # In very rare edge cases we could have no valid predictions. So we - # just create a nonce prediction in this case to avoid failure. 
- if not nbest: - nbest.append( - _NbestPrediction(text='', start_log_prob=-1e6, end_log_prob=-1e6)) - - assert len(nbest) >= 1 - - total_scores = [] - best_non_null_entry = None - for entry in nbest: - total_scores.append(entry.start_log_prob + entry.end_log_prob) - if not best_non_null_entry: - best_non_null_entry = entry - probs = nd.softmax(nd.array(total_scores)).asnumpy() - - nbest_json = [] - - for (i, entry) in enumerate(nbest): - output = OrderedDict() - output['text'] = entry.text - output['probability'] = float(probs[i]) - output['start_log_prob'] = float(entry.start_log_prob) - output['end_log_prob'] = float(entry.end_log_prob) - nbest_json.append(output) - - assert len(nbest_json) >= 1 - assert best_non_null_entry is not None - score_diff = score_null - return score_diff, best_non_null_entry.text, nbest_json From 2505d49fc0d25a9619e16f1a171d5718b1df5928 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 23 Jan 2020 15:25:13 +0800 Subject: [PATCH 53/59] fix pylint --- scripts/bert/data/preprocessing_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/bert/data/preprocessing_utils.py b/scripts/bert/data/preprocessing_utils.py index d034cd4841..75a837532c 100644 --- a/scripts/bert/data/preprocessing_utils.py +++ b/scripts/bert/data/preprocessing_utils.py @@ -381,4 +381,3 @@ def convert_squad_examples(record, is_training): start_position=start_position, end_position=end_position, start_offset=answer_offset, end_offset=answer_offset + len(orig_answer_text) - 1, is_impossible=is_impossible) return example - From 5c6fcd80dfa83c690dfb6bff23512a52d627e8b7 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 23 Jan 2020 15:40:57 +0800 Subject: [PATCH 54/59] fix conflict --- src/gluonnlp/data/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index e7544cfe46..c098ade79e 100644 --- a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -44,8 +44,4 @@ + 
word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + translation.__all__ + registry.__all__ + question_answering.__all__ + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ -<<<<<<< HEAD - + glue.__all__)# pytype: disable=attribute-error -======= + glue.__all__) # pytype: disable=attribute-error ->>>>>>> upstream/master From b9b285738ec0c69e3e8c984a20c2ab0e589aa876 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 23 Jan 2020 15:52:07 +0800 Subject: [PATCH 55/59] fix conflict --- scripts/language_model/model/XLNet_classifier.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/language_model/model/XLNet_classifier.py b/scripts/language_model/model/XLNet_classifier.py index 88683a409c..18f91526b1 100644 --- a/scripts/language_model/model/XLNet_classifier.py +++ b/scripts/language_model/model/XLNet_classifier.py @@ -46,10 +46,6 @@ def _apply_pooling(self, sequence, valid_length): This is used for pre-training or fine-tuning a XLNet model. """ -<<<<<<< HEAD - # Note that we are using left pad so we always take the last hidden state -======= ->>>>>>> upstream/master F = mx.ndarray index = F.contrib.arange_like(sequence, axis=0, ctx=sequence.context).expand_dims(1) valid_length_rs = valid_length.reshape((-1, 1)) - 1 From 57e97196cd0df493ad0959be303243915b8ea5f7 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 24 Jan 2020 12:35:16 +0800 Subject: [PATCH 56/59] remove unrelated files --- scripts/bert/test-682b5d15.bpe | Bin 253253 -> 0 bytes scripts/bert/test_squad.py | 18 ------------------ 2 files changed, 18 deletions(-) delete mode 100644 scripts/bert/test-682b5d15.bpe delete mode 100644 scripts/bert/test_squad.py diff --git a/scripts/bert/test-682b5d15.bpe b/scripts/bert/test-682b5d15.bpe deleted file mode 100644 index 9677dd042fb01ea2a165c0ce5c49f69d1e65b946..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 253253 zcmZ6T3tZGy`v0F9Zc2ieI+o^rv$D;8*>7#L)Y8 
zUP{Y0B@mH|$VEj!tt@S!&B|8WwV!0R+Gb{(ZMNC|-{*T~2KL{p&+ECIbDnd1&T}r` z!N};yu;0hd_}yG3{CAx_Qp*odnBU^*zxN%8>=7BJ%evLRi#Y*Hj?@G-v^>|!c9Xh!Kb*1YQ1`SOb-J1zvNBNGm98p> zt`11OA$&&(~V@IMw_)>kdTrjO<3P z3CXT_r+Vgt4S|ZMsd3X@3RK`qRmGc;x<}fb32OJYmjiWWI=pIP1tOhJP?dc)T6u20 zNqu@RtQn;&cZRZ6ZL;#Ds$X4v#mY1ECe`buEtYbcn)SWm9O}*oO9Bl~PjaNI52hny zJVnKCc_Uyk$>~wQ%qX=I?pC7{w*}fsYsTMHZrM-HQV~73TUk7+TOT+YKXRmMUbZ8c zJ6)aYR%K*NHo=Ot;;nPL{yZuMG9+*M>_|mzUQDOgfU2)qn$tnDjW+ zXRcjV?xAYskUC3w?@h`!rasVOuUkEk(P(8!R(<~lXIhr%_Nc)(?zR#}s@G@0VG!Gm zQ}+h)q`K49=jWRO4PE5)WT_{I?z76wQ-7EU*DJC+E|Tm@PfvEL_u|lCWsy@A{M;O{ z;?77{C&sn}tgte&)PH6pn<2#|Pe0h&ndgK!O3{;fh&P-Lej6-HA>NWOdBfM!+s$1nf z`<7K^ftq*A+kr9*Qr(&A_oIVyvTK1;&DFAhwyA8$(}-r_IO25Ib5;h^-KkFX_T7hq z>GK?{o~`3h_&q_pb(xmU3Qtp~{}2@7)&4Q>1X`HjR*Uu``O7(RgnIA02O@h%Mlu~N z0%vOIX38pYLDXQ)~vKfaIry6+?$#f^vospbC zWC|+cEk%n~U@2ZL8+#;BN0I{{_~ub$)1vewcgDP=bd`D-6`3DV-;H_C;wGr$)8I@M zGc%mdROLMSr@)9N5TK?VYzvr8chRJ>_3c)#yiWC7`}={mx)Rm=@knMS+=+>*DIu7M zudntYY1c_t$Ik`xd7VymEacBt3Tyk-2Vu=9xl#$k%N8M zKL-nBCeifOmrb5TH8S#pKv9n>c?*HcF-%OV`tDnkM6RqKVNJVkQ3FCg3XETRk}GwAx=kxg!6YXE=+J6pYG-Gt+G$uK z&bybp6w?m0R(%{;&$BZ;dOiQ|^=Qn@#5w0t??YN)EPLHd#n)BH##iRK)eUt>rd3{5 zJ@8l{Ym7U7fl42btQ(Q4W_Vytm%WZG^+-OVagzr;S!qek!;K%JGNEOvBR!qSX=#k@ zo7E2=k?eQTER}i;$s{GJA(K7{*v`cEbI&50oN-xd$}%L=e0Cm>y5%IY>EFT(SGrRz z2>nYSLrMmdm8@mr!yNXh_j^%H6L%h=DVcJIfIlhgUYt z2x^ku?ggA9{+flx6rRT>^wx)nX5faY<3Dt!Inz~fx08YPC%Qady>X*3vphco`TI^e zMa$E>lTI69&y0m-en*bHNgav{7IUX^M0opcWaE~T@CB#Wt2`?{3s`ou8LCBErdf~a zxV>5nW*k$UIyitt+NE)xuzV^G{=#s1Hr-$6#nMu&7xQM z=hm3Ts=j4#CMCte#ysvrWK-E}HMqy=Kq5ZwR0mwhCUKtnayf$W$T%klBd2<|IG8=z z&DxAy6_hib9Flxng2j{6#n3M-Tj_cgKe+&bUOkkgo_pEonZ!7i{J@z&M`9cv#t-242=zXuxUap`^9?nHqnKBkV>!I^mTh+7T% z?O97dQSD8D^BZ9+z>{upqVZSh)@XYy1BL+sVESzmh5>#+;3Mnxla zY`)|?7f9w9%Dy&F#b=^26{PD)_}#O}rh)`wefmqmhAPe6U>lkt=7s7D9i|0?enVAt-&HG zPNH+(uAuBrC7_4a2W4j#QGNIUWG2z=R*%p6ht>6TT)NT?AEG8s|JDjKOPNk}>epXe9i6VSe*b~O6~XpwW%mQ$-`l(Mk`o1vvtV3`3wT{^;lAts@1${B!{XLMzlTryk(t^ zpS}f_jO;I}_uYn1CdQuAtLJIVb8`I47 
z$YvHDI_$949@Q$-k;jtsAu&%YjHkylBy8j}Uka9*=S;V@0b6OXls8FV+3zYt;jc-D zlM&59Il)aV*nSY%A1#Ro-<=MMQ*r&XqrSH~z&do0;U|KGa1K@6nrvGz4?7;^}C2 ztuSqHb3n61|42EK7pN$mY~(}-aZz=a=ln-29_TEu&3)1->XX)$FP^NVR=Xa6Kyd}ES z;1QRtwLL}sbuk>h)7P1wml|F#jYTWqO>OavQGOGmzj(5G`V%DMip;dbvp@~)Fh%uE zVD8uOW`ezG+&k3$zy2wZ-I=OiEL5V_7+2u{YtA7#TIcT{joElRqNy?6r5tHUG*dsd zj$af^VR|kt4odSJYTv6tDZ!;?{9sDy>-(Tl|2AtqP1QaEi!lfH$f76U{r>D#3tvVu zO^=4koq$R5N`eUUf)D%472Gu;t~ zzrTAm3e(YKM*7jw!F26MS?cLKgXvzDYVAG2bca{p;?_RenaFU8QvWr)(LDA%={>QlNH}`~RU-5X< zdCi)!@(?3sVo>xl-Wwh;g^0K6!I)rDBFm#^ETU;rIx#}e<(@}TF_+BF6t(3j0(A_# zNsa#&*7PObt+rh@3Ac?<*OmRp_z6B--3HHYI7dD80~}@uk6B7Z%RzrP(~>zoym2kE z-}h7W234vR{%sAJ>D%6^;V9xnvs-J-SjDkBs(*LN1b&CQ@w#Aorh_ZNd@VCpBS#KX zR}XD_+QXv4GhsFU{@-j$xEe(eYr(JR1{hiGc&vBRl;{YEgEsZ>YH zgUOsgQ`LrAWYZy@ne@2+es5=iB!}9(57CTIs_Oj}0!`KL;(P73Kt*~=RB9h&({yeY zs^jWlG8f~$@1s?lSE@`6-nrn*zhW~jNzGi;W^bY!9qfxp!mOiLZ9-c0s^ zE*c(B^jof_2^wKZcgYgRp@TyAX*0$>?w_iXJ8Ly>ufJ%8akMn2hkf3wE{sD(rzg*v zquw76Z#p@9>SPsqFQSPMj?~3^KV+@ke_t?<*O94OCLl6^))}I5q7fYWrZXTWm@%2j zKL1cqOk{CfGaHeHI;?_&A4M{~Pxn~!^z<`mXqG-q{A-cPYxej*-(?TXOq_!ky=j@q zrfi&(OCqnuT4Mrsynde08)KJTiSA?_$}EjP4rF+&ZLUkEG^d+`i^*h$oRbm*+@%z^ zA!~oIRAwqCxp|d;P*)#tl`IvjMU%Y1nU<~`owCQxVJ^8-PIU9omD(vk;^sq!SIcG= zc!71LrfN5?>C7}wpQPXHloRzzKBr~V?gVQ#u3a2##XP-mzSOd@8t?R^^A&=*t@=YS zQ@oC?Nx8^oXgLY%_sD-0qA_+eliYfLcsW=^-}%&(6G55NOM>dz5tP}Qz3RH3kO^fO z@#=}6VVU9#UX<>;DkR{y-VD_v8p+h6FC1R=&%r3js-Hc6GYsAgFLx|;$Bl?Mpwq#F zO3pJTpEt>+zFP`!#@(xfdftZ4a%@~Db_T^c55=m;!-%HJcn*5pAKOsScUMXpR}&q{ zUeg+)>Jq~l*NJh=C!l8;l^H5WqDOyJwX&P~37l!_kGiJ6PE(0ks!w{_agbfhy?6kko`;W_#q^bvfZms}jRYyR&sbY}Ft*7@ehUxJyqC3D`z*=Bu)&Q$8r+w9=e z$oNLGez{%wcO&R`T=mjd2vj$NK+yXVyfMuO9Ad>)KcS#$-1c=$J@g+$6H~no_3MOC z%Yh%^fc+UPM)W7_m;M2--*EsTBc~6d$Hlo zU_R`c=j0q@rJ8=so;qQ+k^4sWM8~8}ljV zLyDy_9p)UsspqxLjGJ|n=#t5i%mam2UGaCE zrq4@eqqJoIJX$X#mclg8t6hm}m@eYalB|2sB@5x)%K-h-X_e>81ZAHv zb#Ik4V>Le`B~87gEg4_Iax$^y4XrUbcz{b*JG6XFWE4JYU2@FTs77l|b=D5yVVvLT 
zEIChK4322ocu^(~qVH=l;Oq`hnmXE{5(LZ4LtV;&-a)15*k-_xG%Tt*;c^Vl9;0;^7el}P*%{5P5`<+S3;Jx5C zKf#+0@*>W;iSZ2k8J%g5b}Tgi`VSgoJjG3n9os9Svy{GGlc!-YI)A^d*Oo7DMPUlX z>A*bUA!N4F`D*S=INX2sRQ1HuurxE><52y2bqf|?mei^d$fh0UcLb_eYEWZZtF}Fh z22(?asowdp#&kEQMJGJnwn* zC9zAMIG290vK0O!lslCtzYMzGB^PTY)2}d=oQny!#GWq3y*#t)wsleQ0|!?^pgvQO zuW&rfcuX!cj2ylaRGsdGnXz;?&yM=7($bkcZVnBqUgt3~7fa?w%f@>aa3^f;OeT^n zP;YjXc}72`WivAR9gojKTZ82jx!7E%MSqj@!EH0{Rk zR-!gBI*V)5E+jLuX&FnF=zH#lhM+3Z>9igf|M4y=Gf=z}sGmL!ihNpBJ;y`_Qu&Uf zuAhd8Z&}|GPWS~Hkf5?>lS0)F>&GuQB_NsM()Fkh7X@YgPO2VRYV3H@d9eJywf>@3 zAb9zYNUWLPspGH11wF%hYp&gb$_#6QJ0ngz6bj=SI)EnV&~E7huBeX-+Rua8_%P0e zh6{*mhS!(c`D2zKMaCvAv5;VbRd(_lFA?tW% zeX#%Z3nYEQ&_}tC)Kw;~ayfWsbPux?FdXtaJejO->TAmoD4$`ygn#lClBsOQL$j47 zn)W}Y&!d)X+RIj$l1d!0RQ}zO&6#Vvr)#zZXFRHe|5hbidxKRnFnNiIuJ{Z-9a&1# z0O#U(BbpsK(c0szt;ot`nl;bmNjABRrw~FD*ebi^NOf}jv1GsJaHaA$+Z6HYw}OfDXB(dx|ZS1U~!$+S&V`Qm-sAoRx6ld zr}c}li&`>+H$@*^E_KS&bq$tmikm9`-D!_^2*T6ge_+APQ5tI_MT^FuH;LEVnoil1 z!CdU`lyRC=b;H-DIYj#VlT?hBjIntcF7=cajWL!L*O5*+NxuVb{XUqUsCT8bOF@|* z#;{O-4$6F~_o!7NQI@akk98^}6v2#e9COZVT+3=S96j{=OySMZ7O%gr-taGYmWXxI zslTVEHGbxjm^>DVBKl_s#gpNUL30;Xr_+KOkNJU8>UE!%7#8Bh|$tZC6d4hYuF1Mq`}FxrkM!*L)Y#&c!)>sv%)s+-={ICnUWzjrKjPq#WeS>xret-NGnzCf&kmlh#?xH!){3xQYDA-t{Nw|;QtyMFRwJXSQqdR4BT9n;uUBAS!`jn0%yJo*B=ywkY0V$GSQGr6X68Q`%jPvv%D~sX!<`BqbC~Nt zb(Tob$FzTR%G8;q`fi{hW_5d6y&rapNo+&|ULsX@i-)!(m8&Jw=_KwPYHz2^AtqUU z?$4d*)Xzw^mxGo$(5PV>5m}WF5i?RWk69P)h_`5orR>oU9;Y^$Y_s`M(v?@>{S!zi zajKKAqA(5MD3hcPG$NbEr*rb_-m6c*FA|d-dee8Snvv*C7B*n@Kfgz$X7gtgu^uE- zfpy$ez7iCE=g!dYTk6|gXafGoOC~;3cNi5Wd9BtPXT26pj_K|=>z%|>nRY%lZTj@C zSsvq)f{V?}%3`XW}7p>f%7w+1dmT*N{L5OlzI<=6OvPGYD+y+}^utSn&&ZJLzKEm8zXLoq%B} zyBzVkPwI8vrAiK$)+4#4GF7CQr@Stic(hogUsWuX8KP8ITwHQjL;NDx8LZM6(w~so zytx6ft8T!ug(msmy53c=}4E7JOae?~z9jD&ecHns8h z4xF31WbkI-Oj{{I=Hkm*j*;)RE2-_zPWX?@ZSp=>Dul z)2t-@dzUkvGWVxU^;4(J*Mw9Rek9le{XmeVuI*H?d^wT-s#BqVV3=hTrV9F%s)p+f z{;}a)!ld5XS;XtmZvb~_h3SNML6(ZwqOp*utN2;V#2G&D)1&`%W?sPehbiv`&2uQ> 
zR-tTv3KnNaUZ8TdY^=v6JJe$zA(}zu>uh?my7SYZB3WNw)wQFpwqhuAkf7clqAbDs zqla5OeFH7iuT*OO0c6v+1ed;%sj94%#Cy28=;dYXah*%x&|zd_FUAqah1<$t7Uj$gM}l6udNEPT zRHBdJR**qsmT9~r&BZ5RlgR{06aB29MmA&KnH-1LS(Z#D_H<634!r_{)}3Cg{a1aw z3F*=4QjT8&XZpieuw0Ux$3cHN(|A`32RN5r9ad4()CDfKL@Q7STc4^we+F7_=GtwA z333=q)=%8{5trYR|0u@!LEpr1vM%=Z0@aV}|DVci13`ntt3>(~7)3K0d_+rg2P-kn z<&&1z6=;kZMLua+krQJ^K?ufEtqRQqc^!*_P4EX-0&uGJqlvEBSYo)(e`)kZ+GtEC z^;b1caXRK$`HVSErA(7Pd00x*RGxr2I{HK?ujLAkcc^`Qw=D+0GQ>Q)BHJ#sA_=}stna2|Gnha)?z52acpQ^1o zjL|s#lm5m|A51eJfh>)`6dRFs74MRZKUd%wteQI8I)Nibi(j0=gDQuw)=sPZ$&S8f z9n>0AF9$)M6FbX4!Y>Nm>P$}2yV!rp4DZ`I-SkZ#X`Skw&H|j1_zO!d`@;cmQv|g? zcjkCRe|G&_ryQq0^II|}!f7)eQ0H};nNa;ZNX|Lxf>s4aQy+4!bShZTxJLgYpykYRR4+dPd5LbHXpYCC-9%`gD(7W zyX`Bx`Sa=A5c6l#{V(p1j_j72lKymR;)3`-r}sxoy z1^hUXKWP4Iqkm5GV@2AVMPBJHzSqDOuzVne`-$X$aRbFS8pMFfz&8p#c!PY!&~cB4 z$k?eNa$ltQ#wUe{EuSXXY!b3c#9k~Cwq7K3jY!01k?@To-M5Q$+ahvRl}Kc%NYq}D z9xWn0K{R~tLn6Hnid@|;(&vbXIwo?>QITs;iu64p((ep^JKiC3{dtl8=R^ix5*ct& zWbhS{L6=1wcXbyh?To)KTH?O#CeE9CN`fz1<{j@TiQo1VS6UBAy1j=i__l}4Ki)%9 zrbbEffGBbI>LaPiQ8I;or1iL3yyaJm=j*E_+CpU;K181HE>cd}5$IRK{|~fyREVsBZ#ym$i_L7%(qDYj85^GyTT$2? 
zC$fBCh_v;xOFR9_9YOx|5GlzCmqXDRX)ZzU^#?v_E5)S=cIP-X4cO z1roqXo!5n(zlBO}Z=0M!o{#=K^a=>Op6){H7^Ak=L#3U3ZQX3rh+Ik=3nN^%LIS0e7@n{bXliZo~F(ZP(Lccd$3Pwjr z;fM&yn;^0p8{1fSfO^_F`?4k8ay&{N)Y&g>s*ee-%3Ii+_W!?rxK_(6+Or-)xulq?OR` zyfBf?@Sl@b1ubu2E$MR9+W>9*nfwQA(gJ@->+?gTH+{)D6C&SGrtn;d{7H{xDdS7} zPv|O;kLkP{S<5FwWMvC!H_)~rK(FW9S-Zs2LVUp&%h=8l`JQsi>EBBH;0$F?g7>kL zlTDt5tfwK^*vb0O4w0kiOL~XMdB!tmq)lqCh-@RKR80txGw4c)Nmrnkz#k#+ z$QAOmeGnmS#M!U`_(d`KriV*=DE^)XO^4nWA+L<=DQ)Eu(q0)M1u7zx6FZv=e;KT{@(Hs)oq=RtE~@SQC-xek5F720|fy1kup4yKI_G zoV0x>Z7P|=-mo2?1UVY?#RsYu+GGKFwrabp7)R=v3`!X9xzJeP1UV_>2eo1N9(9gJ zmrp;EDOXN?>)}^{s_u;APTI{{_JQT?tT*%xparxw6DQcGi-`wq1GF!5J~+gjvToYO zf+YA!c3B=n>?MEU3FdAjb<@Wj{3w^YJk)uNw03Y(+r%e}7>}Y3(?)&9Xv@RS!!{|I z93pvR*u%+pUg!NA^ETBkDYX9_W4{<%`S2Btf$rla@+?JHa+JA7p8?v6LM0~y+aO1$ zHDF&trG2(tu8_YFT^{RX6*LTgjsk5TL`&PP^hd&^{mC%-nz}B4wwI&X=P2KXFOMY; z=Z5xIp&$2@_QgimeiwPpFz;YF$ORu$HV>-L028oZT*@ADg7G^NA!Cqdf@!+m-`M05 zd3`5o>p7A8b-cVn+uM;Z>oJTF7x^5Zm_D}MY?Cbbg`k7>>+*kv`sR|?iQm|0g99Gl z^Gy>OMZW%w)eiFLv&pUScYrzQiJQKIoQoc!&fJ0ejKcoiFM?P|Jj0&~M-UI!M96ov zZ-onAIuR~K(BexGGL16Nvv;pN!G4atAIUQUd;f;6*@B%3*a3PSAEAz1>>P!j043`= zALK`fJ`=RPz`UOdliG8%opFxE?(cyE%mcq*uhbbK9h7U^6C&lY5poVbg|v%M8?m}O zxU6|}B~{!Tpeu)mON6E|BHL+KZVF?xku1tB1CgL8it%Kgx8d_^u>BmmyBN=s;Wil$-8qMJ^m-i~gjOk=Jdb`9_6pGD zlYc1u2(XR0@kLP|K2SJ{@tnq-^)q|(7}BFDw}$*xGa2`f@#7zDQa(Xs0%?KuoO3=z zULwzCu#&c&$8SpDa}KfoS?k;3bK$2^Chtgy^=yNYFnW;d6zs3$+N9g z2ZlJUev5USp{R3V59`^pqaM?ncGdj-*#u$D(C^=95`^e8(XfN_X zkkcLihg$j%>{3n)xQ2G=I&?g3dxm-w!sQ~i27sgFT{FTaSG4W+!EyrmU-0Lk9oo*@ z7+Rlb&;OkmRVrvXMG+<_?yXIfJEe!eIK(yd|+W#0-|4(!nyEx|VMzoxCfL4w9RZ z3mXQrH<7*(Fiy)`kn`b-2g^n5>2uaA^tqVy%gB094QDQBiq99t{9NIF4_|U(u#{vC zk`m^w2kBL#2g^3cAlXKJTPUAL-fhql^uMOwD(vSiL^pJ>)Ux5$Qs;WoJLpdz%J0_m zUC(^)93T&)ucbeaLSIGSkN)=tBfwDL+e`mJ?oh^&Gg}_xG75b`F>48*C}Is4pR>sc zY(IB}aXUktr#;=t^ZZH5Fy?EBAvxsrX>5T9m#OPNK#xIh`dGZ)Ca>g>)=oj7-&)$g zWnG-%je_)?^Q_xa9T!=jPjU|*$eRV_%P)%zCH*G&C+R~R7=f(MU!$NUtec^vjnOt{ zQvQ3&e_)f5==Aw%Ci?${p8&mA>nJw|ei|se$a($*(C7L!tXp*H*k5^>HP89Ja4&lJ 
zF_ddd;GKYVzmmKzUG^w%0c%J<87^xskzYck{cGZalm0MPzoX1T@-4yMWL^F`#&QgE zjxGzdJ&CR1BDXQ0djHjD{*o~vG79-OU<-M+&A~2j1VyZ+l9?g09o<}PEl-J(ZJR~P zs?bAgqsYhpc#O1d4d}rUT_^D)=U9ZirFqi-44uI6$3v@%MO;u{J9(JLwn=(_A0&B~ zxEtyGk0VcKK3Us2SD0sPCh4&uZz{B{+aOtak(j`FsGalBOzal326Do}Wg0y0>ze~y zU^(;P8%i4e_vtZ!AA`PFj|1(>VSjZ~XU-MuG431DuK~5dH?g1f^LD@p?)+f*(1p^uX_9E>|+&fN&S?j8=9_Leb=^(v`G0BOcEnp^P&!Kw^ zx@Js-{FZ!0ha#jDc?D;di=@3m`wP!R$U@qj!~X39catyoGH(gI2k0}fk2eJE3k4nF z@(0T1;fuxOU%{UL+(yn1S;(8irHZk8eh%k{lnA+u?KU5A<_5d$myjO8c{* zdD!u-V!W8+Hhio7Y^c~sTY zaxgwooRQ%V6V|Thd{N~h^GL>b9BXum3%is|1DcMXc@O2AiyCVgDVWq#1hy z_53ep{T}C?0DsVDt}kdy?iI@S6ZsYCh4{@7o$oiSi`zrx@1+0HrQ}9*E7@P#d4FkR z{nt*j$xxc1-y`%p(CZtOczLFqf-dh8{l1y3!G=tcopU)#4K5&<0z zj#5|cdXeFjt%L3Yqv7uY_knxBZ6JfZWwduV^kv#Va42s^u_1ClY5m|8a13EAl10`z zqvS=g%d5=u=2Dw%Dke^Ymp8Hwp>0FhcSC90eb6VN3|c#Hv2D!}lCLYWaVL%9ofvyf zl*tbhSqMJ|wC3X*BX|oK-cy{&xkGzO{s@tJ>d7Zoq@b(g9=t2Rr_>&?$xhx5c5^pw z#CIA%J;-qqgTQhJeyQ)(T$J!BrtYdNtPew}vx{Z8CXn`IAwO}0e?uGs_-6jX2Yp$c>RosdC%o^v~uW{r{*hjyGv1-8AMv}gpIrY(p9QxCCGwYdt z_}Y;#M_co>ocZ%*FbB2Ji_l{>*-m+mAwE6NweUXXT<`t4*&=I6U)Mp~7_W__aXj(q zwpJfr+}s*uNM9^c3%Z~kbZKFW1)J|=B5n829oIvvpEw8`3+ zM&O^{5=(a4WIAIum$W&cMDy5l0w1wEhq_YWGqg_S|6#7q!M7d9&expDwO-IuXFK1O+IdrGpN;(_>Q3f-NxFlzwr(S_<-k1%wCe=@ zrmgJ>XVF#R%OKy$9Bzi6i4LsmATDv%*Rf&kA@1>%KZDJpp~fH2>-w)_4vu55 zUhsR`%b8&#W0T(<9rO};u0T&h!$1^JpgXXED#olgv>#~O!+PQDp*K|h=A!+7AbM>p zA3v~>ce@@J_Wm3;INfi3FYb;$e-3Z09T6F17z| z*69O9rlBjV(MQznRzF#URz^GTSGo;{b6^bzx-tMTm0o?boq0P-$i^E zd)u{Mw;#lkHcqFfkj|KDUs+yilPKMO`m5{j4Ic)ElCSMBLm z@LaKcN5KiLrw_V69q_}o{X@vd82M1z$FZKsvvw|T#9Bwb_B8xFJxtcw#>yJfvOxRI_y}|r z(DSq&nhi!%&j<~zA8y*z_9Oj#n>EPV+eN!_*w=llkl$vESkL)0$u~`(TkzLg!Cm+q zd3~<&k^`n;Z!&1VYpk@*9xFLV$I9Bx)5@rp1%3cGY%Jdk16FFDstX-`g#L4X-5ol(dF@yfAL<4g`WoU58cbT zjPzdMd%X`Z&-g-nGy6Pm{cWf8eoxzzpnQwfclGwg=y~&R=k34kEc7tv-Kru>@pMEbAT^woaAS7FQC0689)8aZvg>A}osWLzOt@FFkzJ(cK%G$EaP%YY zm9=Xahis7>$-9pA-*%k2$a&H?&@UZJ*3#D_n?GhgIe!aCuz?aVgl*sbe?l! 
z>Y+ZJ{}A~uYCYrE4u9Ei=NNqL!7$m$e%b=R3)*lZOzPX&t7*HAA$!@Yx4^Fl`xyIn za4mhaVIvIs5pr9RU9N&}KQ$g-$LCo$Y3Nsj3FxzdkAAj)O!^n&WjS(NGyIgP$2os8u4C_&BKoKIAC7&#BF5t$XeQA9yB@0J+bht54%RUJ ze?X_vjxCz@;vUAF6^+2Jh(9q}2i5J|2A@BYJIU*`CzzkCC^H@4G1e4=eE zXRguM;%r>N`Hr8W_=<=L1<-Ai!Xys*+y^j+!kt==V;UOq%TB%z8~!*PNc)H;_hCA&6oL(z42Dw!q9Q{@U>01GalIY zkKc>hj=#Nin%8>lHNqEO;&%xj^RC5y$~DAyfo<<$4Q+q8wq z`VaU9iLUNch?G%g8*O;;xJUxJ)`KDopf5o^&@A8sdi}ID-z%*g*q=jgUfe@k;G4eh zA^T?ckUh{B(KmwKdwNI{w1FR@?0TIyir0I{7UbIdLZlkMuVIbv3FG_aRK63TuL0$g z7+1#NQ}}JrHyFdxB79|Zi0mF0A}`X;2I$7Q-1TF)^Rvb_xp)JBz6$=BZ4R|UF>gn$sgDAdm{X5GkIUV#G6a4O{y33&PKhhVBaj&hZD z35OQ^$eYU7A#$qTF44#}*sFelIT=fS<|`82OVoMlIB_H|L^fb!zx0sJDI%{r_^ll2 zwfIxr!~AxH{C%}e;^8UAr4GN>`|B`tfpq@WFkgQ#2h|8D^L}iX`tNCzZp-U-xy`TJNcr8Q@jfSaqPrWczz=u-hV{mHjz^vd>PVmB zr@x23%9w2iG4Qj2{)Qe0O#$iP58#i}*iWIwAQ8L(UIHh;pTSq)6nG6R=PZ@YNf?>FCDz(yoi>_|2BAVy-X2cL=_Y51d}tOZ5HYrDDzy*APb*6JsS* zrrgc>O16xh0ven1n!e2@yZ6|-ibi@^42Hdz9#+%B>d znhOd*-Eh`0bTxPp?B>4LIEMG5Q8w9tyb0{R%sHSp???UcY2>Y-3{(K$PWGN6`Ubz8 zeJXD@vGU1K*@c|P_i*1y_S2D!Cvm71-)chN3JO_E1+Rum9eeMa$nR)9{cRl(CXJj4 z8cu}Bd+7GD2RB^|kxh?(WXn6nyCG+kR?=Fq+YDOeIw@uCmPLig`{d6XaGiVz{REUy zu9AJc3RIJ)rXTVf*W0W9*56)1y*q|eetJJ?o!VbMC4Va~!1bs4+Z#R}Xm7lIko^L4 z_8Iz)*Lm-}tH1p;F{?>9S0)Uww|+mszF&IT*Mxp#d*O>8Y^NsK4h>`aQ{7pUAg>Ev0tZ%6oXpga~Q&)Vlh1p; zk9B@sh}=d!cYr&=-C#VJ2=?%fzc(X7nzAEgAG8^{g?EKkz76aLJGY1PeJ)&fWkpCG zsOMq10W{8yklnFR#= zK>rko1Bsw)4r73RKKu?FeH$4rsqh|9b3Q_WXf+lpW;7#xjcn@?`+T?xc>1V>_L+B^q zQ*dE+xO@iv9JJQ=moK4Tfp0)T(f}zWrhf?T?9k`ts-Yr7z#OB>#JeUZkfCs@0FbiCGIYJ(V zKB4&{e7e{!Pr=85R$qTfgf^mU0Q2GZIT>S+3hx2?Ib*aUw=j3jVE4FiX#@?RerUMV zjb^QYTKslr_i)*dFSpJNmlkLD#}%Y9XYIrjrjbUa4Cm|lV=fF4N49Tloz3;&@!+AzCc+2oUbZI43Zt7a?Bv9 z0@d&}!v{$n{3i0f2DXAS(8%Ld1E_%CcVdt<@%U8@zYFYl4VG5NU}=Fi9~>m*7YE69 zPyu#;%F);#M}BC{$U)LbnI_N*-URP}_rUw$L$HU7_+BpCP0)Q@jGMVYx2&NKuz%HH z*%{041Lh8rUC_EKgQcE)4dicx?&e}&k~LU9!Oo}PGw?YmYZ)x1;7j-&8;PCRDkx%4 zpH57m-_=8DqaKSKF45D71t510{Rw>J6v|D zaM>s;6X40bY1_mke>igfkMvi1y6=b-t+{Mcyq=ItQU>DH8 
z9dCsCp!$2o9@g_-(A1sz1;O|XzaQ*8#y8%RcG(53b20wJ;|9M}qyAh)2jR z#F1ubG`c>ZFX#^jfnnfAa3OD?+yd?3TzDJw^v|I@HZpDl&H3dHbmbR`MW7;z*c3*L z0#)6KS;WH{;^3X+xf@vPuUCkSho1VumT)-PfD|G)k&L1(v z(pX~ZTw*J<9=Tx@F&FFxCH$6Q26kqFN5K=IteD?FtcsAQ;CJx*#R}v&_(V{H-D>2j z9L^8W`RM98IA1_Lpz%z&EP^fp4Hs!w3+-Yomm=o^Yu{#VHm_$4iT~vT>HlzZM%e+a zq~BHaqnh^C(AEOVtN^RQi(muD-OHK0mUnmPp7Z!$KK{1~|ARJfX8l83k@rJ)Vy`xe z{xhz1XYfDfy#d+?b{}K?mxjqEY`zAzf(lT!B}_^i!lW90$B{6pxEv-0{HU&Q0DJOK z*2Wmt$SBsz(J!}n!{$UDgIf!0wWvVRrXU?wOe6W9luK?`UF`!DsBo&4^gc5<}r;?cMcT0fWHEYQD3up5-mr2aY7PreV78UqC5*#bo*w!|yJp>Ng3IYUG;H^fQ)zqA#Dz`g7o;{H@;( z{IoI!->k()K@G^`eb#;-=ME4FqCp?f7xV{v7P9_;wa%f<$5?+SS%1*|?X16}?0;Yv zr~~ygS%07r>;{9dQFqQR!=SCcP~PY4awGgLz=|Ke?XnNthI|KTU4vcFY~wd4;7)XR zgA(q*ef6o>=!K`QWoJ^94nV&d;+*4KLGzn1w2ty$bZz)t+3_6+B*4$fbfiNEKF zzb9Fr$B4fz?ET-ewy<;hnrK-94csf2!sl|X*KbILU-Nf%$OT{pSPd>b6D==7H-Jqb z|H*854Z0PaPT+SD(2jcUe$WfA zzAfxw?62>`e+c$-XK1Zr?*h#Y{H`AVyTI?;0{++fWQcr1zJed|w`AU5;XebPgD=5X z;Pj+W`3BmtlKXpUxO@ly1NaI20?N>pV%t7}d1Eis_myz?NKivtsz`FE#WdAI}<@(9`B?t=-;%^f5kfpx*gC8P|%I>d={U~rfmypV-{_-h06@` z)K%f9&__YzcJ=`1Q=kETQwn}b9Ew9u1g-ev{?YiU6F-gR{K49{)<1Lys03BSfa=Yx zk@ehRDU%93U=dgXmV#VR0QQh)FL|4`uuehqLe@WM1^dCy^Q`|%tbb58rTXdhSR=rv~MKs8%X;U?aL;gP6O2-@4jdG z!3z74|9rU%UF+A+N)xnTYBq0jvkB@7b_~C_Mt2AJf&Jl5XvfZFw1xa1KPz`5-w4Kof<5@#$HY?jDd0gc z1Iz-Cf(t{Ql_#Ke#o6){G!7(!`Ji!CHur~YNrm4xd8sr35BwsqAHEf|#4hD5vXsA7 z$(AMPmx5eS0Lo&rr4+1y-;uvmD!^*^7eNhtHKX13)F%7OG~BU z@=|Fe&u&mcdz!G*3f=_ofU<_AQtDhL@4@d_PydUT%KPvif*RUYO@FHxgG%rTI^F+I zp`U@qLpJ#w`Xy*MieDbVFTq#H-+)&9b3gl2GrAT~elnZ>XVd>|-e0n%lC@Q3!~Z#_ z>a*&1l=%Vt1bzY5JKp`o!QQMhu%|cczaQ&=DC-|IA7fuWNgRXjXC3XFrhnti`k%x4 z2lX+m|52=eup6}UzErX>+xiBf_y4kdyF_9?8kDYKT^6%0K>^=&3wy9UC`Q{eHiG!^{=4bYOo75g0c+uzsc;W@H=X` ze{YG9=Z~aFD{{?6?%!*;e{baejeh;f3|V(DU0!NWlMN%ivSx0QybzltYfq%hi!mjh4Ny*Y+2i~L_Q?#!VBGH17q{jOnzTC zH%r!c&yr0G!{n8gF!_Y^Pr+xPgWp5E${WyW{#}kO`K(uTCHz=&E5H9PwRPj~cKQ33 z?cJrUrMqk|RZ`BknjN6xiju0my`{3Yx76@mr+P~-spSt_cb*w2b?5rYF57j|V7o!; zv-?TjHOw=x{(fRT{zIJnih2qj=5MqUM81Ll4m2HOUQRMk2bm|(Ix1B5gJ#CL1?;L~ 
zUK^NaXl*I;x|w;s#5}_{tY>aq5xkpmjsn|3HK+lVK>vNk57hA!_yyP}v4??35Dof( z^4?*xeIRcPW5Q$ys2mw4Rg7Qtv@ofe5GH-e(;o~1!@!N;7H}K51MHbg{BaR~xZCdo z&D`}{fOSu3;QY~@zcB^7I*31@K7;s^P5goGPC-{2DtSU2;*S#U#NOSY@o1>;5t5q=8TzZcuJ)G?a6z=P;YE``br=qymSK2)B7J_SlULWy6Y#4lo3d6X2i z^^odIylY(HI|pZ@DxmNG9lwc?)A#W$;_(Q6PsMsa$$TB-doVT9ne`9L>ntM0=zBTLxybH95Mm&jYN#m+n-LTg zMwo^X%F3zAs?7SCSwAyV2qA8f?dTcmCKvzVSTg z-D*2aI^HKSyr|9X!^6&mvjxG%M zr1Jnbqq0KAisc`D^x8LEoBZ~lt?Z!wP1hzavC*||E)0Xk6=E1hU=)fl7UPlkSBASk zY~qQ+lQ9+Lf8!d}4?Q1yG0YV1{^gKR9`wmKcvnBBpH`GU|IzZ+tFnpl`w!M1Am`=<@z&y#Lwvw7=cYg;Kwjp#rRL$A^TC|5kC$3P~4xN=@THEkVXfxh`Y90s3wO|4uCJ%AAawuCJ0jfkv3}tJ`3QaJ zKgT`yo(&`iqY%T8eysh+D0<|RbfEl~1))edchqywct_EN>H*sK(NBEL+DT7*>O`^j zeVX=t!oU!7o-+0Q_8aOZjF84yRJ^2IHMqay_`fH$)o1l{lU?kB44DT!fVC?bxk-V=pWbUWV*OcE?tB$ad`;S?xM%Tvyz6)uN7`KytJEvqM(+&q}Pu z8mz+xY{C|_Y46*w%D?OCbUj@+47jg&INo{st$Yt{}3)l0Sg zL;UWy$hVJtkop0B7nl9o=y2WtZ{im6^SYz{=7Yvph1N zF^%*2AZX^BG)>a>k7EPO(Dn~y1JJ9L*BaLuN9_&ge(V~CE9;}(>mmKZJ?D6YKJ@>e z)WH~xLJUJ2JG*_5`|S8m|LdCKz9R4c-&g-@6Q241o=wYaWph6NpKSFkDtZ-&-~EhHlXZ@Z=KO9)H((RCpcG}Oz%Eo*y&P&1#@NYP zWA1fiLO4m*4}UrAam+p(z#$yLF~kr@+YsabBaQ!$G5(J(eof@tWOdZ!j_ZG2Z;t>p zp?S9P|JRNG&o};GZ2W(kICBh=(rH2(9g!55LYCf{GJgRl=%whNe zJgWqm6s{+;KlF}xK9~Ie3MxLToPV=lxJJK@wyjbB6CV$q=t4%`vv=hm4Trsdv-LlD z=bFiu<=zc4E!;}pl*TRG!96@c?<@U7@B2mJ5xx62qeIUx`-h&-6ou~cexXlX|1T&9 z7>s6R===J_3hBcz0#Uh&-oH`wPUWk^J6c2^i=1%wvOWUkt&1ElZXzb5@;&b|S@n?r zO`lGmi4w#wDxd0uy5Udihx%KzCXWh!&k zb5p)@%2~r!_kXebzuf&NTjsm}#qR$!_uv1zKVB4#&1~!WY*E-n?!i7Bz#$yLF=W3| z5Mt!^joJzH{_^F#PZt-LMBe^*i=RV}`T#q~o*#M-$P+k)-j)!~kmqm#myjA@?j4zq zpkE=cq1m|bbuu$jo2Ko`y`fDbZ;HEx&OzF|G1{-k1Hv8Qdw76Hh%MKCp^sj>P+PKD zU;bS04D#*QTtgNv za_v0XC7jX5XSMCQ_59Y8{MNJlShD#lzm-f0r^(jM;@gXs`xnoq+cDzD zi_7P;pZ@o6DR=sFa`K%O&XCc4ndq3x#7Dwpaw=j~=8%yyQ6)Y;TfXM&;}@QVq;NE@ zU$ULz2L&II~ zADZra|GbwiC%u1-@`zS6->{}cI;*h;>yYjqApZlx270G*+<{H>Ey!(De)}oEgOuMf z{X?m^O7jKE$O^=~H@nC^s7mnb*DF`pC!GBhyM4B{eTs6tN;#gd{9aUkBY(GBAC+UG z`9DX7D$74xFN& zL5`k923@FrUHNw%an}{im8qNSy0)@Cm#vOk7|bN+V$mr(xo7s3_t8m^;Fd2Uy( 
zI}pwF>r&n`!`=U(?mrq%>;FWPx~>^5^c2#LYu%{){|(>&2mG)${QzH9zNC2zcW@66 z@CbeAKQC;1U;cZ)j9)7o!cp5B)q#~e%-JWa-mxA){4m5){F2X^ckq)JLaqEoc}$=V z^@yHD^=|&}-VglW8a^feG{HYjlJ$J=fzla)Q7FP#jK@SwM%zZ??^}((lb!Yx=sL;2 zKFhx*bC3D|(rn~QH=%h4|CisJ;-{z4ie~=rROw8|Oq3u!$uXEk?_95MVQpcUL!XD- zerYc?{=LxnH$QlRxJuzg4x?PCkbl|IWz& zdB>r4g8F}k`X5PpJwEI|ukrtN*nsRel?n3M{G08bKe~2!{<}SYWh?4uZ&<4Qq3L#1 z)|9`;${+c3{>>I?L}PZ*oZnJ<^IT*4WW{IIH@}d^6luAR%ou5tozjlx`W)~+R5-43 zWMSAv?!i7BKetnqxSdn=Gh3}!96^{BlMy7ll?>g z|5@9B!6?KqjKC;F?Qan|7UR+LzPX5G_h;F_Wb@+Z!c=lPW}*b?(aJw&(K`nz|1+Ko zbLjK%Z2sRvw$f1Lf4lOp%)}2X|BcGVMP=lwvO(6LRX?4zK4g&m=?`Dv|BJ8;*>4O8 zE66s_wjCXBdN$~CPcz8UbLxWzwngJ^<$s6rzt;2j+q3z9j;Z*B`}4dySX!&G2J5f^ zQ8|zD-)%A9CgC3Udka~LGE|^)u`yI~5BA{z^7A)G8$YGj4wAPS)*8`|;o1DZho1jX z<^N6X527}@7IpLll83cZ`v-<4QF@3Yi6*4cfh^kivF#7p0=xOYJG6=Zmm#yrrTD+M z`M-{7ddycLTZB{R`M+pI^Ig6zyLo#`J%$re`tLcWTN*#tXER0G6UDWsR{|5G=#t=0dhFR}xj`UktjM}7DCd59CN zOLD#=IEFa7zhd3pZ=VZ2pLY)7CiJ`-#r>Di{d@Jca0jwDfr_ub7IuEkTK`wZnE%br z5chO`;TiflTtEjqqZ3`oAd4I>`Rxj>;W}>O7Ve<;cl?j9y&Ae-HAhA`nt%9!e1tyq zpYPcZe=ZCp2cr9y*xII>?Y2}QzDot8v2$Dt0@qf0`~xRMZ`R1#{* zy5f?MAd|xNWXyTW?pxz;uYheBA&s%f&r5pO{dHXXl#dt}B;uZu!cP|G4$D;mE2P;qcwhhIbdf68>T7v~XzQv~ck5 zXTlGLObLI#YDze8{4-(y(n;a_^CyMxb$>eSyF4N6T{i%>bbDRF_mn*W21}1Q433dxc=4fqm$<9zFrVw#tr|e zdr+9}c^$pac79La&Q;@S)_f&S4h_l6#tdhCG*qv8KGYazjN1&YcF>TpYs%#CcSrwo zD3x{@_O$=!@SUO4!?#CI4ZG)0HSRn<{B6yD^8TnJzETh>j{arXIec;`A2~7X*gG*C zdZQ%#gXd83Q|;IJ>q*mUJ?$>|F7Zi+g~$g z`(MNUW&ucLoYVISsb#zE-;R(UBe+9!7;=TNBh#^ z(6+GH*h_KfSXFH7rI=k<9CGeamTcT!92%tEEWYWTVr_46XjxwzT7}bO<(OAPQaVjY zqXV%?uZAjQ>9w!Fs%?BVoS>gVeY@iuU)7KKs=Daaa7NrYG;e(_Tp*)$kC(_Rh}J<~ zBd??Lq&f(hc6Crn|Dsf!acwDyi^h% zkx~C&UljfXyZ`=kq5p!gZKwNG@%4f*kRDsA{4abi45rueapEYX4@3PpKA8F;p?;_< zR^LnK0Y><36pAnw<1rD{+M1fE9oelN*`eLg-X+MV>jN7eYu{hzU!xf?E7+tNf|&vNM7;D$-hqw)5=2%j~~y_8+?Tvj4PO+5POlceMX( zzQ&F0|E=tQvPC$>Hcz7!&D!hL(%FDb*n;#{?Z5a^dS|1wYn-26foSetRycFixyW7O zD*Fu#d&qr=v1JaBhfpOx->-O|{V#kB`F_RqY`@!2`Wd@F#Rm}I^F{4M^gEi6MhCJu 
zfm1kxXzqKo2DSHH{^sTo&WXE#@}IJAz28^pDKd>G_CNpg^VaXzXn(7``x#~6sNay= z?_C_~K6}2=T*gFny;Yurxa!$6Le2Ua;hOZW<0fw54({Or9wB?%doaNav{!@Hu7 zr}Mb?ZW_DnqW94KXdS}t@}8yWO;gxh#q83B-dj|&jcX9!&b~k$qIpHn=0AUZNa%~s z^OYf?|Np{H!(bF*7@qn6%RT?ap2>XA#&gcE^B^0Jvi}q8|0?&Vo&R6M|3A$C-){_p zFVOuZe*e$)0ep5~=wTa;kme{9VJymjWlaD%5tA_$)6x5N&)`?)&&=%~W(t>J7Um%) zt_ln2wG-rRocHz%p2II*aIfWUl02eLKTLl9J6}J}=dUH}_`?Y@DO^u(yJ4Tf+b@PP za*p&CVF{LD1==q1|M~qLWaoSQ|A+j4Wa&9HT;>0p6VNo9ufP1o&_br>^Z&_K={Aq! zn@eXk)?gjd;!=F}4fM_((&poCqHjU&9cgcO4zg?Si=k9prG9`ivH~&sE^-g5^c}>P z%h&N2!#?2yNS=I=U&sGD+1#8)ZH z=!X(+b4JR;INoh2px!pVSWABDCofhsu zvrsNd}-*~6E=Mdm&=Jlr9B^>^GOAK(%C(Ep3d1A4yc z`xpF&?pMr55H7?pj6nW=Y15+qfl5@)VJtZj)gwPx|2y0GKHI!bTb0n3CO5M0 z58MCfN5ez*E$!6v?2DJIL-(Jln2wofyC3awWc-8d+|8%o?*1c-92#y||9ji|U$U9c z-$GBFw}$Vc^}lEBGa-#xn1kly)(epN{<*u#4}B4q;E&dCdJfrfo`Gi(tFcDn3-(uW zOf+9{1zCBpe^^PbMsln7=yms6UqHe$i*NLviCg2hb=ZJS*n(2@{>r@~E3gYaA1w@f z$nL*3-=B=;^5*+n+|z7>IgipiB!1_|tkKf`AE8HiOF7>$dgllCCPt4Vchvv9yP2!* z6{@v0H7WIOM!iecCDg;(=cM+zo;=_`Ni-pi4rFlxr_iQNZ%4;z^)9+DtA~+2t6n~- z|D6rcxS4%0S6h!3$EL=qm)QiZs8mm%kz$L`?TYsm%zCy3f=tEC^BwV9kN4@qt z$!v>v&Z6%!HK7?T_m%(0 z$~f8Tf6dE{AxLL3reZpxzWLNfa~|l?nvxEb&}Shx!@D=dId-TAP&LW;jre(p%KZXz z5h@3;gUE#It3$ruPd%_q+<5n+_N;diD}+~~{3qJu@Y}GO{^9$#MtB`IAj>!2LYATo z6%oI*?%%wj-WNU+b_wsnJ{-Uy9KkWf5J&fq*;ZuFr@a$o8Xd^u1Ww@$&fx;8_2btd zj#|_qfh6kDpx$UyXEc$`>W>!s)A>iO>XtS>e)}}GcX8C0#Qx~V{!mv$Yk+g=f@baG zCFi+9S#cS3HQGmPzV{5h@AA=>Td(sQ_v3Fy zvwcQ~dyapAN9aTUMe=WKZXns?UJfP;F$^Ox3Pl);@#tmem%nfP{~O*B;hrz*^CYKY zI%XmoGc6%!VGiaYT02oCegXaI`t3#ZZ1qdw->%=To?`tu;;2R4a^upojfu}UPEFo) z-DPL(=^t%g_t-xA(piT5{GZ>szbA`AyYcJ}vh%7jaWW&EC38!yzc04_et`A&L#)3i zTSgmOH-8{aZ+$GTy(p}Z-b$>-8l?Rw}`8h zPAOT2*mmQov91+k1b*d;d`1)p_NB ze$#KaP%e$^x7h&N!aKq}!5o6$zZ~w-yI(B|y}vMr;43eO2f~lghyMSoeT@dNL6xIg zGLFO$b}3mujh%|}uMY?V9XA+-7={rTg(9@S=N;@;wjV0vWJb7)%&7~qWa9+$f5x%B zi`n2~lxy~H3){E#G9P8Ab%)XzkBO+DJy*|2$UzNH>Z4i~RMu z+M31MY*eZfvOg%$AHy!dEdQ(c<$$mg`SpeB?z*AufC1Wu8gUD$(tIDkW_?q^;%;v3EL=6ly^ixb-9WDWZO4chufK3LO3 
z?fZT0`$6qH((h>BxAViuc66*|pRHnZY-M|pS>YTRjje1uXwIc`MRQb-kuk)PMD-B! z!?(JB^UV=Es(r4~KC@Bs;}hEIh8_I4z5KcwejOX8#qTMljW zVi-oD2zh&Rn)kcdesIDS-{nXC+Poe5L_~9LCzDe#9W#;Ne?xo;y>pUsiCOeH$cEjGOR%Cu>LLaE9tAT2J5f^oA6=# zWs7iW6o+zcZpF`B|7Y}53hzNyKmR`R01lyBeRqWHsU8@Pk>&6Eb{*OKGuKHb(S$TQ z5RK_&$rCt*Ge|4rk^Oy+-dU{tP4nG2`X%HZJ08&-fi7GT*Zq6@Dv>>}ek5EcZ{im2 z;2yGHe<3^~`_O-}XNAG&{h0Ba<;o8#Z|EaZrlNhH3jH<=v4eca>K8(VK7?BJKs=-F zIjjys{YG^Vs`=J6i1WFlaq>Fj?umu`f7GMGIUitz^hTiwV=*2R(RSPX|N9@z|KDv) ze!sE#!{#lMIpKyW{Qnuo|N9yLN6SOLJ<|FGS}z)Z-|YN|?|xA`%`ZR9FX!JU>Gka| zhRMz`71J>jC76Xdn1?of0PP3$5ggV3fG%?iGWr>^bM-%j`vuZjge6#pwChX_FAOW_okQhqkUk1}`49Pg8|6(n1PSHCTvK!|14`=A-khdFzqcN~9veL&) zE{Ko%53Z2c5PN+{s2VdQT&LIWWf#1|&Y<5yy*4{3oM4+oYXG0_!&5fjnkK(@e|1>s z`75@@^Z!GL*5BO|*ZV44-L&xJ?yKIF&$rS(Vsf5OQfY>?5usj@_Q z!(bF*7)D?e%G(BnHut)Ht1?b@4pOGo<@x>D=(&ELk9wd{9nnNKJFewT&u6*kvzY%k zzh5Yl&RC4cL`=q1^uEHU_;F!~#{Rqi>P7oYnj`dXVVLf>vZ&qMs@!9ya0#+M(f-c1 zUYDLC)ARLZ&?`PCzuzrjySYAfMAu{2M`pWS-$nbJ?teZU8Ss2Ke04~8xA=wd4^v(U zhpYuUc>AN_2gM%^e}8dkIM6se>>u#4@ckRZ!}n4n!@emW4|`{PJbZV^OW`|jyc90E zkInq&1<|>+jdRFFNNbmukjv1yUptD-VtoY2A&V7$TZumzU)cU+{Jp%|_cHyy29@Im zgmq*U8JlE&8hsPCpcG}Oz%J~;J{-Uy9KkVE)(i;8&KHEEmkYuVSDCjqW_0-5Y4$tw zTO1YpKOS~o{h0BskzogR4H+K(uJL0b>9;2AF_!in-$42H*^l}*^U$#GjUnNC8;973 zQXA~|11ASri&zl8zxxH_TZ61&xBd?mKmFIR(=pMvShsDq2kV%Ez=sY;PkI*qSK06P z;n-#V{P4%&N2$l*pKRJ0>-Rfl|97E!|3miieSNg|L!$AnzVhFAH-Dp#_HJnGz8xC+ zy{~Tlb!dA0zIW?AeGKn~6Vg0|GdPC}xP&XXhU>VA>fHsQW`9A5qqeFb)MW}n!o5j8 zEC}^?3)tVk54Rk52lwy*kI;wy|C{=E^8o8V23Y?wAas&l#vC(bRycRt-sA@hLL-{y z>Z?MFx&A4n>8+?7T@bSW{QEFaT7yxD*wTVfHLV~Fqt`BXZ5Tlxh5EzR*dT#A6p3rr zpFWnHh_w1=u$>9Wc5|~pQ~**URJx7&7t2h zv7ZgX#;9*+-|sg5qr8+UFVm$t6D631IcRfiyJI`Z&IFq##kR??ambwWHn3qDFY2E_ zv)^09Kb;@YDm+gb3s8BUokLdLe=aN`mmxO#xv+vhw{VR_{{QToki@Ljy zjf*`FNz`L}^q-;L+mYVe^Zy$52=7DrH{}s+8~fCMed@nH&%e*}@ALfo`2T$&cfT(* z?0w8We;k_7jF!cH-qAkwU!VG~&zkVZ;ehli>a>r1$V2q>`o|%)`EfWx?_B&ibl@00 zhTK7E?{$vZk3B=zEv|C3>m{3_W2QY0X|e-V#eN?uU&sodK=NW=sCSKZ{p1tXXa6f6m|>F5nWb;2N%@ZMXcpo{s(UpOF8QYioCnHS&*!?mlI-Pye&L 
zKI}6;MgCnw+BLK~u6gOdhMUq!D^It`JCT0WG5&jx-Z@U%ctC%I+)!yxat?G&>C4Yc zs~r8WKF09m8O5BZ&vE_#TK;eMh4>qNp%&#|RnHhNPQEE$i{*2pe9{LyuIH7$P)K&a z_4vv9zvrI|!-Pj*6pB#(N#p;YEC^%iy3|~Dg9*A z>7Cl>j*KxXdI@q%y*J8QhV0U|*Nj#Mhx7l(JsGEp=JwPnH}%@Dq_Uj-sWoUos9lWdT%~uA=%8C1@jmBXffuIsWgc;4I~;b?q5>W^C? zou1!Vze29Yhy8~&!t1aBTTqHJ^nRM{L{`4Xmmr_6|Jy^)`zHtOF;0)xEXLIb2k3{8 z?|+l8d_MQuqfp#2#1KakO-Q4c9a;Xv0%L^#8ajk~$~{B!1Ww@$nzb3{$Q}PYGF%`p zAsSmwJ>*-^JFD0xj=4s^j@)5sAS1pj!TvYqS~G;7I`ql7Yu%Kn|C@h3LHj$-UUvRd zcG~*C;~()63jOWEw~+7u`@MUAn}2;@+pgX3B)fz&3!k*@(YS2Ga{l=kKK2a$xpZ4b z@~x4+%0Ito{lG!{o=EQ=9^esTjn)seTR%XL`sAbifBG+FhoRp8qP+(a;-kF?28(NU zU4`T@r1d$BAV(qUhsdx0TV;;F@L1&61qgR-HBPb7`2QsNpXMCJ^6&cc^YUjq2cq?V zu@Kv~VjK&0U-$oq1S* zMTni0e|a1j<=?%F*8eRLzYO)l$r(?cQ62e5`;X4{E;}Z_zK6b&Uhz5g>Q~t&6WD*_ zJo94DevEej(f*^W9kT}c^?%`o(4+n6{iS{BKB^rN-}_zT((LCA^zNVQQzEyZ6lJKu zE>s@xANG({_}aRO&>4i|6A}Cb!pzjE%a*Jv!AyAp!I)ugnPd0o7`m7|MwsY zzu=uxe?Fq`{Ji}I{>oehdiu0;T;;ngQ~s6xj`PMG=!22V$WxOn8ELCp^z$v;OOJ%MBm{~QgM`PaAk*AMy9_xacR`PY2+G+I+HhBkhE z`y1XrbfOCxWRXMV@*#g%|2oUgM8n;)(Ft2qy919_bwm!AzyfV zVVEf{I&a0V?Ni*PPIoOO;$|UrL%+ag{Q~pNT@ap!>@@uX1uG7Di_KjziU@87=`Hlyl-QW6+h5#|113n*5*WePjvsz zdNc9Q_A{OoLXmK^rgAJf9uqMcQQu2c2T!GUD(BJMf$8*_$cawS%y{I~Wh_?36DMqeJHKE1?#@6kc;5!N_n9b)Q&4df<7 zYeLE%8*?YOk=yTUE5&a?DaufRU6E{4p1$k(4AK8TQvd&2{r}p{I_+kHOroCu-Y`l3 z|4IG-?1Mj9U*{ZcersnxbX+z+08jVr&mPtXu-||DzQ_6Y;Q$UHTAP=;XulqM=a?sR z3y#ra$W2fuip!8)(_ELhN^O3UY>JMdr^yaf?e+WaC)b@7K7sso%S+vQ`5>!@D}USF zH`JmIiFefhjqWQ>`Og`g!v$Qz6h<9|{6zd(2qqV|6YxeWik_P<(PP@|5CPg76K zRWB@6FCa;;XTRsak2pwu$^Tp-ot0RFXm81imtJCjy8n0G|NHL$Zuft``@h}&M=s<3 z4~l#Mw!h<=yFb|fpG@C)vj6`&X>GtJG^?++kg2Qs&)5T{^fFYSLtdh_%w5{=%nbL< zea#`Z+xi>7??KfLwhtMN$sZsOA-Udrgu1oft2d(Z$=!UVX^rzc;twH!p>36#l|T!U^&e&fpxLzDF16(VFOr&zX<#lb6C3 z;cK{#o4AELs5VAiGr;*#CAx_;K*53>G#hV}Qj+dgG ze}CTkdwcJ9J+%M+ZTs&ZwEsRD##w(q$@+U~HCu1rLQgqY+PPZICurXBawwF}FpR(` zq~CE2is(_A9o;X7vGnoCotCz^jJPi6nJBLE_{(83ITf+9FNf*mOjOb1Rq~a1Ig|*` zLXuv;TRv;#({I(r?`sf$X#5{_^aPT{o||X=;rYMiU+Ev_NMjy$>R+jN*Zz0s*?*Un 
zv2OM?{$&2|eC2i_e?6y1z1O~nqaO3xd}0I1-6i-sCA9;^IHXW zVGs7<01n{@jvz6~c?mp_<+>U5mO2=|zKzKC3cWmth|{_je1HP&Dqp3eW>K(C!7Z~9g?(YK&}IKK;tG5oLz`d;|H zHGJRrV*c+c{_j@4C7E9*_l7pXe@an?3gqoqc4M^v?JnWG|H}vNc=&<;&+p9~{*eFM z&HqOenjiCj`MfEt%6Q&0UwyJ{-UyM8Bo(4-7}>o#Uj93U=5|eS|sv64^1% zL3X+JV~&aX5MpE;v7@e?Y(kZOgs1xoq=h?>T*a?nE1&D-lg#`7#g2PN{!u5b{P&OO z^~n0q37ofKCX}8WAs<_9emuLoc=cgqtNp+dw-DK))$N=$D{Ytj{DWP zFp(bZ#hqCAq_3}jmG`lq@;JylyVUs>vnk#m5+*xtDyAd*yCGpFS%O)}e@B45kbOg+ zAKGr~kLOoB-ES=V7HaAuzfk^K$p-#%zCT{LSvcC?YmRiH?{Lf`%fDn#eR2_&;OKk( z!w-k+r!n8)nEdwYLtQ3*1y*7;)?mjg<3qF;!8&^Cp!3l;&?|myzs2wSuHlZTkCOc$ zPnqTVHOZXt7RQvLEIJmuum|6{`rksd|4;UpFNW@K4-EUn^=$LJ*g4zTE@e0%?%>*& z!w*ir9RA_-UxY)OM}@;1M}>D!{zdr5hwNwX(cU*lgzq-~xA48Se;)Ro{qwN@{GWyI zzd6hvmBYe;vws%;cJZ%5+3UXwo1}FJJ9m!`6?;D(b{O9)-#;?^-O!hOqwl3~#Bay& zogKr&o`c4|tR?&Q{?Xxk3qKb2&Hh-3`7MsWf5RS_8=nvReWdUE4~HB3GM<&;@!KxxhUy8QRW_}|>Sx_m zzt4oa(XWR337;`W^BHx^E5^TGQI0-k{Oc9{yVlg5|5P|7%`-TM3%GC^5Uucd1 zI?=Vt+<>*l&B@$o^B+d*AF=+R3C(CZsek0G{t>cuvpEqLUkrWH>Hifr6$T@H*D-!C zq<0R`hoO&U7<~kC`d+gA^j!?shk;SzD)+t^ipa5urCto<$%&}iE?vi)egr_1o zTfXG6ZlQdldb|A}5l1cRkU+B0w6VrcYq6D)r2W9Q-sQ-KwaT0KylJ-fPdn7&*wkq4-*9vAh8Uxk-U2lD8xj_gDRLQEuFgqY zpP2o0L0BQ&seO*-;btB>2m3ickKnGh&u^=-29>jigmq*U8Jj;OY@k>CQhWWQf)Kx< zJ~^q)yQ;o%ZTau}xZY~lR5ML|@YiUoo zwkr8&<9BV;{csBBz zS^T6fs+=|;ED?VSmv9BKz1EbD8xXG1YctO8TQJw@H&Or4`0IV+ultR^Vw-wqJIeUk z&AaVo?wDxqT=Wg0JM?>afR6e6|Ji)@<$U+o`S8f$k>C1oGb#gbC<7RbLJULRKG8859S||(RTn^-qUx0^iuN=^-r|%^V`vZPDFDLGOi(-dk~FJ zR36s{;yj~Jgt3U}f2cZd{~vnoAm?`eiS)^+*XAaXK%F-F!~362^?UXQe0EHy&qRea z@jHKF{yu#c=3pKcU=gad*EMUMLw^3f{)Z)wS%wu@iPczxb=ZKm#q7`J>`$`uH2?Q9 z{})+$?l}Lqk^j-o|9FT0ftL0929Vy!{#+~082u*cY(Xi?kkq>UMkrt$tYbih9-K&SKQw(sVnFm$f8Go-3hY$NBVEb-Y+W) z7Uf!eFux&6C(DpDzx> z=%Y}Cw73+;(mPKpf6D!M`b6Y}v%(p&>%4x&X#bxdo7-dkU$p04(L-|7oI?;s;vdE#~to&b; ze?)s7v>cWH8u@oGT2bkKr=`<@EKVRcsKow@CE*mkcBpF;K0`l;`n9gjbtcxk)_x`7 zg1Ae#f@`>ro4AELc-CJ%AcT9uS$1wY9?|>oN8?}0T-`L~PZ>;-^}_xCPwzYiqY%R| z0;5obw)fcE=y->Hi!Qcr23dLz4U6@^Emu~_=J$-fKV*+1O>gZMw_G`qPQ|CR)BkKg 
zetLfYuFP{`BE3^u9ed3&rcXum?XWB|=-O`I|M0O;InrDMzs*EU+9l*HRMF!X^($Vr z|BvuIL~~~9QFl^4$(}EjgzjIwW{=$x_52H`?po!Trkh;H}dhW`A;7;SIsdSunAjGiZWDS7xtj-o#N1rj)V3v*lX{C zc6%5gd)^)fXc*xA8RGpRn@4+p$dquJY;|1qVefCkJ5uHS&3Grsgm99q-)jGWx`O;3 z!uy=-01n{@jv?RgF26SO~w#+0MIW%J|z!mAwDd?MGC z_y1jIRPHNps5f6{lOpCE*BpBtRrj^4m*oo;Ur-Nx-ME^zwtl&MZk11ZwKVek55D;! z`(m^F3)kbObZ+4e?%@F*p%4APD*vmL|Fz0L*|}BuCo{rXGC%$_lK+pU?edQnq>x4{ zqHhNdl+IujVi?j79dp;7KlIM4@{dvUBINk`S!5RT|1nlv^bMu)7)BKn3> zGzkhRHJB`{&e_H-a{AU)Ty$|M*3$O@Fund1x{&y&ohn3Ck z$|!QF|G4~bTdV(}O8H5szZY9SE6r4+`T;BH<;EdalO59S+%Nsz(w`!IyBH`=$KNJp#r@P`X+v2P7i$# zdfw8v!58Unvku38Z2QC=z#$yLF~m@A9YOZ5hlG6p*fjYmHbyHxi6-Q~|MzA60Q4+Q z;1tf_94_DzuHYK3qnE#5UN^-0AMc@X5BuO2c?b9K0MVT9M`R!RuTobd+FLl%;}B5WI}oFN)p z7%SYi)c8K~sfQZCrT^E zUmj0RMrFd7KN)?CAo?ajeS%MpIj(2A_*!xC$bW9Pf4y*i|LYgj{q$&W!PL?h!z_B| zM%RWp^m)kff3wIqrfU`dcfI^0j#||1HMams)MJ7FEW#2j!wRg#YOFz<`_#UhKSp*Q z=Ks?(QJzx#|ILyAC;wYyMhJ<6HJq+{j+i=fgwEkGX@4-Hl8$-&X;&Z+W^DA==#2vvg#1Kak)$j0s_bUJU zmH&2j`9*g5N&YYD@AF$7lST)!5pUU7lRSkpXj6{bm7@;izrR6dPAi{xl}|M6=Ks#t z{?FIelP$}&|72RYm5k=EoRiK4T*4KkXS1K#1=r}E+oe6%H*V=SkrU1ybdJ4GY>ivu zDwkSwNZv!NpYc-i5vtS;@geec^109_+<&z^UsVSP*O7^f&-uQN{{7p=L6n!?Hv1Pi zrklT3{sUtgKew;3aQ6LzFpTVdK7Ue`XLOWDlK_iV`AL;e4Ln~s?%!7R+dJoNt3b$q=bbpPCYDBSaseJ;r*ScVm- zoT3keEdQi=z~malCRqPFrhiyRf4a_l1APnf>wZof2Rd#Xh^&53`;X`wueGS7ClJjs zi}var=s#s=pUf4A<_v5SE=9%1pUe+vJFoxyvNoz)`;V@@+NQ(WsQu=@y<`5{T{Qd$%`0wI6(IqZ}r~CVsxt0T-Z3Vdtd$2Ex z!y#1PHGhFWALr-SUN!E2`a|m%k2>cB;|z{Fh8W^VFW3H#VFS=RXUiL!qIl#o@`ene zzWxqzm21V3ClDLSo*~bmYSojzgW7)fDHOhdaa?Z&@%8~@&6{QDsP{~dLEMjfBx|FcOOiuwQD=Kr7> zEl9m*{F{H-ip~Mb*JZxGb$~lRt;`NlzQjG9-xGZ^=azHbLAl@Wk?9-Cr@G+*J-=_} z>(ZDki!to52~rL2g6ZoA6;+UbtkS`1Bv>( zY_-So@#&Ys2*-{>5yoOXCSo$$x9WpLwD)!gqP@4fkfTT6I&0kT9=IpX^W6h_YPoxW zR$=>ng{jh*j+rRIEX=_?EWn5Tr$xfcumU@O=bpVanq5#3vOj&v_j?A0mEu=p4c1`; zHem}&QHBcaLbMO!9&#TJ;7}B2EaC`x46!II%{UpY4@{EL{(ntTSiPQ(!e1R2whjHD zJ)R(skKF_DPv@6p=_gSB_0irB_0K`~<{kHFp?iZIDyI~MC5}0TGdPFXQv3feED9Is 
z(KqSibFC9MCp?;SQGd87L~H01sGD6BE;*)o(97Wpc@1g$b@C=s^o{}M1q^vP+!DTn ztZ+_z*KlL_;u~(5A8_0J0J7P9fEF_Kp7{YI%@07^1oI2b8|WyO4%s!-|3;ewAdQ@~ z?n(0jZhH3~k$vd@HFgjNquRWL8sz5zWd6y2Fr$F()l9#f_C zGUUG-x`Q2YkR5>vag~nWMeadNdi%%&sG4REv9Tp z{Jnh@zCU+V*gvu;9CB^{pbm@sJ&7iCe_~j8_pbK+6C=Vuj(Is8F@NOn!M_OaPWlVq zg)@$p`SWma#-E2DH2!(``*;2<9H{xTupi&wI5~W8#$Q_h_LpIA<6nmFPWw;cJBjh3 z=WkvP7mN#Zf7zb=%2?Vtw%uh{AX}~N63*fTqUYcH>6gPP`WbZps3`OpROorj_Z7O# z~v&!Gx^`WufTcK&tTcLT(*3i=LZ$fJL--PMXiD8)kWIx5ud4d1@ zZQoZE9)%)|#dsW5pIRFlDz}F4qv#uU!>yUV|3dhue$R*MG4`80`Mf^#Z-&}I-_&RI z&5#(qF(fyCLqE(nLiG*y$6fXZYA3Njrm;VY*&k%R@I>iP##BtlOq5_2=Ai8{`(w9u zcnA9ITswNbm_xMs3tA=_rT{4bXO`Qq*t*rQW<#339( zdaE^hI7aVmw6<@l^_KKFatHNo?YGA7sI`7Io|CxBR6%GW(}>M*JefrmJwDoVy+0tF z5I%+EdC&E#=X~39j-GwHF*tTW?P+C{OsEs;@O1x=GyZc97jOwza1GaS6Z!f7eEg{I zF~9%E;U|3%c|Sp1<6>iwi24?q5cNH_c>i19COHgIA4JsmP>T`5qmciuz*c!Y z&OX96cH4ILTN&B=Gkp#0f}YV;6`|6|cz-Y<+NdwyYD zikyt{9}fx5r}-1~l=dT912COF6D8=-mUJex|Lxj;Wa+so{we>zaW?<-b^a+@=&AYq z)AjsQv~B0#q63}iLIzpn@W4HqCH*;=hYH~ZKF5W zckpc|xd+2Te*Wt+|A~pK+Hd|ny|$bGYYzTO`fAj_=fC0-_oan3;+l7u|Nk}h(@UNy zeVw=sNbTkO?>7J6T>nkNTacZiUjk3(|5x*)^ZOsnb$zp?xm?<>I|u4F^1=P53>Dag zJ=li>IE1#N?hiVKxW5D3-_h=GKlc~;_1{C>-_!2zW%u`@``hULB3J&ov^qHi-M`nguWb%@L21WqAuk4*DEsrSzapTh-|duOt! z_zXMbcOhI6cMa9Y+2H4uJ=C)6^WVRkrjJays3)&G<|c084({Or9-$9y-tBhfFDiST z=-T_lH_v~6litA3Z(Pj(U#_f?`Te=>J}G;xWM`xQ)+k5+w|tcIKZf%^grhw|yU1p? 
z!$7|cMj?hFy68N$Y}omJaPdRVF{L@;w$VH`}eP)M|**+BvV_J zzm3*b(br%dI@sEs=z8e=IPX~?hYfzKR1P-U?17@u|hkzu|wW^AsmRpUmP3`kw=hKhxC4CNH|81q5Ku$I`dOzXdjU8 zpRUn9Xgi;MpZ=)*bKcXwpyRam<)rrIs`ll$_650O-`$hehyByIa1u?povmHOlQ0i@G`3VnppH^s8_6F7x4Xj^N1 ze*hb+pZ)=DbC-5GYMZkYjPIjiyZ!|0i+PrrwY)qWJ;M)A;7nxqwT!g1rAf zj?G1X>i=J--$Z0nWZk1o;}ie?mbl9G<~oq~5c&TP$VaGp;}8CSpK$-LyEl*d|J@(> z|M%JdaE6d$<8o}AJv!dR4V(}(?%?`zO2e)pE~IAhy&%8T$+Ovg-=U>4>e zntw8nT!4Mvy+veN`HSYCETMO9RlYa+{xy9Cax*-`Y0CFv3X#8kO1S4U&xJGOIb6UcG{0$`EqM*saTDn`_*mj^(evvs z#+WxozlUe*e?0f7|NViuN9aRT5Ab;Ot63KiR2w`+*s_I}*(esp_3-s2yt zJL)mm@!kA{@`ATR&nMXrKOYzhqqxr$gkfaw_sz}mE=1o*i{`3~5H|`%(Qn2T+IE+Q z_Wh-7oYK%)RjN;`RQ*({ekxTzm4=2@rJ-?cX=vJ98k*OahL)wJ`qoQB+HtL^(ons) zG}OFNY7R$fs6`zTNTMENo#%kOjgO!Y#6(QSR8&i|265D)ZtvUP|F`wOzpelM?J(Uj zGf{$Bn1gv(fJJEA@izPaZTA1$`rqHy|NgfAuebGoy&ZDP-}e5$t^fUP{qJwJW^7qNH)%2Q$j-%IZ74Lp0-0$Q=&-bo+ zUzytZiZ;Q2%TR$`*n@reqy1<234b#GXFvb{F#n%yp3nbZ?A=_heIld%XPU2W4F{xi z2uE-X>DyaF3Nd=;QvCx9*$4C_^7~)zW&a-3Kkz!6T3qG9tszZzAXc+AWXTh#qQ{4` zD@ObNt?(HnFS6~gvNHy-^~rPMF5nWb;Mx4l86jK~zK&=QqFdx0+`|JrLLZ_vNB!67 zkJCR@@qR%VNbmiYdCX)XqW<%qSIkMKcYn#6C~_2vFc#JNK5F#W$E#dt#&wYi;b?7g z)XqlV-yQF`iI|M3n2wn!LE8}HzeDxA;o17?i~e_2`%X5f2O80|*}Y!x{x5a^k)GlH zPZKxEo;}i;gLznhr}Kvv(IcC^1Iy?u@Nd`efB5^?(VpNd9n<^%+o8A1`%CZssCwb& z_?J@t#M@y_6!%x9VIA4?$cYaDey~BPxj;kU! zky{X>N8j%$rI(=syYTe;J$vY#8MXuV(GMUR6Uc7Yev)1LwSN=zeKZA4)dxE@x zXdlA-x)Ak$e*do*ychIn-B`5tE`w+JUn>86^Ex&|G%q2UQvPd<{}=VA zTeyS&aQ@L{@841HADYpE{2t?by??0WkKdEd13W?>Vtn%|V?h1?ANgOUY$(42>4Q

KtTCz>4ByvqIG>i&~yv@W*)zqwf5U*z+*M16oCc{2ZR5dTg6cf>i4A&w-Pa7TNP zCVR==uPUEkasTY^SE)|!5MQ;K4M(0pY`uDyJcDR0MI4ETY>2zwt@qdvWXx~VaSj)7 z30H6p*HOKgf4r0rdX#^h;d3VVp8NTr5BbOU`44;fw`f8$p82ZaU8`+`&Dhr}2>o=v$z7ZuDO~qW2-USlz?AeQT`*gB9cR0H|oa72db;&PoCmrZM*oZ;*ZN8qP+&T-G0)K zTjpMkkY;}UeT((9^dhvK_WTj;vC?_|1OLuF&K~sq_j~?3+#k>W+5T^ydrCO}{2kXk zsW6O{&Uj43WTao0e|elr?^FkL2v4WaL{2!X3}?u$M)QirRlaHcI5`Kg0fk{6xd2u2 z85gdVheg6mki6m93)kIeN0NV7f6tHBpH+95ZA~UopJKQB&kC%>YOKLJY``Y8v3dT# z_IGGF%m1(9|MTaY(Lzrply5eEEAsZbF|25hsV&m${jIV|mLaP@r{`nJp!U3bn{S5Uq{>_l|GQLCoa9jQGFe*dp2YUSt^NRg;4%yES2<3k@AY7nd!WCS@b=<@) z+`)(E&)gI4{VPnqUWj2Bf!?Exd)dI1QSh1{E}*f*LfSf(F!} z!Y$mwE!;rBW1I<7XrX}?8kmCXGxo91@y~N?AKS+T6*NHw4QN0E8c>Y~RCo(6sGtif z^nTVM@TQ&j-udIJ=h^G*z0O{H@3o%wte@kNo9r7Ko(L1@ouhp}^hxw7=;`GfK2(OH z3-|ojG~qK)hDyh?$k~{Knw{)3`<6P$K3!#>9)JpZ!*i?A5Uwbs8`XkR&coq9l=o4A}__^EG_ zUCwkr?!&Jqx6W7Ze#7_UJ8WU^w`%~&ZubA{jfZeeWq48f=ow7(|Kt4sO!Wh@fY_E_ zvs?O*LLKU*C5;R_-hfr&Sc7%gfFonr%&Cd+Q*GOahn9q=^FP(8pYH#n9C*^c(^&2K zs0TEY#r28l#rYAQX`KHV*AD*v!Hr>)xVNAjaSxyB$;x(mobwRpFz%r5Lc<&GO<3Hc zG0tH;^zO&QKOP+z-v8jaa1aL$4-P-+eqP^|H5@wzhkb|Fh9AAQHoW)R`mncYZP;^o zy)_)yhaX(s7ol(UCP?;uNV@(*Rd|_q?b?M6#fqT{C)tH zYd3^<_HPIk&ut3ZS8oUhw7>j>T{+~pBS@mxJk|H#(7*QaQDM(>FN7bi7!*>%`@cBC z+^}cDkMQ2+L1FLIKMS=xM}(T~BSKx`6UyolA-#53sLu`y4IPt0Ch@s&c(c6b+N#5) z;Yh81*``t9XLnx;|8(^)Ly}(cOFsHnUkuI8$svydj^PBF%nNJ0Fe$X?V{E=YA!P54 z53PyMgoU^f5{%e^8a=1FOK6J|H|LI80OJ)o_$t*W&u6UIm}}beKC4Ee47^@w?TH@ z_5Hm(I-Ga^v;DRd%aPnaI#ll-9ahro%m++i6@3jFHtPS@ubLiO8tSK-Yc8y1^-Ey` zxe2)yFNG~+IkLj@`d2$q>39bU)9;$BA0IV`_0J=PI@BYL3>vV@HG8lR2XF{S zkVFdYY2yR3{zu<_C)wq=d!g|G^Zn0U{hQ)wLNi*>T5Wtmt?>o={@eEI|9{!K`Rb-| zOrwt_WMbT7d}qR&>2)hgLkh+Df4^rtS4;mg>0dAX$T_EFe`&~*1>}?i$H)`NHVJ=S zxqy?7PopqXy+9e#HQhTkDQTh5}Xj}bas9EOyA%!~BU-15r`M~=-`ck;% z9`E2D9^erM{Gaj)24M(>A^P}L^GAmf^y-bHLvrtcFp6G^v3NQ^|2F&kP?>O-{q^ln z@LTWi*f+B8tweDzj&1AZm(9lZ2rI5X@UxOIjXncqn1$Ks|DL|b*VX@#TEf07$LcqV zADM3PXVpD^Ixx&}?mR3&p{pb;A{S#R+9!Jc-ecZ-?IdHHUiUfA|B~l_%JV;^{(nyW 
z|B^mPvej|4&AHK@KiONM|NmPrg+5~kdcSUce{rtFDy+deY``XL!Sz`0|DF95zUsSm z+_G0WMee{Z>_M)>e>9ngMSrsXa&dn~Z5chg-JA42S$}!FbE@VK42Q^lNXCBOC{ev$ zyZui6xB7cijx!O*4eW6}ndbj*eb?APl<#N%+0SO@Jn28k(eo%oIhXxaC-~F(e>JbN zzjGhk`2V#2z=~(WG1s2JNu0)6oX166M*AE5f8^g3Ke{%u?Laa^NvC=$)^zA6P(Ngr0-y$mm8Fw!dW#6I-*`Z%dKftUaGxiR$(8RYJbPD#vS( zxgcMm9%;l6dK=>ShIM`~Xb;##zEh)q-}QXhLN7-pc3>CwU>^?P5RM>;6#Bm5*^$xy zHj}X}Fh}N5z%iV_Nt{N_oAN(W8{H@B>1jO4|AqSJ!&&E@$3;ACpRCnB>9~Eq=l_Q1 zzfAr|*GTyvPyPS@UH)Gy|Dz3kzli;H`ue}C?Y_%?)8ef7ls1QUe*Kr?{O1?K4Pm!% z2XXArJ@P@+zhjL|a=?1!DdPC!LFAM1$3y7DFaq%%b}sSG$Zqw40vX32Z*YC}X7i=j z4-VJpqnulcv8Y;YKKNeq!4H}bPBy%3e7^7rNI9-0IEf@zq6oUrWOA)$=k zd6@rxfqzP$jh<@$J3qU-iT_U44CjB3&W_<{BJVjxPdJChac8|=D2IUHbyM=-nth)X#s{{DUR@Z}BvXFYYziD*o&k{qOX)k^0}OUkHoEQ~6oz z>V4Jvne?Qv>g)?)IlZpe9Qyq)gq8GFXt-(~!hLh+Z<|9;t`XL9-Cl*{2IO|zqn+G> z>{a(Aot@HI?zj?9<^^``cb}+nAGPkSxc|a*>38qxh0^c7*oOl+gd<3z z{jTp%{?4NlU5IkUVGoeSSoqUiZ-N zcYTNSV`vyBZyfA`3nBme7^Vld~4%jawGMXIlsdEozrA-|1E8LUBbIj zxS+2eHQTNKwO1b>>a-2i>+g%>@-t`ndMNt8UvTbE|64dKp7Xef%eaE}wZkoj3Ij zV=;XxdOVkc^0r$$Ko?oGKm5z$zN_8rIzP3zzYf_j^4T!WHOsLQtFQ*^umPLU&d=^( zZ}WrM-+}D!X!e&Zpl7P~cQ&>O&3y6}=eF_}v&O&W(1xms`VYiYj!Nu6Qdr#AZWlf7 zIh4X4`aU%5<^R|6|Fr|ulOHV~4*2~Lj-cXezOld2A4gB2^4IL}SAAFXX5^4Z0evMU z;TYN5Ixw6dPvSJr;yfv2Aa9}NrnPykmDr{Q=GLddqS4U%fO-#yz6i$jYC7Da3WxOT|@XOy5|tI&Hk4 zF?!?a6Hxf9Ir`cd>JY!#^l0BO*)W)Gebx8?=S)GYb50{$4jVs1mZ9*PygOIETrb~~ zd2Mfv?DeuE1E9J-(<^8 z`TMT?{ZRfU+Z?wmV~XRGoZG2TrO8Q*_^JcP=xd+vWdGaR8O zkwQgXO#+YI9EY9O1a=X1_w*4}_v)i|U zEA(sVSuYMe**~(TduFILpExzLEX29y^^?m&dVX2Rpkc0g_pbTq_?{b%Z{ZH^q20Xp zj!R`BPj48V;zr2!M*iZ3!rPY`&{u#m>_HtreGTSzG0mOvNy~OWn};FXNFniY|OztEWjcx zMwRFGwEbYMdE&y7W8DAC=84nmq%|d-6`%3k(QwrLdtT{Y_kX=C)JSVBQmDHsOnCe) zBi;rS`~NFKSS7ACSceVRge_>_?D;1=e{`Y?-PQ{zOfECdu}t~v`LFlb*=n~9NG}){~Z#~5hM}!0m?1pe@r#L&$rO2PM%-u z{`{6foKqb4)_AghO|!5#|1U@8k(4(J%>zxQyaeHlgQjwZlK%yjPxTa}Yf%HPwjKZ}BWe=d@j(SBX|i@Z8l=XPcKL3wMNca5H``q%6HuZ4V( 
zCCXp&PuG9!|D-+M#8aIA@b3e|HG0L@*%z|^7iHlV*-PFb`#x`t0%gd(7^dFvfXr!&4KR`3JJAt<&(EOTF%+}AnxdBJ@u_lNmy1gebhKJWTb^iqsP(zxzw zwqQIx?!}%OY@9KD5*o5>6MGW(YOg1!2%Cl(D8onlH)c65d`q7$S?vGTpEHL(4-2pe zH66zPA*KFMr|wYy9NVtmkeO`!-^CK>ri+7x=$t)M@Uk z+n}&t{SS>x)IY|kf1qWi`iD5P$R*T2(6Zeg2jba;EhtA$8si-ON_yv2W$%9N@AO^h zxoP|Yy006*aM!p7Vf%0Zhj0W*q|pC^l8_;L|DF9M`+lRny0%K)}nkrOP)tuSMnlx8CiO9 zT_E4y701_5@N9ZeoQF!*$Ro8epP&x)S>M0>T3jD1t^Mma&xaeXyM;TrhX;6s0h>I3 zeQzDe>v!wK)A~I<_Qf@xlm9Qt|76RX?DNJC{R4X2GUMjOF$hCYbyr=Qte&j@4I}8I zP>MJPp>DYQQ|CxGeQ5v5C*!V)^ULDRGRX1&vf06*ncjJyA9&jy zZ}dERM)IxoD|b7m3k6|S7p&n&oS%e@NMgzmqrb} zb{YQ{b*RVF{q5)%U2_>%a1A$b3wLl2?Q^X^FyA(`$ zmg_Hs2jVGgO@v3}KID3xBYeRBE&T)e@EAmY20iowy0u$$p}7Cq?PH)Xvhuga z*Z-@&ALkCk2vmGqA1zsm-e2kOC;PtQxsz3Iz7QsmlQ0F-kX)wyLHG=M-FD9gW%OBS zpvSct;uws2%of%%S)V^S4>{l50&)?u>V|PnY}^B8vE!vE(0kIp(xdw53d~yCfd$P`P977(*{=_jr^~#Dg{_*hh;n2oG;ozHt!uw}F9)8j^ zEgV=sE$lz@h4ACvDd9(}r-Xegrs(T?CA_z+Ec|e7S=cl6e}vs@XM`VYoDsI}RURHx z9v;^J??3wgN`3h|FN7`hz8CF#=UJB1dw=Eo{Tt<_{B*?gK746N_-Ti>oLX%Jey{j> zNyr_h|JHc?|0q$eTf;+m_gZC=vZjZhQlRe=mQ!}cwt~E}sQi-ohl&4TZUmq8=gRz{ zuY`9-zGD92{|MWc%m_QwoBmoAIYme(gOT4I!RsZSk*!S}1 zq5jODYh(S3kQw;r;qYx^6r5M}!L#AWQSB0mPlcbYcsBggVg0A~|I+v~4s*j$;KTNy@#?_P zbkqDhvgM)u-{@JsW$v8)-@5JpM!zhcE2uEX@EV!Cul{j+V7NiAbAPEZ)>Vq@|7r_w z80p@GrBOe5V7TR+mSuc!@*Z+ytcgoLLY589Ydh%N`D_^Qmi&tXy=Sd<3hfBp^n!cs z!IS&W(>u|Q4m{bXu648fUN0RRr2~z_rAt_5iRTob+iUKLo~j-c>d2bzL7_H1C=BwS zLof^@Fbbs@i}5J#ub|$OnyCKsy86#t^&jsi)5QPB1m{e`6imYmlwlTTqkW`x7i=`pfSz=->Lqgp3`#B_(yuyc{z25HuZ?A>XJVi|2RjS$*U!y`glp0 zN3VNbn%>l|L|=@CtTYQtqh7yJEK{zahJCLURveEvU;U%rI=ZgkfK7<&-)|wyQHd`3IOg$~x8t5)f7%|{x?6n% zPsSTH(woqX7VHqmF6_ZR96($*@DSO@CKSH;VmLxiqW|C2#mEerkwewH%8-eV>vXYi zATKPr)BFV#=ye071;^+o(Lm4ON&8>rr#-)4j1H&$b{6Mx5tq?#-tQH%*Ls83$iDA9 z8*Y%da0mC$qHK9UKEi;_zH#K5)G<2rInm>ol-LJ7ggy*U*Z+Bq4M5d=`^CF-#vKdjG6d;jl8hDpwuf@zq6%Ad=( z-eDPi7G|T@nhtZwzF+D;PV4_ZgZTa7XFweOQun3E> z6w9#^?Pv7=U()}7Q~&=}{r|oC|4~2>8qL>lny)V(Eoi;1{~tNDp+5C z12XM6)8YTKrD2u-Sc7%gfKAwfa#W(-H63aF{MDuUH$78zyKb_$zKc3x;~D>tX0)JH 
zoY`*U2FNzowG1w0V|;_ygMG*e%i;jNbGW#1h<*e;-b-O^sW#Tq(1mi(zv^}GVWIb+ z4L(UPystmexz+U4eCfhFZ>gJn)x4sq(&gFK$2AO4Bk$F+=cyIy*PGe(H)H+UH;M*S zXzR>~s~I`uQNS^rz@O%~u5;(lP#3NUtKzR;BX1zN-daH99aLBUuX5y`;|F-suTcN0 zbfV^h^pEkpP=|V?kwF6o$CMcU>>oa)z$6&tCxF5p@(d| z%KyK~|G&@wzs>)@Aa5ckf3{_{{eQ|j6XNOn>`P%Rxve_5@esz-%gJK@hrNC#IG%(l zn1=pul!h5(8D?SoZ-1AVP0m5z-|7<}V>z~f?AKO;eFp_N64+xy^U{MKHR-KF6RHQ zD`)Auum}6l|9nX}Kvs$W5P1Yiq@whH)xtCMxF%uB{WQ~aXc#RV>2ca0Cb~Caaok~n zJQmN{{A@Tuo@7w$b`&hSEoE_rElktD& zU2_qaaRt{o1AXKRPGXFP+BZugO8fB$#+{}TJ_Sv5PaMY>uM=LO`nDYW^&s=?ND z6Xz}5L0ot0$@ssRkL#8__WvtCt{l}~5Z5G(dl;nI@3t4Nbi>a(SQ-I8HG}e#du7>Buv2{uYaws zyL)AlbVaV$1p228)tjq~wx<7e+ zf!`KkF)EB5D7HU6=lh$kEmqiatil=;e)W7?)B$A9lv{`)4H=F0zM3)|32 zW*x^h`r01K|K|7a6IUNwe}FuMBS<2JiW+_IWHWLoH1c)HST`t;$BIbf^u5`!=V!!QD)P>TLW|E|r^ zxmjZX@@KRQpjeLndn}i=3#`yi!1wRoX#GF^`L#Fo@$1X4Lp@(QjSLzvM?CYe0E@5~ zOR*d)(asO;==F@yiLR@j&2`U6-+mAOym7wq2WUndC(!zk&+44qP2&&9Ds2?2#IpwL zumQ;?^Z)s_o9K1oNMQ@T91UaK+jRFhQ~I?7R0@mhKI|ZOA$Q*zmE=BTkNPfa<$vYF z0mp|>T=!wG{GaxnR?Giat#Z!+jpBCw@s*OW+%GN8DDZq8$8(~Up2nW{`6#YuG~x_ayD{ZX0dwnf^12!Q!m;Ha;xBzDZqG9)k{r|OmW&H*p?f>85_g&b7xPL?e`{)Ny?0@*4IaTx{NFs%rgCC}^ zTKc4=URsOu8_r4JDd}`hGjhnIfMYm;lW13_cf@>#PQ*P`yXD8H{&##nJ3sWlS1A8C zdpBepqtQmTOw@)ip0hZQi^$DaM;K@Q0eYwBkZ0$w(66EAu(&(?M^m)n>JGxH)Cq2p zcaR)z{Q>d;it7)|l&-{^1GV})2BaN*c)+*o&muruUeL0nX{rg00 zFDQIL9&tZ4E7;%l?C)mzi>zNQe`mGtck8Dkhd5^#Mqm_5F&5)70qyfY?0?jk(21_0 z^7&}-50~$sQ~%(De00B`;(kDD|5yLxBymhZEHkE&*_$5=GsrUJW~z^nv(Y(HS(Q+> zJ*SK#d%Bfb7p{{Q7aZ!&XH5->Y4| zSO&bO55VthunwD0xz+Rf^~1y#dU-s?4)m)V_Ul)wnyO9H@gD5M0VMBg=No6<6TR+? zXRA(gB!(l_nKF*kM<3U9HV8{0gJ$&q>|r8D=21Z3cONE>k-cC3ec}Xp5~pz%=h1TY zQR1WR9~Xt?gk|qON?fLQF8f0wf8kN$3ccb#ypuQokSGZ2UjK(g*Tz32Y92mH)Q)?U zNKJf{s3YrVK1!s?jN=Bf@HuUQt}Twg*A{StehYVS5AECjkmx{u{~r>a=z3S&S^v@W zheS{M4~fPlj}lE+|Bz_D`G-VH?;jGa^ejDh;SY&6R8{{W@jyI}FyQZ$-$)9p7RMlZ z-5B>K-XZj1Xn5JZ2}@6PuhSnTMhI)U{wOhuEJbebqr_NpJhH;`O^*_t9gh+d98W@V zUl?UT*X~D&Zq$sHerZifYn`;$lj-^X0}YtsnrWDUGR(qk%)vagUr_!ae^dF>D_(S? 
zfF3kXmVWoyoOqOIL90BURR-k9w(D{H-^0WL@f5zQpOsvSHG>_~ZywDbq%g)#vQDMvg_2KAxO_ zI{7_y+k0!0?=PskWW7i9{=0lF{=UC13H^UxV!svR`J?~DPnhJoIBsPMISn&VhTg9_ z_dl$8`kfLs-##S5iu1UCrQS`CbL+GG(>e4`_C5ccIezp7=wVk2=uT)q5dL(2;a+x% z4X$S|)5qCeG~BiCy=xX@DVAd;R$&d+p?y97d$a!_J5Q-!oMX4xvjW-UxRLE`5_j`K z{x?}%|Bl^^Hnt5_qo38E^P9v5ac#mDBwsWBf2uhR^t$xrhO(V?+=bZB{ZWN3cX+?vE^LhId;VVD2fgMBEkmB;jL z#j$2b^gC1y91!B3bcegmK|HQ*@0==r+!1!Y+M0_$yEHib)0yW&QomGf;*;h$e>~J3 z|71v=8m?|LG}P-WYDl~oGV6yLhtQw+yO;YDhs1FNmA!Wp?+m?{sF;2uv3=)_#Ey;k z693S9I}z8POZk1*{fCL~D^tEV^G;&tvO9@AuRlus@Zcjh{rOP&E$dD-JxKiM{=o3w z8xInDabVdq;U`DEPuDghhdkbobtJMT9oGQV$8bvj(9;|bYq}HqpU^W?Kh0YGG-yT(TIcJ>K@M$mg}qi1u88Lv zZr~Pjn@d7g_#Jv@wnW=bNw`OUfS$c2+8;~!PU=W_BrNV-H(k?eXy*+>=~SG{JfPH7n7 zoH>|>HpB#PM2CQ-II&8ouJe_~hZA~1|>AC0f`ztQ{#{~N9JEOO3kyZb_@TJ7G&6W7MuLGD6Q-+Q(2 zJ@mSr+6GXpe|$^bnVvx!_1Nc}7WaREJcKy@{|K2xwpY7AR=T@mA3hs{0^1+wG?37kYbyWhc|&!f1$W0w72FK@;F zAseSkzqW$r=cFI4Y=7JvB8Rqf?0@axa9TWPaUK^@9RFWEI9#UJ#kK+C1Fq1opL)YcpA;SImv!X3oD<162iSAJ6x9yor40hRIy24M(>p*SzYJr>uY9Ou1A zf7?O(B$8Rj`EKvkb8fHpp6P{&-s^Dh**oqY_d*yUj!`JZSd7O6Ou`hTsy$clpys&e ze93b?=(&2n^}^#Aiei7`aJCV3bJ@l>qzxH*!)y9KtV=viJTp*+iuRH)i=2%)Xn&~v z<5kZeo#?_JkH6Ob!DnbbrTyc$_K!>I24v208`=A>{QplF`)>`u-si3JFU|#6gvD5j z`TgxGwJz$4L|%_c-p#ssoTUSNY#JwJ(r$57~e6Y5T$w;jYP`89C%p zz%iV_)AfHI@?-C^|2Nrx_Pa2b{U;m8vHug2 zK<}C8JDKb|L1S9`@NwI*9N2a)~1dnU{fUWQpHn8Q`^3qCZxrI-G5 z^&kHHJjV-=W2Y98i}7TbKtLUz2_I`K$c4{&emunz}t2uF}a`LXi`D^1t>9Ozg1bGstaTak5blm&oJpIZ1h03pZF8bRWy#Lq+#JvmkW2Aq({_oxT ziBXr3>Hao#;Y03g|&&xBhSc(Y#Y% zH`#hn-!_?Z+(x#HEDiU>^8k-9;P1Wnh0d8-8lItdzN-IwV`&&fAA%mog*Wtn&)5G= z4ii?jq%@2mM>e=0c^aSTIBJaq$&Blwy6j`pX_Xz6lXPo{_K|CZ)jX-=W;1K}5h zBSUXETKY%p|BM%BrL`^I`Hr)d8agJZtZtc3l7NBaqeH_T@k)>fVxfJnP%=S!|)9a4v^;Q<~YCoH?rx`6L`eq$cjdka5RFL&JYvhP4x_f+-Z>H7bs%l}C2l>fKO|LkX) z%pm&6#r2==8iR--uCFwv`<;@3VHkY`+P&WnWqh9OT;iR){-OPs$9qtm|6k4ir{({{ z>_5FYKiB`XZ8k1c9HkhG@tA-~n1cSlH~(AxsrPH8VVdK zU=fnSs+DVt>2>mW>UI07(U+s)WoZ+Zo+_=sGT5+M0{|>(U!mn78Jd zl~{!}_-OynI>+r-`QO*s`TOiVy6ACid>kXt!{2Tk&Hg&4`6l~IwmxKk^&#ZYhL#J? 
z{TJ;+;@X5Q$T=_We^*ZL93yV$6vzL4SGhmV`P2Q!WdB80I;YBi?I3p{jvd%T?n80^ zJHN#-^amUtLT0mcEt5_%_Mz9@H2*+7B_+N()bAFK3>t96HA$r6wP;2Tc@)sTUHW&r zM>6hJ*hP}H!v~(!P#d88DaT>Xy&KdbkI7{zjv-3Dl zzlgY|e*xXj=^Ce>Cy!ml9t`ZLA4_*Qwbs@L-yZa*DiF|BdW!wf3EU@n?G65n+S zg=NG%B{xJe^J}oadk;NSy!uPxO{{M&U=LZuZ#Q3^4tz} zdRBow*^ zhbiPV%s|&%d0AdBAkH^xzpgzH`6cdISnCJM0^}}e??dBA_886d7L;F%o7x))X`~Ty;PS*!4 zJdXLvvOm-4agJFTS@~V>#~hHJ>Ar)>Y|=RI8=dsHCh0YKc9!#IV-6}xv<1Cu%@6vM z_3swYdpf0Hl&28q{~hulN034W&B!5- z0*dSZ%=AoedbU?R(cd{8kF^)X{h7|v&*LI4BX`_?T;g-mJDc2Ft@lm8 zf#Ui|1-X#fS!ZKUuwT^TB0A@c`f7gThG*QP3F)xT$r@X6VC!H z!eYcWjJW^bQhMip_II!K2k9%(!>$(Yv%k05UvibOs`bV&kn50SQ#X*CP~89TjOTjE zSO&-C$aH(I)t)n%&c^l+>;K(_SKrl6 zDV}%geJlE=+KuPvxFG$;dBm|EUAv9#knX}>>3>)H=lcE@`u>*q{>aDkPqh=>w*Eg_ z)(;4~#J30gZ~(cie6qs4Tzcp3$8AVQ=t=ZUm9LZ&-P^Pwkttzu56TQ#VcbjdwtPm< zp;|smY3HgtCl1F2WE?l>tEwl{$Njd`zJI7iT#K}!+Zbh}x7!CW{-(XF4#!-30w-}A zXK@~#o24C3_5tj<jQjIj-xsE7_v8CFDi^<`a+mzs?N}nG2t=Zo|0ncN52j9pc!9 z%A=nP?+l$7Dn`E?whwjNM^-TDCN zdfEB_!>tc6)cOE}%feQ5)omyrXq`d-SI}4A+c+~^r1yQdt`P?niR`KzYa7)-7+(YiUnZ}>VEA-C0_O{0(`hZ=&|4a6^NB32G-QyWygHUx! 
z_%BLA_0gGOh~r^M(npY^P&ad?ww#$EJ#l8JN5gPyWgz#a{%*gIMV3CEY$2->kp-sOIhO&y0sIL4HvX;C<~tSZ_IFh8D`-f zYsFN)UlQ7teI3Y8)y}R=`=jxXAEiWFx$1eum}5a0Ech{NfiD6ZEPfp{_neN zBMOK4zi1R^(_sGZK)&ux{_kD3wf95+|B|p;e(J=>&v@%-<-Is_!s8hE488LMakI0{ z^a|q(ivB;F-94TCWn1Ik;Z@>2AHO^2B=khqhwDjN}{Q-JbNb^SN-z@!jB&=$o zy4d&C#Sqs{9z+g7Y#WGUC+jfG@d#vQO8*$|dAf9>hHb9(J)~Z?{ub(|`~H!kH(-=& zN--7_FbPvoXxHCNw%_D$Z+Rn*e z?D96m`M(8i4Ku_U=RlN^vrrsY@;l>7=yNa+3$O?k{Lk2Lu$aCS%drx<`~KrLzntF5 zKhI+geI0t1xwnn(Q5__X71|(d6SkmgqV-P5YIW+Q{9H-jfn7++t9AI(^%44hVU6-n zSijF&J#n7i*0s`uSf1{2UFFZU3w&*GI6yyye)rrT%*CZAkwON|$RUpcj-mf&as9`^ z);_oXgX6wWSVxvTjk7q9mb?0!$ji8bYsjhd#rnq$dgpxKCjA!u4xajhuliPnKlOiS ze(3*hgkki$q5NNZWt{(F3|7NN z_qNtOCiuU?M>uB`O3{*iA&e!n?jh$M#=GC~jwc{*tbZrE^zA>rKlfPkCLa_jan>Qu zDM%xO226786imYmlwlTTV-DIqr;eS{Pj-41UG#40D@fPl_CM*rE&XUY&i_9u{m7vW zRrmGzie~{9VKI_JrGKQc6!f~O?hVW7E735}y&>(K`f)@4$Ud#|`x>l6g*smWm0vb4 z&6?<&gl$1NDzO8*P_tA1-z)#4j{T}v9z+`v$A~sOv{$cl_Tc~yp(lJY93hiPp?$Ob ze^eerCyI9V19?pT>p|mo_U|?Mf3Cbrwl0x3$(-XhGOquV5l=I6$Va(^jYNSS%alC3 zd5nGn#eF^Z%fGw*7kN@xT>s}Zc@{}+2j|I)h<#wicEsZ!3zr>VK}K27;JBWQ>kQry z*7r-kRf)cM?SET8;Ez9i+}>aKg!$!uFMPHn+#&Db0pc2*kH`VLJ5_!W2xy43uFOW@8Rw```AGVaDkBMjXdA0*dR$ zu#>TTSs<)svUYy5;+N+1k;{>L+4JLjt)#~_N#Yt1tLSS`T>ncNyN}BMWX*NsuaLTL z{1xiyX=K=j23(WQMXu8a6ypAA>*yP>30u(426tRwqk7qYbnRxF4zf|~YuxLn@pbkO z%}dxicB*wQ+jg7%LmR5r8fPG$O6P~4YDyy|Sa&HsGouaxiNO{O2ue>_dEyX)Sr>kFWtN5duehBV?_jEllrhU)_$uOK(mJ0ou( z>p$}s)Jt&7@f|!Jf3(>*u-^MW?)`t@{a^6@weQsX2GYp*hGJjQJ=Z+IBMjIRwtg@A zx#S=WLHkYhUwJ&Q9O>Mq{!8zs7rKozQ8qNnn@wnTUW@u?>qFlrnM0ehCayg_Ogtkn z3Z+PH^!|MpW9fAVme^oxzI91_WSFpR!S@WA-_^~l?WEm(=(ycH3`E zaIZF=-rpM^@FnZ{xIX2a3@U%DKmT_xhN?F`YyGUv!g9zX&Yg^9Ux8ltoaZ-F`AI*4 z2H!&5J237sAJ;&u**!Yc?zhJO;nATE^=C%=wnm4{xzVBFgVAAHj1MPq8fS4H7jYTw z8_j=5{*?Le$Mwly(l<{Q&~vvmGbwG^=&WF|4+8PUK-j@89#8& z_<>8t50G8AjURYu{J?eN2hdn;{rM*8L(5L<_wO}-|E~G_&S@iS^mo@HHBtY)zV!NW z`tRu(vO%0z+~YOez%AUt3E%QP@&Q_|z7!sj1AeIfj6ulV^dA^P@0==aV~jJPk3i2_ zY16OX?LWKxNBNuj+77=Iw!Y!}n{WPo?Mq>l^GZ=oPpxoYt|`vr`i*BV-Em!m`ep9Z 
zbEui@{*ijsc__}y_gpfrX~05_!W2xy43we$W&O{{PnXZ9`Zg!Z*JsqF5$Ae6 zUH{+vZb7RuKa1Q*<4@4y9nTWaY|Oztaw-r~(_s#Jd zM4hO>MVwCCmPOz^1TW zX=LaPM~yLd?IcXWG|WI5W??p-?!PsC^s_Etmi0d9bxkP(BeF5SgK&jW=7Zy2QjJWqtL$!VaGM(jbt$tkY$2B;Y`h7X# z8q_PvRjB;3ddoLLSVLcj4cLS&D96+Ne>(gR>T9JR8PA{rmCo6LUD$(tIDkVqf_Bfm z1I2mvWY>kqb*qBHbLXvj1qEFaIHjHpKpiqbFAyj zhplARaU7%5=DckSwE?{Dw-szTJ@z}?6Ym4$=tckkKOVO&Uq3-`{6rG0Hg=)&?jg$6`Dt zU=pUF|3&?*B{ssC7xl~{!}ScgL6^I-#7Glu_*6zXO^_9@d7`IvmmIM;r*b2ecM%2A0O z*o8f4*U#U<*Us~~J6H3s*YmIStry51zIG%3w&^hc7cFQ-wwC|9oBxXz`abcT_YMz` zhmcD<2S@0gEBMy#D@jkGXEoniSoeCqHe$Z7y2&q!^*?&@x_ftCGpgw+?FF$9J?A)& z3_b2+@MNBU%`)j1PinLC$a?Xo?~3E0^Tn*qC z{|oXsavk!2R^Cs0Z&~)gTVLd1Yl(|9OUAYQrqJX50n^C*81bVE-7hO&hAIP4b<^Dc zpO=K~t^Y@u;k+^=g;iq~y>7L%p^|+qwjU{jGDxqGR^hXq(-+LA|N6JAW%ELq<9Hqx zU=a@2ek}a-p?M-ljcFXL9-(HmZvVrXV znmP`-1?8y3NBb9cINpUl`2Oy{4E^t$|M4qj)4!TuBCPMXL&72Q2r56NUFQcSVgJ>? z2tUS8u3Kwx`k#e^53R#^`_IBZ%8!TS%iS-(6n=2(FT)SVy%hGSYwxu-;CnCsdHB)D zzpxkEpNH@Dej@A~_ldA;;wQrQ?;C3+?tR!X{gdGz_I@%{T>n(qKJZha^6IC;JGVa> zl735}eDaIo?}on^-mfhQ8OP1YA&&wMoGA%EF^}p!>wE0I`th(2KgxbQ9P`@=?0Mt) z@IxHl9l}Y+Rf)I4k^TDbI^GID+x=Gfr~Tgz$==POcH72KbNIWV?%nT%RN_0n*>8vX zH@*`ZwtqWh4t_f{^?oZfCcdTr@SCA|^tbp1-_*zO&2Uq2n?wpVM{|_8L!hj!1r~deMHnaozm$iRrGw2$_|3?8m zXjBF_p&2bGwtq}jj?d)(Ya3`8{cIQ{uHyQ?=K2hw$GsD?>PExpomp{X1bq~GZi`zz zqPYH2^)sPVSk)WPhOy*$B-Oblkdsh7RJxpBhbfMyA#;j-5MOZ}hhBE0mjBO|q`LY4 z{KWck+W+QjlS2b$xTXxVFdG$LvW7D`4-3#<&HqpH5y;MNJ^|V7xIh;D|H=G+H1kJW z(8`B?vOi7RL4Jie7Go(|rVk9u$*ekgPQ7F$eHGRqe_#CQ;)i!5&Y|sD!LRUJavOX9 z(3oX<)e1KB>u-h1e|syu^Zr}fu%u;~H2JSMwl}Wf5%>Du;@oolSL^?7+pbLzmGS(W zd=PRM_Fy0WsQlN?_Fu05tIqe)`riR@9KsPK(ffJzk-s%3;Irx@zff=c^yZKf-mmYf z|5NNcy%{;=(Xzy|_{v+MKtF~Ph~t{#THq(?ozfrIW;snii?}Cw0o~5&Le;^|VT17V zxQNS0p4%L%vzx;edfk2he|vMdM!$iE;afrmY3I~q>s;mcLghDFnzn>n&dIKLTmSRh z;SRm>%f|hbun*VY4xQ+_|F-(!+xmdt4n3#d4)>h%0MFnN2K<ROl-zG@8pvuSpyK<9(#?bo|*!_4Mc1X%yQHN{p>@?ouqr zO02>fwDaLQwi^Sp)A%2<>mdJ*G@c;zkc}IS|3S0!TCN)N!uQLPIkcS<=KEPEo(e5Bpn~&IV6ri^=GlSB*4=$Zva)yfh%}BM+eZg1mNLp2H!> 
zN08a9UJ-3CnO^Vue=Z40;VBe;q1};eMhDuR7)FqzP>QkW z|J2)IJUIa^w+Dntzg}yH&^y=i4c}CUw2xrrFH0Ve$9zNnQx3$w;@&U6 z9cDYP?~7ZsKbC~@3-Y-=0k&4lSM)jYx1alV$liZ7j)z=?{)#POF_}{4)cJOn(wC#S zzRN-NJN3`FzROBsRST`@Os+w4uJ=uDK=o_p4oUws`X+2aIV!OOyHK;AUxCz3cH|+u zag|>|FRuS{iG5>(n$qkW8OO4>(xdGv?(Z|#x-#pH&)cl+o$P#F`v6Txu+>!%l`R2u76X$L>^A?!(Q{e=*j2YtLrkT zj`~3NJ=p!lcw;!d;VwHrTsr(-oFBK)d8m6=`kI7yOJA+?kaSHR1suZ(oWyCIMf>ag zf5b7hop?I`LmfMowT+(9)916(^YJXR$kE%jduDz=FP@9Ij4OzBgY0ze`1H;<+5eH& z`JvxJ&l2_@-ET<$T4Mx+Rf+E&`2fj{#srW94oLrI>2hw}MDsWt4?;#<4XB?gouj4y z+=u=DzSla`Pn9ocsw0yP6V=a8@zFmlzlOMe7~=e(!gq`fpg$dN|E~J^c4flB$3A*t zoBFx>ed9v)|0U}GWD7r_^}hN)a`d)d_3}^ii$7spsJO;rJSJcg`pxH?LiWlJ)5t!1 z`4#>jV+81Bn1wi&C(o}as7TLt+~dCr=th_LTb?txd$w!*LcUZD6VhS z;knR@?LX$lHVoIN=A7kNiB(vGb=ZJSXdlUMpJ@GoagY7@t_zR-_QENC{BeH!>VctY zJwF~TukqWL@Y`ove_$#<{^fz8*PO`Sud(-^WbeDo-|(!r)`qY}+;I)VxMn~({hhzJ zzJhfEcF=dB|Ci5({$E%>>*bGyJ&yO`01hGf@Ns?U2tBT8m^x!T13iU?&FVu)qyDJ& zf4+4sA2qd|pU6jzYswY-|2z1y8yuttH z6Bqq|{_7QCRs7&<8!^jaBg;K;dQ^%6yF#(g1d_(@9uk579H4RdjMxTL(aq|Ck`JYcv zKU4nS|Dk7(+|DK+WShwXda~+A*QFoL&S^m_ zJ$s4$|A75RoD;cEJO^+HN05u>@XeF-PWPTiik?BwNO3#vMi}CHud=u&e#qmF6!zuOtt@@e&R-TD* ze&b`|7t|h|UqjD+aqspYWL!V1F-X7f{f6IeA&&pKL*7Gi{Eu(E zZt#Hc!0{tw&MDU}Dd&dAbegYtP=3Q8^w&QVhLFQB0;5p)^^2jD9E%Bfj2JOu#DGV5hF&7 z7%{djMvU0v6|Z=KeLpo3j=I)9``kai`TeTK7&V^t_*MO?M#Xu>^1HeFht&`2ko@@2 z9%Xoo`T+x5LYQv9YjHh(kDG7{;{Q&{A2=uUy>0wUIL`m;zrnve>3x@K^OIH5yoX%P zFY7+?e$?#u?)&&A&>LJ+{j!4aF#S<9m9Q<5n5zyz?E&Ne#>?xc8h=LPMB~y(ZZ`hB z-n##ec^u2J3KdZvcF)F7x}=SdzSHg^v*MWKhyaAY~%N2^Fj0f51aqL&0K%9 zcNzajI$`|3nt#A3z# z)Xew{e19qThSnI5+&bg`D~oRVN4}1l-TtRQ{uR{)o~!$Bz&pb4A-Uai z-QhX+dCvB&^$zM*@_R-D8j)DbPr1dvPVj+aKEfyX3}4_Ye1q@MsgCVZ*LIU}E=(_( z5zdl1-*@v?b6*nX8dUo}yV(EQmMPEAcu0OO%n#C7RqTBt`d|Mfd<=tL_U)hHKXPPw zt+6!nbexH^F&2N2_u6I+4(HSJ|DRP?UqDZbOW`7V&qQT|I`AM_VxCQe1C zx+*{af8B5DKjGYJ^`A6<<^Qh^Od)+h{kPBdo#yB5Ha~BdzJLS#1K8%Nqxui_YagIf ze|*;j?TbnJ56Jww{bW`+M~-w2r^oAYU1ySKV=T_c1*jeE{?wWE(r%D$Bbkt9(m9*D zoU@RRgZ(bSWw-))`&;}~^!QJmZgu8#`nAZ-Q~%ES+5TQ=euTKH8QL)9O^EjQEo3RHw 
z{?7lc_&P}I9&~P(|98s&WY1ptpPsRA-oInB_V-@>e-qdrzUQ`a><^@nMhEhKZQ6tP zN$Y;(wzIL3QU6Cjx38>%{o!BmlJLu@=sGj3+5eCJzjbCNhG#bZLs&OzY*_HL0qNx{?f!j;m__T zX?rWu=)3Tr!lu1^i_ZI9_{%tJIBM5!o+572KZQU0FV;(k&kY-=o*Q0lIXC>N;B<4? z{vlKvGutrd%;8cvn={OT-qe}bd6>L@ZZjpy>v;UhIYuov_0QvyK z%JsqAF!iVLySmNRfU7h|djk8ogni7eOrifL{2+OsV%#)~LvtKLjdQ|UX|Jt`9VI6Kh4B&2mLPGgZpql9>l|V6tB#TYiRjT1bypl^Qraw{9D15;Z^ey ztILe56-)~aGfP74tf`@H-&AGpzrhs zymVKg0#D;vJdYRf5;}|Zzj)5w_U$2iOFSESK92F^cItna>;24kpJe|1Oz~{(cjEoD ztt<*JOXn5TBZ>4z|9|jr#_X*_LJz&0Uhx#WwO5a2Cl_k#emlG&&}c@&8Bd|GJs%kNMi^bCnyV+U3aiKdfbM zIOa|K74IPayXHOe1AK%|`*bbT##qAsAphq7!)%Ziw4x2|NFj|5d?Jm{@CClYH;8^a z-;w>+HOMX+5`Lingy{Qoj2yI8{=%tftK=g=o{qCI7SXpM)uLTQ?{RP4$gjWmsy;z_ z7AO8+9RCN|cd_+9hgk#AIDg(BVwds*7dZYRT!PDR1+Kz$3_QhW;bQYgMr$t#uYTjd zhU>}Sqh>1q2l@LZdc$_l7PruELlZrTgz`PwdbLIBM|D!ZpLnkNNBxyRvP(UMGaYjW z?!rB|5BK9iJdDoS>gT!Y=Q--<#p-7^eP*Hhd5QX&Y~G>%N9$g;?r!$)PWAsX^?RB6 zos4mhO6PGb$10=`+ebNGLGN*W-N^U~1^&z!G`^5R<{DjUz{d+~)fMidx zHXoT0&XPH8z?Y=?GG0Lq>XD4P@1&Kio#gpW@q7`-;Tv~3FOq0N?Eg>OuNzssj`U>L zFqKaqy=S!h$6x91Ah$$YLc1fg%>9>|7a%V7|9?P!gc|Xmke?yn|3Alh@rCeLNRD&f ziLRr>bvV!8^#9M%u0|3~_{K5cA^Wwpe#m1O^os8vr{X{D|DP=Xp>>Mm2a0FIPqYPbB!b~&9M{eR9RPt4oxWedcy$1HMK6&RF{xkJq;YShsBFZ1&mq3rc710kraY(+?CTSw$KZYtCv!=Zy zEVs`pRN!emi-GHWw`AY@`iRK>5!OB@U&bq_L7e|rPbSfdbY!MB-ag+fJ-^<|MBgj@ zb(~y(V2p2hLhS!FzkJGc*EU@{b$VzNPOP3Dl4$BO-`Fv4;;(oIznuT~o^Ypqy3jrA zYWDxt?EkAn2HDwHhun;-LvzCX^pyGOE#{}MGe3Q!`RPcnH9s9yw#E4`A4uyXe1e+7 z>H4mwhtKE@OI_RI>BsXwd+Ox+1M^(#!s+1)`?RgUI($WbgS2yhNB)3R*VX!^TvyrE z;V0o^$hxL@UHR*r>-s8P-)7fW=lYzpeXHw38Xf4|=l%}3KeFej`SbM5-s$RB^XEOo zK~?esPQ^%^jvwTaGs&}2JKysi_lxH{+Vho8@{s2%tt#n^wcq);02iUA*!|n*5_-c_ z$4;IeE~8(8rb5ReG0OehFRrZ_+niU~wrY&NB=TD1=l`9??=;T;3;a;U-Gp0k8}7he z_?!8E+WZOR*XdAC++&~na6cYIZ2vz@K8nZDS*-q5kN$`A|JLgNL+eEOAMNU<6w>q# z{Q|N7Z@F|3fZ|>{{_0&>c2p* zx-^66W0#-5I7<7VSo=U7+-ARab#4l2ddC9w=PdR6++VbxV!J{eoLs7&#r0Lb?O)U% zPrHU^@qE0Nhu994Y*c#Q|L+FtP0?#o#xGX$f1o$GmU{ct(Cg83RN6?OaV1?`od4HK zrXwZph0NlY^Z#BKely0GX}2s?_LKSff0_UJ{J+0C<{i9;4-oVJN8~5?3@7LRvH5>B 
z|F4CA9{yM6|9v5yuka1NLrv-N`F}sqPt3phiGB>fo&UFTiTsU0)t+;qevdmxgj4Aw zF%Z|**FNYw#~ef9{v)S^Gs&}2@ge)$+Is()|KC{qo{tM~5iY@HxB^${?`AmC=3<3U z{~7;j8qGFEf?d~$71r2aX`Q`QTdn_JJu*D)nABGGrMdRc(x1l*=$^?xfIo6CnXyhl zmdx?1u4R|ju}|w4uz$tUO^4&kwh!f=;Y_zwT%A(zk?pK zm)}8Vl=P89^UmMtUo@t>*Y%O@!YTWumsC;!m_!ZjL zWS@8NG}(Why?y-iv39+@iRKfO$wn*I?!LH8Wx zGkVzzdH)nLcYuv!+ncCw;s1@iUtFTO=V&Dj^4JP z?-}_Mj$zQh`F{G;k>ZEads@^-Tdhe$ABp_`gV(ELtJSr2>QCpY8>f!L$@%y6yq!v( z?wB)iHpb$7T!4#k2|D-bf6%AjP4?*D?<$p-xu16~4-e>7In5+FQd=s)$J$LlZ^d87s*Hx#@kYA@~jyip*x*f?b`5&?T zxW#d|;SSt|dvG7_M`xYy57Ec82fd}zMV6jJe*W)n`5$d{zCUTkf5WHg9b2_OcIn@j z&ck>VkE7IsQ??x7J{(b*>{z+! zPM7A{s5r~>xwSZqrJs)ra1pXUo)a!12fiH|20r!uR`T-^z5-WaI!?@AzLwq~&&N5~ z*VBKGrbFr$;rQ?KI0w6SiTUTt#)f)h*bT6n{v3vR<5xC{5- zK6I`%{*UgB=KmX;={2^WLAK2J|1#tM#=Bc~8b2r7_8R{uQ^IMo!+vdt#)kW)^B^9^ zqevIq2anTxhD+Nx`f~az-PyGLHRm<40Vy^eHQ2*b&v7y}h54m@ZztQVw zy06(k&%b}sy$d%9H_mmRw!f7>RwpJB$Joa>c%UZlrKV~TUqdzQNY z#m+}>Mf52=`Tv6jo};)bvYX7JX5rZII{7B59h+YlL0W$meh2ygAMDc3aNhX;gT7nO z4)2NYzw^}a0r?R=!DsjaUm@!sINy-p;SZB93O|tpZ~VXE82QHqmxSkw|2eGRb5VG1 z)|u0{ThdBhy}kNLiZ!i3%(DD3Ezdr1K)*)f^S2;HeKCrzMXqV8jJCNs~x9m&z)*aW>~1|V^gZ`&Df6)JKM*U;>1D8d@!W?6U*f8gxLgm!6!%Jmj&4bggDecp-QdzNLZb8^I zPrst?A^R4aMEuzpi}P^-F2cb3W5Xq6-+2FsAo~~bFCwqPbX<#`#oA}{uI;yvKX!_v1d)sNbu_KS*y_YYeN4Uql@L ze?t5Js5u&lYs56JFAOVM{4bCly-MGE^zVDvF{yd__a|F>hyFO0qgx%?gI;8iMNVB> ztG=x3QvWSe|F2f3ZdSLd$CLW#o5o1ju@!h4&*FK!fS2$xel@?KQ2NEvckV25Xf`I% zg4R-F4QQwTa(+UUbG;&+8q^~h^)mMNXl(|1L&CKoP47nhCqPm-`bah+EADl?iNE4x z{h;o9NBBK_g#7=9jVpaZ{|sN?E7Y#z|Fcp1e~pjjgj}pnf-rM## zzT!RZHn-6=j>PFW6K7*A&c_94+hraxc?shG?=ByKPTpN`+IKa%%< zR0pnDp!`Qo%Dfx<{T|hs<8@zLewkOAKi$3?SC`>ZU9Uu^a=&0lwD%vMTEL@*yCf#QK8|1D%o+b2b)+2L7w ze*WJX+FuU-&`%cK%=Ie&rXs7c0NeTdMrhj>~G-<;dn1{)=7w7u`!6 z+7I(zMB4pyxSv{M{&maP7G?Yd$vCG!v5WsCn((l5K8nZjL7jX=u0jQ##)_lX`y1|k zj0xdc;pdUPPTnD3Mx1}}3c2P-Wr)6j8hSmFXhoZQOq1Qn;&sHeUE&&NZ_<18vvx1D zt{eRw#X8yoid_ILg!Y46)n(veB)1Li;EPv#b@D(}W zyZDCu4%MaR_w3~NM_)0{_k~r;;kY(fY2_MZ~2BB!I8USD^7UjMbi*CV;nbtC_ufvu;6-;28mx8OG1fxB=I?!$@kwfpJu 
z8+nj?7}?JY!lPuxr~H!G{g2a^V^xg9(->$T5}qaF`0ev#|D8j^3*<|98Lyyr{r~9E zxAjE*^SU(NMB6%cHW}yrzC*r;w73t*kI=JUIj~aQA%FKG*QX8<_la%sKhxFQg|!i@ z==t@WJ*&7jPLr}E)}@JR-y$05u}t{PabIEJa{nD7*PLY>|KcOzJNgg!3CA!fp=`ye z7>WL14h-4%X+b!X>`w9f$(V~sAB(uQZw|fHY<%(p-*=mR&bRFXq}TIHCNDwCKDFw? zI@Hf|ZOE@nw#+pzRsSz>Pm7L*?5qDCF0=m?xC+y8Ev`rBVe20t-_J8ko}Mpn&ydHF zD`WqUR{s~P|H-x;>i<2;R;1}2=s%DBuMH6YZ`Oa>&|mov{$Bbw;TGJ1yAanBx`(_k z>Nom6o;e!sr&nzp6dok2*YZCgA4N?@nf2Ncesf3kExSkiB<8wD`^0}OB~e>;EYz($ zX4|pQfJSy!0?Dpp-rccqn{*z>a;!oHp2o9y9-XOUp(}Gt|Hr@U|M>UNiwv^Jp?Sx# z(1O;z$FzUgaMDa6z5ZC}K-;Ke;RWfugqQIO(!=djaLim5-(8WkQBO}I=X$fq6duz? zm9Ds|xzZ!MQ8U|i@^w_tblfcWh4Kprhd0F~r@AjR+AlHVnE9>Be|7LGw7IUo+9%~Y z)4Pv_cjy&&t9Q{YfA+|iz4B*<%nHZbF%+$ zjO!ci``fPmcaN=P+g|N|GXH;3vZJ&hTp*2$a0#lqj)u#~>Rm@e&F-V2{N2G}hH#b- zPV570DD>_4mJ?)BxQSdjOFo~W4`w)fc0T{}F@wWQ$IL>_D0%udeuJN@n~T)ZNEE1} zi`kK6(lO0_{DaVnIG3-TU7q6OlV2aa#J_zC`4>2@o9uC1FPRa}lDSL>vz=!S=3*Yc zP-e_07hoZ34{9GAcJ4aYfW|J@fF!-C+Bs(o3XAQx1WU0DC;Sge=_ma6%IGVRo25)} zOl(8<%0H{ct;IU5$G|g#LnXNpo6-M(z74YPX3vJKL-w)4&_b3!#BR}s$@_O7)~*p( zq0U%yYhmc34_xCrC;RR^7PgUb{@-?T2XUALjl4=df|nu#-)@n{B(7jY~G|pgZOu4&gA4qM#|PD8z95YW+WLg1r4* zppHg<{Xe#S3;v_^|El&34Mox!h0z#;xCUS~$~XDu*y8nw{l~?&HL0_cUD_bp4vpjr z-(UO&SIyHtWQUHkPpVA&WAQ0r0=?oI^|N&$x^3&3==!E=J0MHXjnhWjI5bSO-y}@N z6x0}-sMeP;mEN$=`R!9epMj8t@Ip|GfseQ_@6031yoGjW8YlS=au|E%Z$7Ih@ z`Jc=PXUW_=`QLMFF^+a)wY@s&Rfb9 zX~j8!Eo53;suJ}4f4$Z!M}`@%-z3fbu1Q&uA$#TR)nA_)uJBD(?Q|XXuik4LxeYbq zwv#*1kWdC}b8lOZmjO*=ECY7hwhOzl2Lo>n4tvQy>_dP1;IKc2wZ9LL(bsC_GT&QS z2nU4^;V|OZUUrDOoh)dUKM>~tR$NmQ)>u!l=a6^g{bZ`Wqb~18zRl8y+ph?tP_F-D zG&u&v7*C01iC)?qy=u@RfG75%mPN65ZsibC5q&ySu!3sOjLcaOXH z<6=lt%I&VC%gK^($iWG}Vm#YB03jJ$`=0{K(^%f>v2IKMB>`HgG;$2CA>SxiiNYn0^x<| zd#)fXCi_=}u!LNSWr$@!DH+Rv^pb*5Mqi25$d}KD<$ZZFgZ%oCnS!v^wslyK>>Fc3 zCAkrsF`&<9D;d}SuO|C1^{tT!w4nUbA$c1s?+>V7Db4{*iH~ytYt#W<^onbSS|^Yl z?3!XZ(70H>M{=hAfD-wCFaIxf!rvW#-#j$TmPQ8KP=2ZQ2e|_~u?wB+_|H`u=eBPT zng9Q7N`0OAdH%clvgM%q2W`mv|J$Cny@RY>`}6vH>)GkZuj{K%AgP{@W9_@0V-NPC 
z5Bsnm2XGMi^#|1LC)XcXs(x4hXVw2XG)t>RUqY+4LYp>2yK|(FF4aGQL(({mqbO*N z&%7}7y*4})()(W<6JotSoZhE@cHrX+^J7(2%Fiuh3d7cQ)^wSt{bKtljK&zG7ph~I z4GqQg9(8TE=R1x*0lCqh4KnKXIDbF3KmM@sNSJ8bWK2Q6{o&b6r8n&NZuj+hCl5;7&dG_ zWK8b=Hdgolgg+iU6rMYJIIKT#IIJrEcjI3_g%anQfr`;Tgf&xt2<5|n3abx(AD-U! zZTMGXYcp+|g=hOlgg=ZP8J?N)U07Ejy~N1y$CbmvY}@AGh5bXqpO%?_S9)rwJa}q& zY1h#3XJyS?+vZ_q@nD&Cu<`(?jFN(?eDD z*zgLr>WkQ`toXMvXY)NdJ5>8$+^d_<4fCbB01L4gORyBnP>QyF7ll@|B>p)x@4Fv6LD=nA1yxN|o9e?I$v ziT?HBY=SYz$KJB^Tmk>boyxx5%D}zKLb6>ruFV(w=R3%a(kOqcIBX^3I{wvU9R_aj zz1?o@&*&e(@2{U9Q-AOsoW?KUiQ{0G^liO|Efl1hmo!Eul*n_?3Lv6zP04eLZqhW&a?-J)NHXd#4xM`X3?;_*hlZ=1c z)@Cfcz0%nAX5-Rihj3?#e?Z3gG}$A*ceSzmxz-0TzduK|6&Htn&a)o}a1d$xryPHX z-m}B8`^SdE^rOg?8ZS47oFRL68ULTC+?!YIn^F%d18WYC4Fzr5U#PadK4IM6bqp7d z{|`$}aor{EL;4A_)|hi0z23Dow1{_1VvqaVX}|4bLy_Y~A@BeEKtUKyAA@3yL+5^Ai)deDo^4(0nnZ9Fu)mlkQ~{R5Q2C;h+F4;{kwlRVcc#bJW9CL$@^RC$i?(0>>v z6zktHM&ln1lWd!eno-4J3ONlD|x({+`w|EFjxOk%$-JpSCH z4`g{@)7ZBZYaY@*J2E7!q_4(WtiyU#Vk0&q`mS#!t5JspT9CrP$JTxzqYqz(>_4X< zY$La02X>-oneTbAHi!JOOE{-&$s&VZ`6A}I-QxFPFZz%!lUKI0J02?3{$>BTmpC`4 zfoyb7ajno~rTa@MqkL2Q9J6B2@o|DTw7>TYAH*Sa*2(`k+5b1swW?dQ$oK#4{Kfd6 z{o1v$Qb^M~w)*GLg?t?@=lAr8{C$@1T$)Ew(RsWdp!~{HLP5Ll8N-n^*SLrrc$;s; zTl^;u4GN=#M`H|%Q8QnE?7l%^96ipdjQ&3p=o9g)`3E!g|1DsD;>0@Qjr0UgtRsHF zwM=r{WK6+SlwbyCqEnsTg>Lj9zy3{&Iu1EBtEXDjQ>|p%QT0EW5>D??|EtTQ|KKd? 
z%*Gtd#R>nxdGr(Y=Y0AC_f1!<@LWD06n0Ci zir>c`vYH=g4ffLeun+YI_=oP}^AUYN$RwI}TgOfte!uPIZ^%&Ow7V;%)!8&L&IEh9o}!eFt_TfnC^*J*eGm{(tNrn5utZvN3OVX@34c zU;QS&>&;`>_h_}P4eh(wzI*up?_}SzdpnP^gN3{6%uh#ehV5I;2G(xOk+mz?|ENa; z8j)DX{$G9E|9`J*=)*qj#{nF~Asj~MX7+!A{a?-gXWRek|AYD=ns>4P(TX;-r`Z1) z_CML-xHk6nQRx(AB|k8YhUUf*yO#)`=87nsc`Zh5Z50Z zW54Wa{13&~&+*()K3-pr_~aDNt;BPj;W?sqDw_`V^N#P=DD4E2>%AXTsBhMkhcLmh z6EO*sF$Gic>-i_YvG2F(|L`re_!e5pw%OVrbA6lh*{5WOaJOgHqhF+x?3(#=zl~?p zOV2Fx+zLD+G*9*%k*0SPd5)tzN7Qe0-S(~B=RWAk8QvQj=Xj4Oag8%D6SFWIb1)b4 z&^D(i%qJIMAr>RO)cOOZMPUiOM_t)%%y21v8FJd!*-^*a*uCn{QgKzIi$WQ>5;aqb z!fJ9Ys!RB;s2>}S^5gk(a9A%cj(Ihy9~;$=39@#H`ahwLthQf9Jed$ql1*iN_1MuX z4<29tF>4LqN@;Aw8g)R0c}1N&R3v$ji~Rt9H~_phl`{>tMmMV>#|R_ytcDd9BP zVZXk6e;4{TnFDY}od5s3uvyxD(?*7^WdG;-X&Q%wYI+?8Mpzq)Y>E2a=Kr^Q2lOtK zUpFMQE%03EDKd?1^zGPz?seLyt6kq_*GFcB<64qciNdhX_MO;;-KY^)jXm@@cfNkc z(6E=@hbG~qaDr@{Gc@cIw;u;^5GU=UsUaK^K8&Nt`@fy1kEcUE!*CR#RvlcI(m#>; z*}q`8zWvGUe>6?vcV9a=jI!Toj6pHRVFD)NKlOjpH_$SH{V%Py681lt8qNMMX8-Fe zoFt7te%F)9{_hLJ6mlv`Fat9&3$rl?b1@I|G4Q2l&emNuN#55^jQ74kd|W#qwM{#O z-lOjC)<#%N&$1uloXC^)zq-FFH8d=>t>QxY{KkT?j9xR@_(Orde|p0L&wtd=P)1*g zra7MfV$UCOt%lX&+A{i)$aP37*VmJkh;>l6eBLv^AZ!%gj5zj_+oznE={q>ee`cxg zANlz&nllik?2Yn}Yp}tuD9PRpwoo_JlE~=7h+xi zH}n5~Q~vh7s&7a-{WtqAR!j@!=S(B74im&p#3W3{6ih`4W+3|h&m>ot`R&)e^Uobs1pb-fq(S(@)H#=@?Ob>NPpam&(&b(Uxi+TR$^7kw;uit$C zjQRdqGAG=;?ds5iRmw`{_M1r-yDFpdUnzo<+ue_o8a?)!~r%!#Ik9F3)hm z)uCE^A-!R>XM^GNA~ezC+6;+G&kCc&wRtC_$uUTen;weEaY$J=pj+M5bGRT(5T1zq zx|7O_UUgLlwYy3}9qPTyhW#bl_uk9Cl8`)55}HP_?+;s_W2dqG?d)Y^uyH=#Wa+GF z)PC+A5<1n#U6a(&6ZC)W*Z*}`9far`ltZ&JrKME@WG=O2x23HN`mj(=@Pm`$I9 zxtNFfSb&9Cj3ro#IIn0KS*uK`a}V{d|K$HqCOMaI^pPpGPZ?HXHP&Js%A1FT^>UD zi|c<^N;|G0wvpV5YGltF66(l-52uHLw|xWj7PNWJ`T39TKmYFmaa~A>@Aiy(Qm#pO z8?vq~x72l3i*t>W#BIiQ?7&X!!fxzAe*HXkSUvLV11HoyE7d{l+c*~9T*cCmX-|?&U|AiZkKP1?ed3$m=o6|Y_P|vn(*jX6%(W}if zsKI{v0UX33REcZT4mnIeih^!sHPY(L)Xbq_IK5}8bE{9Y_l8g;oL@gzeVQ4={vN0P zU2BekGJ7Rf=(`(bpZxs4)u)8f^oGOQCI{JF!}tL`p-sL=+hmtE3fb6qN*H6GwnO^* 
z$#F=pHdaecM9Oo?uM51SAWRaTjO;@7f^hFL`H!q+m)A}5d|hXQ>x^xx#Ej!@tfpl{ z!xYC%MG0nLCT3wa=Acu3(k0DqL|@ZhGNzj?cAnAdpQ70RqW!r~`%`_?PNsy@WQYB# z*!gp%GY{Dtvt4}1@whz5>Ad&2a}DH`Ob)2f~8o773$#= z{>`Pr<;sbbWZu8;?Ga%$eJ$2uJ!*~p)os-OzJqPAjLH9(dz<so%E%Ctu5Ar z98u&u9u$hiSIgg{$kC_~H-;=moC8ts+7t7io-;rNfI+J)@4W$aJ({K@~{rPy)o z^tk^20p}j)_z9SZNw^Zza1A<*t9LD7e=lT5vI~1hdxpjANHS+Uzga&(%WnMxd-V^H z?ZT<0>_~JtuIT;W=_&SMPu@!J8O8o)i{DPa6FIhNRybqayqC<|-z~qe zzuC@r+b?f_Z*^a6@P@e~!y<9z{3nvaO$XVhWMbyX@Rymx!plX&!)9#S_+P@GmrV&T zjhYfRZv5ZOSDPIEG+}ah0hKFD!iJ)f@W-`Rhv!FM8P=Cf4bR~Z3nzquMr$R6Az{sy zg7ARzJcLIu@MS@GjO^p%`UDxv`4v&}WhsyG!-~Sztp(vJdX=*86>Z|`okPN_qpTrV zJSNnjZfmhVf-&mmbNI07dtW~)G#WE&DmcUY8x>j(kJiUJT0iPyw)&!w@-MXZ(#7E! zX+DPycoCbh1<_CMl`+nHv?y%BstFf_e<{2wylPu5D(bYCr)YN-UlmrD{O{rEwbPCN zT^1T`YsRxJmxn)0z9KwRa#>h6>9X+rge$@ycMS{W*C{*HKQD~_FX2y~_lEh#|K=M1 z^L_n!t@rNOPW0k6Y+4z@8!=AZ)RmFP$iF*&wyY?$l$nb%i?0RR)mhO;IZbw~^ZjP@ zPw7LbpKG1L#pa~gH(~Bda*1^YM-_#8onw{q&QM9-K6^{}ZIt=0v-9@&3$ zK^RY7h>LM4+B}EL$ty7p*C6eA#xaH)==t?O7HA(BXIY~!;aB}9QZa4Y?GG^rPoyVTiaV_#voQ(W7;Q^MWky@+=8BJu%5 zJG*2Yf@V;kqo%qM_1XiFNPvIHV8oST;52W-D98l*vCV?cqX`k`^ zI`%zUXR+_mK97BmG`$0zrRE>ZbR9EX$9(6dXJ+#cAahgs{}m4n&pFQqyogPxnaBQC z4{xD2%vLw!ReCL&v~7||pb;yUD9^RoqW#%udo#*kRR6NO+UXU+ciH9F_1|Iy9gr)Rs*X|MW^ z%p#|MqdBGjNt&%I)&FQOJ>EB+UabB{Tz~6Z>3ol@wf=u3({p`4QwD`2^qx|AbLk+7 z`c9GaZDny{{mNnD;`;j|$kR|WYEU?XJPXy1tCvn}SDzz%9+HREJ!rIF0vC$A7}+7l zw8PkNDgAO>iD_7&E??>1R=J;Yc6>#%=ec2UxW=~Ya070{&A1i)4-5*olYPqKJIR4F z28FxHd$9-);2}JM$I$n!`uB2mFMGNFtfAovaVt=cr=qS6e&YQ;Bm7+4)>|9~UbF^> zvAMYZmS#s-^I>tQuvY(zwx3vkYZJX^r{{+)^jDFiXOTfKTD||ah1$`voi(xHah_97qBbw2UPW0k6yn+0Gy)(v{3gja) zsqNmh&bPbKH(aUxyR<8S^?P+)u>&1gX@+R$ET?0Y!>17rIgQ}`bg@jn>tI*MHf*(;pc$N%5}{{ymW z{kdV7^Hf}@eSbqi7(uUD%MWAax#2W=gMI382K_8FIWF1f9|DK`O91DHYnwDSoJWpF z^n19Fycp3BqT4<8;8NkskzJ-;UZ$PC)pekDm;2k}{`&YIpm7fW10>OeD;+Zp*Wfzb zfE#f$Zbj!_`Ct8(_b*!~-HqD+IMKh`JlXR>E85V0Nd7;n{ZDo{uFAW`9<$FASb_4$*Mq}T2)#}BJvA*5f|GU(a*kqqAcs1^eMl_=xosp&bAJL5- z^zK$vDFd@)PPx&X(*BobtMSD)w9nE1h%`FTcdPOD#&eJR*BFcMzgrs8?Hd-tYvjOl 
zgTou-TX-Aq;(ZLudUn^|#8$%e$Yde|O9OIPw3@&h^FY zWB&))S}dOw>z65Z{mR1K#qxP^XddqUp>>S+hjx1Y`_2@Hj+C}xo$m+rUA`YQ(i2FQ z`hKvYT7F8%Pvp0*C;$JUABTkR=|AETe2yasIb}SCVFXS??Nap<>d}BkWmH@@H@}W9 znP1<3AN&7+a*S+0s@x(^tnc5sm;Jv~{k!b%+y8BQhK4ho=PaCq^N@CY96ujV?HUH0xQs|4(~$uaOWwMZn6AdApavbh5av|pB(?Q->>Tb1=5$sQ+Nhd z+H=p5)%*3gsGB#?U&JQ-%0H%s?T9!(ev56dBCG5!=VMVzZ$vZNk@uH3HWKaKPT^j> zhBfSkIQ|~j1b$2SZM=*3kuH%(@gcoOd%Jto;P5g1Q{=kj?G|~Q>@64^J{K4LgT5sD z_0N1wev9w%BaR@v=DWr)jKFC)0|Pg*ALi=+lZWfIksI*a@sF%^!p^eqIXDmFu}Xf5 zd1?hZJ;N^KpTBp|>$#tN{`6*f{eO&qc>cbJl*ov#chc`h zTvIN2P#cqMoZfGS z|9icEGC%$!?=?%WWu5z9?Otbj|MR?mr0E?P_K`$0gdzolFElTKdD(A z-h%x9Ma$Ihqm>=S>UXlE$eaZGot!^-K$*cN%J&cM*FL~Q&hZEyLtBY{8ZtFW|APMi z74)oe{-?<9F8veeO{lw9s|$|mM?YkrS?+(X`$u^&f8R66p5+-l!av~1kkCJEaCpY? zasJ!6p zYWB+@*WfzbfX+kiUt6V{{G0WISN>1@ zAJzLEuKz}9obVUCnI3&4ZzXTXoybjfzmwfB8OJM5jAxwTncZ!BRf7LP>G5xHMf?Wu z9xqot0()< zjX~8F`Ud5zIJO_h_@i%eTo=1Zot2y~pDij2jgD(ZJ37&e*YF0q4$9;Ewf)KXkAS?N zyKru{^Eg)=duyI(?5*JV7+h+XGGdIrLo&U`xaLIlM`fIo;2HYPYckp+(tR7%Ddo-q z<&L!975)&(y~-We+Sqs8PN}%NEUbB~%-E*3RCqGPf4hEczfTd%_|M5N(IdWlwz@%F z^u5SURGv@OUa@W9TqmM^i10>m^{I^e|I;(bXnRl=})E6ud4QNCH zNi^Yo*K(obF2<#}9KV|XH)(n3>RTSV(Su%OkVOv7GnR*z*~>%gyyc;7?()z+WqC+V zSgw3s9y&%Z54DLWjXysb>d|oENjCM9Au;;NkR0`-{@W+RmCiQ}*Wfx-d~1D@&Jb>( z--u58bd7m3bdP&7^x$vi|1Etow4fDjGoK9YGrXhOPlj~Klc8gZIM;QvbZ*7%xD)A0 z*I2eZ+)eLExi{QPUxeIR_qN(Su6M81%fkcWs*0C~hsZ}zvuk;HjC=ysQ$5#%o-0-e zmm|5&bKU7Vdp3#Po{u(Ui@vQ^zIttFr>AzBzjx63Ht1wiccB}9^M60z>QmBu2G8MM z?`i}2A~scs^#(eo${}Kh4X}0f2!}A z9{q+ITjXE#{iv;CTmPNshKtF8?+1rV$qMW2t})-T|B2<{a^b!e1>s6^8m_^>=l>9{ zBioi1gd4~kaWig3Tw5izfE`Hhk;miOV0Y5*MsAFJZ+nL99aj+U6}Jcv;2}JM$M6IO z-gwf(cJ3p~Lpj<1t#FZYYMJ&%SqM*wdj`*;>acXfun_0`z9_s2H5-*lwS{2|J^KFE zcggqb<@-9eC7HDSWc{yxX+Yydb;BHW#9Z~?Z1tvNatGCaQ`G+@>VLAWNc}%r-CnHzCp(6l`?mFg(8iX|_rFcZDBrj^tj5#T%fi2QEeo&N_6D9^`cU}8$|u4zQM(lE1mizb3d0C`!(x3X3kt(& z^fS;jPaYDNK;zQFaF)1pP(EZtIFB5Uz6aQ`WdGOyXSkTW6qjS5c4)YgoQ7*~9qRON z*RIq@lW?Am(%mZEE`2oB&QoViP4@Q<_PY@`<5t{`JCXf}sol+|X73Q?${SVka-DDiGK#qVFO;oCbUhGAIUfe<5jX2X?i2s 
zjGle25t+TN5%Jox7-HKaXh$b{@fzO1Td1APK4w4G7qE|U(m$PjdD8!X0{fO7+eWsJ zW4j}b4s>qUKfsQTbKrZ})V*XzIBTtd9NOwehPR#PUA&JEk=`YZJtM=%^!U%f?xQ2a zr}XT_Y&OSdV;c79wNJD=s}76|c{_BD_FLu1@VR}zMD=FZ-Q~Jhv*WF+7T3>9u5{gK zv@K!ZukF{ieq{KT{2uA*k>N-32vP~>?sMG;Z>pn^-Om=^?c9f41HG#H)G$o^2%Ls9 zP_xte062@@u*4WJ&Y_=&rV?vVA~AP_zEu5X!#!Wr7rV~Uu4|m@LK00F@0bg5F)qdB zxDwNF4LY^qx^}B$*z-MPuW)9Lx&S#eOQQv?6TBbqv3;Uv>pW@a>(D=OoiuL1ji^%I z+)P$i7qKDy=kH@ab^7mb7gxVuS%gMD0}1~COrmKof0mi*quJ^seE?Oyw>#tUGyL}! zchmFh{{;2$B=ztF-(!jVMJCZS+L{CN*Z`|-UuipjoB!^W#v(j`hwumh~tjUWc>GUJDK+jXII32foMl}itEK|i2et0 zUC6!-{Qs{6|Npr5z+1Mxjd$@rKE%iP6rbZu4Cogg`0;n4;%@D;AQ^L!hJ^nuLsZIJOFv3dJSbh$)YHZsn|G`KwFz3uHv6@g8lOj8pQmGi@ps$KlFm6e z593i&!hXet^f)Ij`oCXHzZ6Xw{qIPiv0x|%Pv74X-`_&*=Q+;%q4&E~`ncTw6`#uY z`lIse15DL!nCSZWC1i_T-yZj0ssCfMbC7@U{;!nAG+cx0a070{&A1hvOXWXwqX#F~ zf8FgF%ZvH@Z*l)*R_-_FZ=-V z?XvaSaM6aJKQuff?h!nOC$IwLcnZ(pIc&g-*o0r!Ut5IBSNNV+u&GAqyKD5XdHvC~ z;`?9pe930CBhE$ZBzy51-oRUU8}H(Mw8?WHk|+H8Kc;_*&(R%i^nLQ@QTg+L{Hbl8 z! zp6dMQrRUctvMu`Zq|G#i8#B--mGg2hiQ*y;y_?(55~95cvom!xMgdZtC1nF8mBC{GVuz zxs;vTjIW~`J?KRSS>(_>-}t3;TBR4)x^Ew6{L*>i9Q6)VZ5;4x@uTW8TKQcpo3)V|{Lyfp=$mo+qi&@ zjN6^>PTY+}i2n|JfP4s#pws?c#h&v-&l$h{ze9fg^|}17=JT~8+r5v}6y<-3^565X zFsE?MTZQ2_j|E5K)xLto)Ydq$A8bsJ~qK~F>DO5&9mA-e-WEd zt`8u+-8uFage}57yIk8&_B*{6xfb8R4)1=OYbF0b((XOZ=R7~~{l~AWy6I8f%rHX_ z-AtHahJ*~WbgH`buHFZ`JPy0;GKW3v;e;%YkkDZUU39s1(@i(ssvAw0s;>J*UDWkg zWZ96EgoFtRiJH<06P;p0LPF1Zetw4SBxg?^k8}QbJ>Kun=ll77zPHcy{keSeFJ^=V z{L4^3lyE>#w`ycO5JXHHbJD^4#+e@qUWe_KJW!h7bytsx)Hf6Jjin*Wwd z--3TS|7{z;9oU8Z2in8tLgAHX4$C<|g4QHEGX#Im6hUz}f1BwQJ)ketyo zG?1H)>R%60CeV-KI8LG+XAs98iskbXl%h=DFINtv_Fo)*_9PPhxWge$m;Yq)`i zk@CO%_IUhPzDn)?;#H=dK_t8=nb_oU;VW zumY>F206$@gEi)j)*v*^be+ZGHTR&^T7;yz2W|7sKR9LGIu2iCYd|f%4)wyt|Nm_f zM_k8Vk8LqtZNb$0G%i%rS9}A-C!QqQoU=oiU1)NwxrhG;ls^;paHr<~ob+E8dLh38 z9H5s@^=vpqFG3|fu0beAT!T=w-1DOZr6}7c{cV)~P+6y}zQJbiyeib90WCO+sD<{nZ81-$CgQwe-4j>2Ig>hnjnt;iP!laR%p5pP`;qH(sDO&u2GM zFJGcxLEB>OU4E@A^?z}dU%ol`*T@?veA#?r@-_;lt5>JWx9Z)y-1m`MS2t2VR&JDI 
z{b#|VuJ}JYYcK~YK-!h<*fdVRc3N8KN$-U|=!dxP_W-ify7@um5Ddcz)a+!tC+ixu z|JSC4G4yfBLfrGJ8ISfMNh(ugAGqih`G2PSMCn5Jwbp%AD_;=jH^uS)3C^F0Ntl9Z zn1NYH?fi@50rzcyM>R+GGzdo&hO;(AgTHJ@#b@Bf|wd&JYx7N>9|Eia3hN=U_Q-&(kqJFS2 zW3@l%&5PB)^VPNVqiCC{{zdCTeoM7K_~ko(l59sI|1;z{6fE~#;*b04Uf{ljir$_J zsc{b1DJs_gM~V9@t>%B)^N*JQP>GyV+M3wFOn$}r$yM5f>UZQ(BcKH|RvlcS}vjnW#@`Q@*CB81c0 zMJT*ujF{|+xQ9T=S@jxvarZ%mbQkxvNd0H@ShvuR-vA845DdczjKUaEZ`Y!CjUevFSHlVa{fFAq& zO{3Yw=|y;C_iAN(Y>D>AZ25n^{GTWP@02IB2U7q4lxHiETVyRc%A8w;S~Q>qN25Mn z`HLn*8(YhA-v&vv;h(Ml=U0bE|NqOWlXqLy-{eX0ZK|>^|4LS9r+3X@qZ?se@C<31 zyK_qj=g14Vge%COE-jL67rq&;kvCAdTK-?I9Hy6!_iUpR;Wqs)Qvd&xM#}qoR_qu# zB<#!>7T(Dm7T&!$EbQ7iGVC5ZBD{BEM0me$blB5vRLJW)DtxeeZ1`~an6P*0n6NKz zY}lVYE_^h0T$mxw`tvV_x)U#kH3SIyJK(9 z9TJK^?ZF1oJ(Om!lV`GvTiaIVpKX;d8|P~66%KZo>mY1?xAgEayXYzQ_C9vWUB zJ~RxYkHDrA!^7tFBSLQf;b9}*=r%IEd2eJG<=8m9)opZmd+V66ZRe=4ecwIg>C2(=Y?GFbDI{AY5Z#`yZfrsQnMJ?MLvy zJHPW{X#4ENP_@?n2QS)Z0T~3%Lz-^rrIc&}_J6RZvUs5V2L{`JAlv=}L+w8>(*6TvnsmeSgIL_wucGEE891eV=lr zie6am`B6(Rz2*6(tp<7vDi^XnUiAEDo!5~Sj`FJ+o(RXulZbPl+Q~DB|1fJh@4haZ zqsM&#$<@jQw489CVP$t!80k8?KlVk_Qbjs<-H8r08?v5r>P#JTz{8S3WDhjZ52 z)NNJgjlo_v_Da?WSG!u>j(Yl$`Rd>C>R)r}OY(lc|8JxIKPoQj|DS&{404Sj7~?*M zks~lFx-kw7+7OM}5>3Vvn$cp+p%st+KTxj!&#rJ7H5>K+_ZffCPN~;kIkKFewm_D6 zCSW2aq24{!x%VmbW_4?md!0s~fw-q(a<*rj?pd`pX7S6{9+^YVL;QE(0&)=woLi#p z5!dxE;a-M{f!ZH;v@-^4XY}>_jr#w1{Qs|PW%E$qcBFcmT;ZHA+E-R_uR#uy%d9J0 zZ+_5r_K%J1A9?H_TiHL1}N2ac}Qd(Y|>6k*|jdpV<*(XsP=BEQ1h~PFxj({wX;1tS+r9TSCKTv z*6E*H*XSK<(nD(eq=)?Enm72}M9pIDGcxzM_cxIJl^)J`e|_Ck5A6bMy5>IsNM3$; zZ>f3W=e6I2>yDo2g|2fU^db92eTw|e#@3v*Dk*#3O_$t5V& zr!A|quUqRg$_M+r73*K?Q&->7zkU$c^6OuJrhmO#e|n$(^;Y}J>BBc})IZ-I`{&Z9 z_tuIey$$R4sSh#FwPG840l5fEune2x*p&9-3i>LnK@M^w@!jX?!_&862X>);u6w{9 zdh>MGUi(bgOFw|N(faa}_37Xfm(X$PJQ!M^BCwYsJx~B zzE%Hyy}mkml;3fjL_5AXzIKK?mmTIDx%pb$%P&1#pkKljTt$&IQY_7skflR?o9us_ z|E~#i12=IScX1yUo#v#2jW#r*>7qO!KmW7+FT`1`EICZph`W{^{|!^`dPnZ56U5OI zy->5$+FkO|{ zwOw1YK;1HIVF5P9IrhfZ7SWer z8CGBw)}X73ZIFz%@?0{m3*JI*!w&4i9>jGId&vVhgd%j5`3_{Xujh=GMzbHbKe}$~ 
zr!TSRWehVyI*R)TjMqo#X|De@_cJdeTw=m_^#9TZ;hMdp*am2!A4S_rmj?NqC;Ls z-1DOxB`H56y>RZQBtg)LRsTOyL1J;5`BdEV*S&PdjJL@xkCG^T>W3Cj$}7z zCR?~$+1it6L)Ah3e;h^)Y7y7P)fcG$U3a4Ujy8a}e{3ziKAWvw+`|xU0I~1ij7{Cy zz4d>R^ft6CcOAsO{|I46VGPEhaJBIZ$Fk_9;*K_e3G|7G|J12KxwzwAyQ%f}ONCkJ z`M2|PPvskhp;wZVoHGT}FaxtN2lKE14Z<`E+vL2*|34_#MksKfYxVygc>WWf|Fq{P zYcG3#vYz`0S@W5(eeo>8GOR#-hA_RK2&?F+^^fDNOQ+}HpRRw*<(Ka}Z6UXz@MZfM zkh#)*!A#G!)^ok$ew0Id_*cyJT-;@3x#LB7`oG7d3zVV^Xtgb!ebBZxlCL=tt2;Poi}u_+V`HW2-kTggsbE=+(6ej>u009t^j_$Lei(p3Xg*=yqIrpp<|HR!c4>@Ou;nFz%0x`L$UF1G|e^6KHa$ba^vbqt~AcR*tq-y zdVX~%qBl?W{4cvNdKKEZliaOj%XIPa%P+Ri0oj5= z`cd*Y3g{&no-5Nl2JUuLa947d_4S-&(O_+Gl%N!4C`Sbzi> zn{Qq-y(cPXdp3UMC|jNudhzRnei(p3=*W6L3?Vxw*vo+&fl-L#@ngtw$if6n#D<}s zp|`feKG8bF04bi5&gWQEZ*o)Nq19i^Bjesb7syMf*Z#OdUX?zYUv`iB{jIOKN3rv~y*5T3rkMg@&+a;dv z=!sq^?C<%#mp=5;9_}sMm@a()Dl^?1%29?v{Dxo{M&OJ6k5SyY(qP=TVDq&QHcfdZ zjN_Mu37CkYed>Sh(2~f2vPg z?#Z7>=E*I3(|ooyJ~^$TC!+YZTi7*?<-t#ilFf7u?p4r=LVS&Y-LPiExhWsFEJZ_;24! z z0W`kfapn)x>Tl`e-7~i1c=JX1|Ex4{)f{y0*d|S)74cs+MYrsqh7y#bY_z^DDo}|- z?x~OS24M(>VFX5D3>vPR-;XAJ;pUCz#p^e>>KiA~R;RBt)BN+f=AV-_!qwh0{~Y!7 zBOT_?ug(nP#FK>yn25q^{qM%7!z6lXp8oev^9<V+Ow(Hiuc{9MoSo z*MMArI(ieEMKfE)7|*eYUy^O2jm@Bif9pZ})(#;@=3 zu)bQu-7wzsqv@3AM_iN8iujMeHdKxD{9`@8`%V2Hp|9uf@A-Rpe&iQF?FYj0X^#4V zjb|epL1DS)#}@vjx7^!x>lcO@U+Bj!cG>fv_57cDe*W8psrgJDK<+}E`?-hQi@48c zYFv7$^vrz-NqXB{-#+$1=|$%77n|2#GFbjc8Ojm&_^d>cbIMSKT72A_%^U~Z_d)&I zhOO-HNZAQ@>(lDnx2}{&*UJB7)u+b4KQsPK);uuxj$U_}oy@sMgpK=GynW)?&>-#> z97W+l_Cx;1>8brk-N#9KJ1W`0D~7vA#5n}}huXu@yV!gEsj#oF{RUt8s_~Ym!k*b* z4SCbQ8s1y}FB-1&xH3c zKNBuF)@48ccTRmNyu0>GVXtzobJf$~3jdBh_6sEA`0)qIt`C)m9qH=hEg7Nn(ofQ^ zId%g#(WUI^+Gb9`GTdi(Q1--l)(_Us81Go^XK8WmV_d&*SNK%>XGBK0Pk*$2A^frY ze?l5SI=wp{=YMU}I9EMa_}l2|STFQJKMcSi6zO9We`hW zhf#xCbzB|d8vY~4;v7QtBbrugA0V~=-cWUKZ*_2{`hRjp7~(p^Fao1cIMwoq zRR3WdJqwlU!V1K_%gQi;-$YEp6imYm%)%VRceNg|yqw1!$Bc5Zh`t0xC)EGvT?6r7 zzhw`^J&c20m*@Ki5VUbJX``L)D8TwUNpA z@6871wBRU?fYMR>i;_Rf3^A_ z$@a9+E}k>EfJ?Z7tLVCFER2jcyc=Za2=gPz+eqyvJm7v>^HkTyU4Hk`)?1vbwGWoN 
zmzN)2Gxyqk1_1Rcx}#pbUx%LbL*4GD6%74ZTH(c?rG3)vZdxyX2m9J1b)ddQwlb#o z-=&pzyO~xo@^8{AyZudCQSX09E6)3ew32SO(@F>aU0Q9oYiV^0|0*qc{C-+4s=D1s ztDf1Fc6e%6TFvsmPU|DSei(p37=mFKfl)~9|2rm~U6FnN+7n?6zj4UI1mujBezT=t zauW9xOhfD&bRA{?Qm)S6-sBlKuX&I*i(aUVjdLjH&`X#4wwOm>fYiL|*~-u9o^8JJ zef#HW#k)UGE7|vXS}9pp{drn>>*r||#h<5DVxn^vVF{LD1y*4Va?s%2HIDpQS`*ov z{j;KFek?a08NrR9pFW6=GyEoA4a`)S+A z9oU6E*oy-=gd(KsU+HQCx_UlHtC{JZ`Bx!1LHmrXU+WsHKTm6*H;?>0tqH05d1caM z=I3e2jL*|r`+lC*g8bd?S-7J(j*}>C{aIQ;7}3jEUg4*=;u)BnJP~FEUo=7IC(&eAvc|w&p5jfAwkQ4_R|r z`9s!m$2sCh)|&ra_aLp0c={pQU`Lt8{gsSz*m+i`zjqUd@{nuILaaOIkn<4N05-`Z&GOFz?nQ`e|Jvqi=d9Gup)cXL3@fk-YY_Vn zIb>W5luK^GHgu_XQuB3x-hSV~FV*fdmwIzQwt*4p)~rkOI1s}Z>9>;|D?U#vB0FzN zA7mA3(SQ~l#c|Z^>=sUvkJgv9)6d`>ntbbK>7iw*^zlIXd`Dimu75LJ{WnuRft3Aw zxB73NI+l$4gzj?g1zf@vT*Woqz)hs)|7GY~tIM0o7JdF!GBy65t^a>V`#_y~n5oese(@X@Uk#ZW|0vcz%6mBe z5%)xzB%Udlh8d_Istq;T+yZ(W`)Cqo4t*Zlma^q9d^q;evf5k&e)*nt5xE3~!Y?D^ z9>l5n2ad%tkfb!Zihspm&*ht!WqZ!Cp8bS6pjz9XEJ5u1mywlsCpyo1knbNGuawEY>+pj zEg*Tx-rHn7_KH9C-^Q!r-s&1?5pQdZn;vZe`O2XK!W}{p$`EY;vF~3+FFm2`KrOui zmGo!>hGoIxkM$vLuPTeon5yo4*bh8sxj|GZN9x0d~2 zf_b(#*#lN(hMSJv#$DV;_)}#!x}zt0p%40D00vYn~l0~2C|LH@O z=%<#FWwCEfruNArue$dQ#`V{K`iviaJrib#V;(l`F&<~lXlnnzSM;-|>SvQJXr(8~ zw$$Br~j)zTT`I_->Uyh#&z~bc0U~!h^xypE+Us;8CGBw)*$Kq=a9MBf^FzD zk75VeVb6*DPwfpz--Eq4fI@x+ci1lIaZN#q{&*3+3~^0;#rTIdjxzG`|M#YQemvga zU^)8-s+?1c2DIRd_O+wY|E&H$n$YaJEgky*NTLl@y*)n;qXxCNJU{Aj1U2sMq@LBb+nztISe^&lQ@UrasOXiw{*L_pQj8!#a3ki;{WIFI`2N>{@Vt)bJEcrJ@L5z@0{kH z>cvL&B4Qi74ORE_|8ZEkQlng{y{!LF)*sXV7p|tx8e8%7K|c&YeScxJEe6q>-B%Na z(1#)ZQ#$f_5YJM*#B?%e+75v0QM*{_J@{5w>=o+{=+NitFQ(+c(ng; zF8$H|!&~Uv(00+iopq0Wy+6c%zK-F)1G}&Xh34uNnE$tzp4$KaRz^5LKZHtkQJh0x zE=(D6rfdJrR{qUri_m5$66VqR`!afJ{of4b_h;G%>fRQ_eS4B;_5V+69|%*62DIQP zj-zWrdN@gTjAG*_J9njrGvqm3z$IKk?E79Nui*x6qVR$JdvTjyYTThjTjMVMJ}Qj^ zRLoWX^DA?#$UPLhmlDKv0{`g$yZg+mzbw6@J9^@a`M(!;!&deUq{jcXAzB*M%}7qy z|3}rQ>i^Hw{}0sv+CVA$$O-lTG4;Q2MZQI`Z&K1jKOJTL^~+H~uSD)V-)jxqLy`85 zI;fv(4!|G`iPsmV5lzl%?)|V#ZyoO(ZQIzsty2D1zpVZLO0+d-|C4o7wg1T@!VMG0 
z2#iAfZ|@lL(f>8Z(X%iCssD2*4_fxgdmH6JVcW>K#&@FQlTaXEPa*Tq+mn`@feL!% z<%eyGa8bhgVhN9Uk9fXHdJzN3fq+uzzn@#P>n z%Y4W_*7NW6|MevU!*ONb7XI7fv6ca02e}J-P;*Z^g*<>mC_>!ZA@1>-yktz=yKh?U zyYjC>s{YNE|Ii{#$1-hV{+->~%E+#E&p;l<@#t^8=Si~TOt;WZrsiMV9uVUH|ITpd zE8EVI7jOwzP$*3mT(qAQz4Vy0bhZk&Y3zYN@cB^M_xVtkY43aE`xU4}RqyA`FMB>5WHW~=I?3_k z$QGAy1NaTX5Ddczbj*7`j3PU02ZS-?IAozK3=9**-7An+|9vrAq_q zgywSJyw11x?vrHPi=ik_{zpls{GZ{Q4D@Zt)cVu0%6W0Dz$&ak4sx*t+t4sn{vRp+ z@#06zaPcE~S^7UO{ZEzug*hxt4OyElZIbna<$u@OA&ye)BkPqF76}zxio{C`NsCip6G=>c(nefA3gT@n=pVr2yGq8 z24zAk;`nz}qxu<#dw4&n?eG0$ct1GeT{V!Y|En$aeiwSb^Sxg@T3@+w&XZw?c!yyG z`p7e*$T1j)qS5RZT=V_`znrJH20^e~A& z1zkO*Q*s7oVNUd0@tpZ@*8jyi_+@38GIJjPN9%tU&`amKHd6op$zrqbkhUI3V< zMt#8T`oCmpp88K+7Wc)iXw^@wHa|}si?9UCumY>F203U@M>dwL{|bzM_fWt0SHB~* z|4)Yc{jB;Ahf#xn`hUQ^I7D{N(H_b4F2+i4*=!s9%20*edHRe?pcs-bhowvj?#~#lI@^^?Vx<4_5)7xtGWI}XeZC0o_>zJfI9jk``;z* zEBI&jKmGAyq}C~Bd;ded|B>GRWbglFZ5!uY!wuZTZQR9uge%_vWa$%eZ>QAyv(?fk zlG1VN|9>a`U+jOvrHi9GdLsXzeg4P-vJidfZx8-ST0gQRQ~J%2eg{gwWCeF6`Mxo= z_ZAvkd!={SwePFp-Q`~m@67CLf5pE_+cEnmX&=phBJ3ZV5%#_EY}mW_+3?}a{>JE@ z3wg86dwRv*HuFLlApXuNFNQ&6esv;zeBOSDbx(!^Tcw?D<3d%B7qu;42#2@7VC>+V zp|;x)YYSfp$x%H+@u}~HqGR6+rPisHbo?v!i|;C&PAbleCR}`-Dy7zZN#1{#wXAkQqie z|Ba!&!kd|23ETF4C2Y?e5Z>zle0W=3curZ=71F{xOZ$a)Cx1QcTKn~|JHvm_Wq~tzX)ufb1WZJiF{epnT>m$P zjQf92BWGY1<{i?9Us`}Cjk5@8v=d8__Yt3CpK6;k_`P1lB-tu41x`+s<5 zSi?UDx!8ijxz-WP&J5e=rIU>fuhrkC??Gjrz2A1)1CA`4p81d4|0B(%7N(A@KV|(p zJ=Om2t_knL`lB}9< z{lQY}57-)NP`lXNf7B1O{-Ce*2f{UuWuHKEhU@chy`^kL8}jGtN4rk$@owQLc^ris zr4#-q>7`rsp}E`XXHdDF{Q%`C!#REza0yp%71wYBH*p)W?u&CaqHXgo_kDy8WkId- zw_N=%y_eu0)&H5=|I)-Ja{@NA`*at+Cwid|`e6VXHhy9K&pve;ck6b~N4B;8y#24v zJN&8k?*rxDXWGA)wSTn@jyN9skAuWB1j8@__3HY%-TKt@IOi_zeLsdi4ypg&4QGQx zTmzJBPY?O>Se9cGP4=tJvpzW`s5L9CX>M zr)!e__bqie_ZDo!4y69U+hJW4J?;tDwAh>sdhUVr&^BH^pRJ76M~H2Sz0rSLdN>ee z7#9waMJPiRN~FJ-28$ZygKBB-0sBpdw8uVFe&^?QwT0|VY&_Kk>`LriN4RUrx|M84 zbE91+G# zuSKt0YW#sbaz~mNYo48W`e6VDp?Y2>CFe#;ZuzR&_|%HSo?!tE7?+K-yeQs 
z&~-B-j3cu!0TVF^Ioa}l>|YKHVG4K0NoxtoPWv&;AoImBi%i-7CfFyMUU<>?#aVm3 z(M!GKlGE(`^hJoa`)I>2C!?)m3BQ_F{RDCa>ecD1$Tg^=H_g;8nAP|2xZ9Ex2PfuM(cl|mfnCC#D4?Ce*~0icf@_WAMMZE)ld7kuXil3 zM_c?+VNYVy-7Mv%xdrqyIEM@9T=HZ{mzFvfS$oUagi-W3$7LLu6_2Yok~8eRu%la;z};yN!HMJ~Ou?oL-p_4o z=J#cXeef7xLcz4V^7*|)O89Qr&|_SdfJ{bDE=rVI=C)f{{=EFzbn zzWl|oj9h`bYHR%`TbGVi+-s1ex1j~CD4O|VC|>xYISMa^QnGBVxEH%dqxJ8|aZWC_ zU>kN|7xrK;8fIGmKG*tpvU#a^SBiJ7JsZe2?y6JPzvJ*l>)%mJuS2~sN0z^oN;~-- zu2Zc}LE&dFg#+SE>Ted@itF;-7jmx}i{)QIuS6NjQFA#f6!A~ZFECzHMz6OnzV1#| zsG>JpJKt1oEjhgbZL{6mboVGs3yP+Dex#m%zIaiN$N&F2AWb>%D30SK+HnTw(4a1C zM66?)(IU^M+W+)6R6Y1Y`(J%o+o*qwdioJ<&zeuO!v*nN!WCRaeTOyj*6v@U$A9uR zjZcId^qXi~%l3!X)!L7}?JL1A|6q2wOWsFefdcQUavBBY(vGmDQ|+nA-4hk!ivOS~ zn<*b>CyXh{yNKmu@@QI!bAEaWx0%g&Q&zfqnaz{CeFpR(`jKMg>{eQB^ zNBjRwpijgk{D0m5C+@RQ-!!UI=;f$T2UQ|h9J{awdvO4VP=qoxEI0ojssH<3)@MGU|IenL zB-P;;D^sp#}BB)xYYvqx9zG>ffc>AM}%G(-%*w zn_FkAf9Gp|@QeH3oFUJlP}mFPB@`@t*mfyZPhR1^ii*CT>#TOhP|rEgeCkZ^PW=(n z-!;c?;3je}u{V*Ma@g+%S>H{+kMOCqjqd1)Ug%tr9{P}RPrQEQ01Uzq48sVF!WfK0 zS8aO8A}63opa1dvi(=)6yjhM4dL`og{fWX&!W2xy49vnDG%S_>^%I)(A)C=cZ_QNp zkCy*Yby5DuVc}{P%Kz%BI_85#|jr4Z)bvg0pbygcKtF?Q)=x>z|JPqmA~pa2 zx_$``3sZATzvR4r$z}Z%^2lj^$L!%B+i!KsFH8_;TpLiQkCW6VYaS_Xo z&%d|v@1d?W+!_IX`L4BTi+pO2ia2+E3cqP6DA0Fb?71+5dlo8YdM?dSb zSqDVLWIz0Zr0=*m+h-)2;M?Fq|Wx`h>H%tDuuVVwfVW9W#{WJ~r z{;$jb_hP%=ae5o7CVT(Wy?@l8cC2R;Z#{X$dHGxQ1;w+;*m(T^$5DErf07jNKTa>* z&Hl699)a|BRNiC%u}-9XBl{1|@T=J;?@yAiCZvb@nQ7r1zY9q1AF^D(z&wCU+*gns ztzUqa>GB||cA6uUXKv6zWe{r5%DbpX{6|;A9sP@Y^6+W#oN(S{=aEV7Hu9=Cui*x6 zV&j_r;Wl{}3#8qm!Sc6!7x%z=Y`?sv|A0!|7cTszyoB!Pnr6Q#vKRWGGdF~OWUBo^ zKHhIrTYC_9zIz`=rsm&U-#3C@I9+*IXZ|C-RQso7CHo0|94f~uTM*|S#5PbCzX_O# zNtl9Zn1L>Bp5&7B(2-+LP43P<`iJB^EWjcx!7{8soZq>MtkdR;|94(P&p|HY9P;Lk z(#JmOW2|_yr4O<=!x*%Gqm-%_OVo$u(FTq9@1M9A8U$!s& zKDK|diM=zmzK-ql(fYdh@4xtu;P{W+!`;;D+8DLf#=gsqfs^?=wK29CpBN_pYX9vK zXQ40!j_swFo^oFg*!k&)kg`84cb}`>=UMm8uO?4@PF5kdD{9FGr2ZeIub52aN_(h!rFSn}4Ou(eH+aRn_kE9$MLXq> zJZbcx{DHEw(&lAp^!&r|sR6Dr2t&{|!g^S81V&*D8szuJx$>v}eDgy2Q=h&SNwgs~ 
zzP4Tcw^9Aa-dDR){$4A8lSjD6i6aX&^63P!PTtAgqO7Oa%OAD!XL8s8=}R5EQr%kZ z8tUdYGTCVU-{M4=BhqMbC8~zznpDOFMS>=l_fFnmy>b+a*;YP?m=GC z>iNmCa?dYcRLCEdF`tYW5EeLZ5td*XR$=oZ^8*G93=Q&B<96*YvRU40p+6qqUCsV9 zUHc1<_kX;l{e{&3*FQCf|AMxKdT)J!`VVWwor7F-9k;Io*}*ojjqDs}jez+9JEEVo zw2Rz>)OxgA!}vE&@@vN#oWmE#-!5=p!WGmhFPA^-zH7bX3)(>3E4hn| z{}=cD!u*2K<`)>-uP~>ea`5x)BmXn}>VI;L-o}Qt|uGP1lTp!K(=C<271b^Nf2OZhT;<|DEkj3kUlS3Q=*zZ7;Z9UgYz{d>K`d$)Rr-5W<(yEf8X058D8K1?U1|s+hJqJh_GqkFNV#le=(G~?r!(-#>`&|Zw~*m|DIV7*Z(Wwty_J< z+XKIvdagXj-}kgJlx0jOKzO;J81>NZN9HP)$OTJ zPQUIN)#SFFzZte4GVkW5_Pn94W7yh9xc*OtH+zQgtmfH&M5FL+;GMJcXt(ft^oFHR zh2JO7;ZJZGS76y;&M@)(1-+5pap<}59q!H<_GK9So1xtvY+a6-5}Wf=+<~rCPJ52= z1nUR>f&NY(Vdh!G7!u)g?x&pJaO_RMukvfR zj-%nkQ=w_1zGe^i&F^=g2swX_pJD*_-;>XiPdyoOMxr177QcjI{x6V~0Y{yW(`yHDcz?jh&3>+){l^~-}3xr?6+8`t|EAvW{dalWVE@7{B^TgX%I zexUySQ2D!e@e}q>?UUFq%ttt|vQOe*;}hZI$=2d*9GN&YnB8$CJ0$;4>eSB%dnbyg z_esS66IBoP94%uLr?bW;+nj7^fV61`P+b4SdTZP<7<|zGIrE zIhQkhQ+`bu%Bb_w!*zSmJh1o7DRt|oUrV%Ps%yKcw>#`{g)05}YI8;Ft^YhCZ0#L; z(0pcHXtuWFG5^sci^tkG^0YZ7j+K$+Co;mv{PQz1!U5MfINrR1ff?Zw?n7ijU;Ev- zhYy|qA~_TLFK2|0MyDGmPlSEvGs181+loW(dDJ)JRB&}0{cYdwuklmt@XZsx{}Xsm z`rpM}DKGxIV{hVpe2kKaWB(wXefb&b8|~@eO`NV8m1rMCULkv75oV1_oZc4o(TUT2 zMkm_OMftr%`?5ia(`UYwXdgu%!oO!$;`E}I67Bt7N}L`OWp?89(O*uqPt2l^PqdHw z<;3YC?k(iq?kzE)O7EtG5Z^dKM;qZxSRt1ra|N! 
zCVySJ(H!x==a93KUfJq@c6q;;C@<(4ew(}W#5WRYjvsgY7sxXtvslh>`ds99>z6pd z?^*E<#7A>`hW*ofnjg|Le2e?T*@F|6WHoBhfN$gXaR%>>{AS{h$R)n@ujB2Ep5d+K zJ;U~eJ;Q45H(wc)nCJetj{T*?|4IHT*6?3O{wA2Za)S4=-gR^4Bgp^O5p4+RA)SA2 zFZsWZ{l>0#51X^>88;#$Y(10_-UzEgcgMF4(FP~`pcfjHB|DG*FuXJA2jSf*tHS_( z?dGt(ce6)mP>}67+9M3%_YQaaU1fMJe>TTqG28(8PP<@C)UVRDKgQ9s@Ne*a6m9Ds zim(1K%%_*0i}p_8od4gr+tu5@PFCmqFf?d`G_a$z-`8$&@4p$3Ym@vI8L!c_*FQ4n z*i$O339YwZ3(31{L)-n|2su4oNUYC%A+e#`3yIe=UP$Eje<87P;0uXOysj_0^c)QEPt3A8^_^F=jp=}r*FtJ3xxYr zxMS*s^Y&-J`_=Y_^n8`#tich)chF4!F+N)SD~UfP+oHb;tX-Qiri1^^{X5a`VhF!S z{(02T8V88-mT>LgP5fol73A&LzMJ@~sCx&+3q4Ep^KGNe{>ao!eYwm~$DR{yI}I~K z==%%OM1ON*hO@!lk-yE;X`Ri^Gq$I(;LOm3Ml>L&^{KF4TVO-q^zizrr?kH^!p6(y zMV)`j{3q@i(!aLF8}{3NGqZcx*1LPyZolrg>b{kD`_rewj>X-zExY@tZuju+QstHP z3%dt)5ASVsF1=mfIWN0=_&{6lL+9^x{yyioPjal6Ii?Gr3J0~7J|-J7o(zYo(><^8 zU$P<7{QULY(oIR8J;LW|FS?KNV#kJ>A27DN`Gws>^(|qJYKw309%_dVOVka1%KX-+ z!VxqK{!(Zh+C4O|Pc&;2wPX%Vv|g4jMt>=^A;+`!_rCTX^#5vQ1JiNN#{NCRro0~h z1KcBQ>F5!*4jr0ED6_tfU&PCpfnUXr@~6Vi$rM%Xhp zK7U5|fc!Ainu@;W!m0cAlOOd^FBbO*2Op>>@2DrcsTb9a1;Q1QpE$3GEaomDOZ%%= zxAyS-aerq20)EgVRCVhSs@K|Id8K;V`L&CCm^-PS7XFCy8_33vp@}Ab&1gA0G|`&f z!}^>a*0J^ouaC+I2Uq!za`n3K1J84aUNA`;hyFKsb4_}30%259EV;-$|styKkS{@sxg$ zbnkt?i7nWQw>I_&6P>%QLq6EvBfQi2oy5B@e=D&g?>mW|C%zQkv+u|IE5DW4bs{tD zK074w0r!U&zn$1a=B<7*{AcI>8umW;Qdmpwz~AD4XFG%BdcJFn7KysL2va2 zy#+^c94FC^GdPC}xP&XXifhp1&$)@)xQqMHZOGZMcU?#)58RoNICybJ;^XTx68U#l zhC`pNwD;|+p>X%B;gi-^%@2Mx6c1l(d}J;A>Do{>aIL=oTJt~GnxD4T`jWMwdh$B< zjdkqH>)3bJvG1%4^^4bqBWRetE;OQP_&WJ-U1%A+F0>-qe_d$9JL>ZB>g7Ksrz+zN zt0>FN&r()??7Nkr5^2AnszU3~RiUMBR^oq*mKR^J6zkjka0(aEfzNOU576T~+8tPl zjo6I>)ZsjGR=*b3uYWCU!0Y>73%T203mdCn3!AVRTRL6~Tk!_oeDGS>_UUV3`)99( zx5&3IzZP~BycTwrzZTvh-)(&@>>_t_zqfQ6@<$ z`{^Hjx-J|TxGo$Vx-NW-{Oon%5Lw{7Lh_Tj>q61Ob)i^XC1k1hT1J+0SCEyCR}Ecf z{e|~W);PbGtn;4hXa80>a(-oKc(6J&USAoS?yU^X7gvWCdMp3r*_EN~;z}9e)v!MM z)v#gmtKs!mUJbb~zZy19eKl;lw<2s_`>OsAy9W0gvtRWe^dE$6!foI8gYZ_heR~G} zAne%rpAtKVzZ%|I|7v*m^sCm3zZ!O5e^owsHN1azc4E)LS3};y?h}|S 
z&%PS=&Hq8zzy3cZK05zuIH3P=aQpWYACLXdiToa~hC_p2_59M`*6$}i$@_kysPX%W z;+3z4lDWSXF8W@V(R+k6gUOhVnV65YSdTpHLjmeAePqa4hypa?7*60k@>V_-?vg`( zQ8|Wbn1gv(fDJc(5ME#K!?1>)gH0=b7&f>3FqF|7a1PKvN7saR$q$i__6d^{ zr^Dn#`~82X-|_DgrTmWIGdw_V`Qj~nfD#-oTN7#~tTC3$o`T=S@1u6fnoyU%MuvDY ztl#%!*g(EsY%YAG_AfSR_;1FR>raNQc;lY_FSgOQ<1J(8Z)1ms`cAx~|N1U=&F<#E z7yg4me;<2R>;K{d`iI!-*gov1e}n^#=6d7fQ~JL+bn)TZr9%2AD7t5i0wwfP#5GIh zsGwJ(s*!yahbJ4~M(u3&Rn*ht`pU=aGwID}ajX?ddK>=GyZ90Q1n>MZiNC;4@Ynb$ z{vMy>%imSz;Cpx#KfnfT!drL;^=QJ6@F(~){1|_WXUC{}@D2Q1Y{DBTK@)x(KgJVd z*;Vi#@PFYq@B`QbCTA1gz{e=WZ{v6IhxieGjGy4I@l*UgKF9v=NekG&v1j-T@+bHy za@f$cBi{@UVkp2mZ?in9N z3B43$j+LW=UWuwk7f7f`UD8-?D+H>>MLm@uF z0q%nvwfDAb?~!}wY7Z{d9$czDNbcdzdqsP3s`eteYoPWd|Mwh!pWMN{GedhbQ+t!# zHd=nfTX-8=uoZ9MP2^%DHeoYzupS%mI==a@q-lH)zkye=4jZruhf$9t{t!RHPx1Ho z9N!oha{e{`1O5~K3*Nw6*pJ`F@8i$#xA+{1@zOY+!dLMuzJV9>3@yZagRMZ{XYbWn^o+EL0aQ#?T?2 zXI>)Y#fGx^MeFUQu;J8M8W`7XB!@&soc& zQT?2B@W(McJ;l|u@Tbw;Ap9ImNeh1-!%w2$7KZ+p(LILF-XGU?|5bG3Z_rtr9{!H} z2mB0OOVUG{clIQ@W~GO&Y3boh^snMsw0e!5chkdflcTwxgQ1R`&b{fO`_Pc{-%x>g z-k*@^-=t@yhv8&SkAI)oG~l;V?`nhhn4FdfFY?dpt=`J?jmx}K4dv~N)LF+8;mtLq zvBCYr)gj&4FxW5h=Bo3^{WH~p(aoN`lT1!ZgpV2$;opjHU#;U))qzzH!{>1)bNLms z^=?aq|0>L(9LKnSjegL%8(h2cDkiyvt8|PUi&IUgW%TaWp!Q9>c_EkKaD8*g3PDe_WV6;bUCV!-iel z+s)&oH;3T8n?q?lY5#KbCb&1uV0%xOf5lfLkL6Xf{V!8Td)_1CggazOmqOE~vl-p6?x-PdC8z&*ro8lN$i1%(IP;|}2+>y)139{8O)oSiz7ojQ%F z@?^U`DMz{A_J`->+3T^(eRs`}|CeD1h6yXq{jI`!=G6Byex5qNz%|D3^ZxVL!{T@7 zx-YD9DL#LEr#rm=xV~3+J9T^y_x^!#I=p#vL%*GRcd;CcX(y(k3dg&SibH%G`iXm- zYj_8F%H0o?b9pgOt}s_wxQ~WZ0E zzZLIAIQNswZ^JNiQhFuAxA||$a?ao&eTDD??yh0lKCaU>!n@w$`YT<(ZwT)w7k8iW z-bZqu4dE?elPAqzTx73L<=h6x_rENj;pPQ6wjuZ7cSv@3UG67_xo^+5|1MVwd`^;gHyhhrp0V=Ts_>rf(OM>%9*c$us|5kiLWHRR#T`u?Y^ z(K=>rmTT0Lac_qPG@oX}MyoI_XcImuY!ByVVk+jNH~ONh&#JVpFe$C$!ql{maYc#F zIjhq;Yp11^J64T4G$QqXPvdeMF022S`u60;<-UD4^Wn6$whU$e9LxTnoe|#D2HVEY zwp}~yE&AKi(~df2eXIJPe0QpSre{h6=EJ;4zR$g9rSxFF%Ln9#+9a8x3$YkW 
zu^cP08f&p08?hDJu@k!y%iKJ2vAAy1my-Lq5266wxHGVGMf|tcuOxcVJ1=}Ak@Rp-M0n;oKJg9ekR*kT9~W2_{BgcwpWVnt09o2ZFmi>Ij3q7sdY6;El0%K!rm z_d5*CFgFGm?zfp?pb3mMHrT|96Tk+i-CN4Zs{d9_u6RwE zs~S?~$_&cM+*ak}CHYDS=L>~1BmK&m?L~@#V?H3hGhwyLnGZftvOiKJ75gQO#kqYm zav74>U2_ePS-lgbBW9uFIVO+S;_e?Q%+ue zt+JmmTbFXatv^!EY+j@6zCbpCmSUuy`nCl5SC0PzK7fkARZ_{i?W%Lkze5ewLfr}K zKxl|vC5^}?=JgHGoJn0Dx{9)zF$!pmV~hd}n44fN_n@}t%4yG(ow!IUBY4N58ow&2 z4P}f2bmzFn(-%n*vT*St(f6XOg}u~qk(9W&H{3Pz+#50*f9(nG5&wK--i*3;g1Uzq z@1~b`6ug*6VVtsH40OK=&+V1gK*x zM%t8S9b+u`tKWySlry%1UHv}XM)K_l>Gvjl4DUezZav7qLpbJ3p&7FVhJY$fa{=;z zs!-F8q}ib0H1_cHhP z&ZTn?U$gY&Up6h}`(I1vR{eG9+--kcdh)i6yBSrE>#COXqH>U3&6YtxHeVVcr}sb9*|Mp8UaY!hDvd4 zsOT}5q^^+C>1(7+`+X@-WWV!-%cYVz+bU!=_8MeuCg0#g)?;r#Hl}``@7`W6255#B zFdn{KOwbBuu(-Y_R!f9P?s{vY~MS^p0M zDXjm8p;+dBVI-LK|KO$__`m7Le??mSH+%!Kv@Vi&;aA`J2(joun6bD zefp2Gn)cUbxCi2hZ#O&yPr%EN1Jz)MVK@fwz(?>Ud<)AXct7Decp0t(O$g(EkiCKN zKgf+`{15Ul=R*O0Iw(wK{11vV82^J3%%xCw*H`O)h9ncwz?lkzn z;}J*Mm_Qks_^-W1`;#dfvH$z~BAJw}yJ-J_fBj!3`q2l_7vvh$JybQ^zhgDe*b<&C zp3!!2@|<=;w~2KQD7u-_zbD3&?}>T-d(z4?X+8Blv4EXt%a(MBxCl2wC&hzw6W)s) zbp=TjagSl|#ojX$#M*`+>BByV9Ke4VIfQ*X;qHc?LLZDmIhf!$WX%Urm<37BdG;s) zE#_RvC(gXJSMzQKNgXsY$KSwwz9AzB9m^nTNe!Yu86@R8_U_0ElFIrZ=4hCc#9oE0 z%?y$nzCwho3+Ne4?6pzLOyZ4!~K$Zp8goxEt2N1MoQf8lHz5I08q(27iQE z7=WvJe=dd3F-IW(iu?y`!F(I+h6mwMcmaL`dC&}2=!4(GG<*U}xTeb>1lGY;_z^q+ z6O;uX2T(UGV?GF0UPL(ndw@qylM9V-l5li2HLD4G1N;c?gA{lP@}U8QI4*~4ArfwZ z$1mZ%g~Pi8V0|=Q1`sCnRYyNjR%=P{j{ENXTDe|7a{+` zqxA>E=BURI{$BV8@m`9(3T`A!Abdc$FW}F(p@yMZO}ISdKIA_MZ$p+}M_v);L)_Ox z6Fdy7IsPl$PhnmKE$}H^1xKM3zJTw;F|fgZ!+AheKvRfJLM{c=7Bum&3%0@4@OR?= ziZGFwm%~Tc-@;x3$sDs8x69#E+^2CXho9r_L_Ur@dG{5xo#F2*<>Ux_f!PSp!YfzE zncoI6W&kf?-UDs$7a+~ggsl{&!ZduIM{^nRq1vK(7c**FnzfiO#_uWQa^%a%t;n09 z9rF;N%0jg6Eo6+-HN*ne><{`@QxVPk#l?pvJ<-#_b&Xqkv(aA-#wl0yJzx!cjN%} zLEMLe<75~{qT<8_Zt%cpFyEbrF&GC8-*?aA-m|&)oS>bOyLzW+Lw8CZG9P!Z(8|06rFcjE5EzYE!oy=RcIQI7YoFHdBB(>}&VlhOA_4r3olr;nA14m8q( zeH3>u{$t2-?49JD(@J_eNl#=OcE4TF7f_0{M|hejtt5P0PqH^4g3XHISZG 
z(v#!Ikt5h$TGCrjdLsvB$q(Fz@E=C@VDI&jUsL24viQIb(WmW@5@c!S4k<|3fxg8K zDMS`w&&6Gfe;zU)yJp!AL0?m{kvZ6#lDCWD6wlK<&l9o^dws%o<`1_^BeKf1o%zG< z%pY!N{&2gLaa=jaS0F2w#L90egWiSr;VisN`4moh$&j>$ZnEY<%m(D|!3&d=ZGWL| z`cLYjHz5%FCb$zGhgYE-{zUzD76J(y4l!^y{1gsC7F5AEfEt|UYFH23AOY@!hv4Us z3f0gI$z0?9%cv7z2g%WSDRs>EI1clz(8TyDV?DyWptQir<+NjPf0cgMIqJYxpWcPH*I&=)*Syn5m-B+Og**Wh;q_bTe*SFz_|zlM0e$F+-wTzirN8O>=<9wVUdtKgb-t8w%e;)p2jXHg5jPv-c-XAT@>>zBEEn7B z%$KtW;zQ!Un0r{p`JD{XbTW77{Ai_geR_p-|2$aQ&R!~x9|TJK->;O8f2@*gxTb5l zha0*64fIbBLnPP#BG;Eq9B<I-|L4_o;XE5T&tq^U;cf++dOci66XVzI*r&Lz zqtL+dH)4O3^K=pBY2vz^YkdTHg0#Pb`2Le~Msuz{NFT?E(_9aYgo;>&O{6VGbGCUK8#oa;CE z-;Ud(a8S+vz0ChFVXi5e@&A3y|0gp3pU(V$CiDNu0_-~63k^S%q9o@3cQOADC8S{~ zlpSS^5GrOE|A#8f)ljp@7$MYQu7?Kv8lefZ0h(RtKY?)7Pf^KSx%_(LaY;>@`pyO8*`j)9BxW zzki>F?o{f(`uADHuPVmoaK3Ef*K+>cUd9Ko=OK0Y7vNq*_(G&UgMBFu1u=FL#JZUv zxskMajkJ4>eEAHnA)Q)rdy;evBHcHVW(&wu@Hf(bCF%b);kIJFh;XN{e@57L()Duk zEQN3}ylXm`#_t67aN_(Jc5~d5@HG4yzTucJkrkX{KXF`&eH+|I-rNuS!3`U6dkvz9 zYagUS1o;{ThaiG;#ltSpLOHy|^%ZeF{osM}zps!#B7eg*b;B$?LjG$u@cu(~Ebl+$ zV%9=l67N40%-kTlG~WLW5mJ=G`wx1!mV4g;Wg+NmAS+f=rXt^m>KNYtINpC`T@dd- zehs0#|Hvlnh6UdL*&F13(!{uIiA0kItxJ~3e&qdNB~Dx55_tskqwq4c6JH1Mbw>Vc zjK7Qcx{0rcYwJx%|1p#DA31=15ceVchmj+Mcg0ZtbG!#RI%1 zDfY2Jj#Xca9vo|vyA*cYkikXC45Ep#L2tV7moua#!qwbBAj*bUe#ma%?#fpt`Y zyes-xsp-9zekJ`3=E$p;{6Gr5j7!EdPN`*_(!jVR=F(k^Qyz^KJ@cK}t66VMJ*!!! 
zNER}0ih8}5dLFyh$9unsz6=B6y~v(9)=Z#KJ`_wj5JEYC-4nxm53UHx0Z_j=J_hPH z$Vb7zU_YP1 zr##ToV9w3DUWzamf^LwxY$(NC0($(5;Z5>t2k-ywupfR6Z-T-*`7~U?dwC-~3Xg#e z+Ckxcd=IYW{oDpQyjx|T@t$+6i@L(gJDRD!r*HtuVJ+`!90c=zu7xF_raj1k2XoPM!19$QMJ^(+390=rH4uwZ~|Mo*3T*~7r&<>+HHjfo8~1&wW6rG}8X|unjl|a!_X9CFc>js71&pUcS^q@+e}MWwllmWN z#cn$nDt5}5HWTCD@c;S#9r1KIsY9%cf2V{Jfvd))hmT=yifluOS`nb+}E+;P`3pT+c@Dw};1~>vE;DvVRg$o?J6nP0~ z;>rIg%85ksKY{#DCjXIn*z?y?t|YG!9r@y)|2RbXpU(WpKJq__{(2JmPgz?0zoi)CHVoT;S8LFFW>`Mc@^ml3z+{3bOALQ^wP56Tg*AktLF}~KPqAJdeIk# zf^zhUp%99o_#E@^Py(e;R*wFEHTwSy5>ygTm6rK;1`cYV7IU48`F92m8i=>Cm-%<^sC9{dUH4;;^$uVmJK zC9yv+?%lZeM6thb4Ey`WvHmQUy?-Lu-*+wh3&Jqwkq~t2LfM;WHS5oUS$`J9`m;cE z@y?+`M7^R3B>#hW|3k_D5b}RUM2mh=-Zc9Wp~F=`I33{%&kO4x*kAE9`*=|Il$>Hs z>m2oxf%U)j=zclT|8=1MJ4n6ME7VgWwV3O4=%DG*S>Ku=VoBYjSkv|>wiMR6qW9F6#G21!`oQ}{I`*>9NFv|I*~Ry9;&}gKdH*-; zQFO zi*t;Ce?J<^HTB!$qp9ra$u&F#w*keH=5DwTcEHR{dzE{T4?+g_HOKQEwSuw*!XXqY z<3-jZZ-SLjn?%}^R@KO=WYYW)X`f0S93U^C1)5XHgQy3TrZldJfhZTa!2_ePm-CJ3 z1LR}Q*E37oATdDt643WgW<3CM5c?4FbNt&()a~=}((a_Lw^FBjsoRlV*t?NN2W^A$ zc;*go6ZQKF7Wkj>{-tIBehWUBf-m6`o|~&+E5t%P+zEUhUh^nuoV@?k zr`cZKe`GFp?KI{8EagA4V4nJaf%5-6^*^#0yB_xv{7aE#gfAbY{O9;eWEJ-6edzxs zqyLAjOGE#UI-?;I{Xb+Ab_4Fs__rX9PWq68=>K_XOCl}Ut<%hZhcKQ;z226F{=-_< zzs#Zk2b~*O|8k1HBmUjcQ%(QhK>fdj^)Kbr|DmjZS;qPo7xh1k#IpWHueM=X{}Rl4 zmn_~r7|Wpj54og488kr$ybe?FCHenP_!VYecTEvcJZM^g*ICmKsAOoqfdHPpba)kV z;S_uZS8@Cn*bcYDUh*S|_^yB(!2~uKgR{gF$T2$j0b&2dv$m7)oA7%aCg3jIpTNBZ z`XHER0u?UJ2GH}YRlo^&2hM|ndn(}$!Z3`&5#snA)IcLx;z;iV(mN4-1EdkVDT4fn zBtMXyndn>M-i3cRvJJcA0PV3f@&h?Q_(31-ttrwQ*@L||g8bn4e&pzR`U<%J&whm? 
z*j=Z{k9o!vkh#m2i#BMv`|eI5-5d&4a-Hhdbt!5 zwg_1jw_K`;zlQVIBFmO6|JQmfWF<6lJ_GK}__rYIu-BjBdF1?!NKI;hWI^_!0Lq5| z$u$IsRvRFBlz;ii0u<48mYo5J3AkKq{fCH-P#tfblQp zf1CkQR~;brOjb9fGXDcj8O;AcGx~2WtbsNLG5-^aZbb<5KMCkoEMxvB5&fSS=6?oR z;}FdJ541x^4C}2T1H`!`fV~L=q&t-PpDE^l(wP5AXMa258lZ1Di2D$782bqExZ;@q zL3*%{;_k(N3^|Tnvq1ajJbL=b9O|#!dFp@qe|eA(1yS@Xp%99oSWEk7HT}#``kBbG zG__w!`v><*!*}f;>^0bHy|jOj_1GJbjrcbq4cMEJEn4c_OzPe&o;NUq1*~8LJG9Ym zacFto^gM6KP6N-QiRTfzp(ma85A@Mq>esUVe`=WwF0lUpJbl@ty#I?lZ{Vh`@qmB) z&$m?G52k#E+@^2kdft^ELJB+yFY}&cLp3x&0x;#Rc?iFIct6eHg-Iw&r~Sh{R8Y@W zLe-LgQ2t$@{JTK;cY*Tng4Cy8kcNy4^kpwd6J`T6CtZ*hFj_B&$$5b`$OY=h3)GJn z#ERL5n;qF!e}OTy3(|i2f^^JZkj{k*;yibOJ^U`Pf9D11(O;0>>I>3Gn0^@0T@b!6 zPx*I2hP@YLM0?mT=6x{>td*-yd@M*7pH z!AqoT3GGbiq@C%6_Bh(15wuH@ju_gRnY1%G-v3=|H6M<$55oc4rQ}C1_`ieA@uM8; z1vl+d4~%fU3ws`I)_ll~WSt%|JBa6ycs0R1m&98_JGT^y6M62Y>B}ER4;>0H>kjdp zA#1sZs>EedOLtFtO&)#= z3eQ3ocwhj2NPb@pYhXY81d`z>tl>GxNB$>RpabG?+XJt_Zy*osFbWfZih|~AI0N@m zE@wh6e2+3d1g-$Kl+#2Yqu^JN1C{VoNQD-#!DDa_^}+q{H_YF_4156VaQgxDz(MY- z7uw-{_!F$)-U6W%9t90`VHRXV4&)lB|FzWrI_iHYpib9Ow-+Ld^wj^=)c=GjL6%}K zAdZ{4|BCa}{|nUr^VI+N)exo@S?8et2MsR9|DXx8fi^<3mHt0rj0YJ1Py1i`|G3*{ zX>*^V?QP}#ck=!tJH5RB$ba|$XQ^*aQQuIW_ag_0e{i3=zvOrQ|7qTT!nlzh;uxL! 
zZv3Bk#s`PZbrJ0F~~pA*4alo)IlTskbB+_MmPqa!sqZe2qG<3z>9DgHmm6cwa@}RJV(7e zPkl)BJ1_&tLF_}w;bg{t4>10d#yBrh{TBEr^3NQ%gX>5I%`%r{&7qqPIgkr&+*?27 zpGLQR2L1cRUMXDYMaR9D_HVD~7u5P69ryF-#^YZO75Fz1UzM&xsXmAP2VrWV4!`>8 zK53ZklSbMNO)lzxXvW-Cr<~PN?h@`7*azw((W-YLH{L`_77$o zZg$!yZN%w-cFv)G$Dnfq`Y+HGf&L3&ds1lsKwlc|pH$jE>9l_`Y5yFe{eyXA7wsQ# zbFH3u+CRy(e=v{Xrs}_Fdg&iRHvQHd7yUyk{X_b$dDI>Gm<#HuXP~f}dZ(N*V?ATX z)ITM(GfJuJ$_A-tIJSZ~Dlu2l->rrkj;$q*Is;?Z8@`+WX3o4Rj`{D?%zrb#-7?4g zH)Haq80NoYng7PmLKrL9BANf@xHiV`9U;trBRhiG7bl3djTx*3OlNKE>a)_5dRBUI z?<1am!VNI4Pq`pNk&Hz|(7z91>|u`Qf1c<66wm+S5_y;BY8J*9@)SGsU>{=s1pdJL zIRpK?>vKFBnUpEthVxFotyPYFpjG~fIco#&@O9KJpodY?#tUQI^LYIzy1>iC1#TVf zEf^pT2B&*vXtq~|3DeWtC%vRypO1C|>DEFVlQ3}uWfGi&g6l=2(oEa|WeqdQ_?jUo$r&anT*4C^0eSpP7?KASVrP57R`8R=a;!}C8Q{m%F3|Icv$?=k=X z7a5+Nm621k;#!y$_xV}s|5+L3<9}Z3Ec$A*a+YT#``mky1OH8aeDo>pw%~W^_q|y{RL(xgzIuDyQvPm$G=BAGDZd)Mo2`Eq!%G1& z7RtEa&uI&@{=Jw5WBSejInQz4g+irm?YKB1$E6)Q;>Iao$HlpCT)Os-Grl}7#*j^Sw4 z7yZm}=4XD7e&#siGvhKe-%r0`oc^Vk{-sw6QAjR|@JcZX*LpAe>qU%7scyL_rzvxS z0wvq3jt8(WS=J?@g}i$9r*tltx)iU}Lql31^UDUMiHQM2&?tKqu)gL{pp>m1lk(se zr6Q(9sSIjSsshKP8fp%Cr53VB0;HCD*ELBRK8!BYz34Krz$=$D>WO2oOU{2WP*h1U z@+|)DVETO5k&YkH@9#v{2}VX}XOf>aEE;29V7XEa`wH=q=QS)Q>*o0H;}!-RAR2ap z_V0Wf5}B`|&GlPFk}y9BFG3A?V%ZZN*#bHkCok3f^uj3ReD!ZbrD}4G)V|K1)9B2V zhcPDqHf`~DnL8Hd+CI2Kn$Ur3p;6V0-h2Jo5UKm>`_lOLtE53z(LbUc4jQhnllZ4D zrA@J%zCjRUP4G0l45&?Leg(gQY$%6XXaEC*@&5b{`40R8{t1^c&%}_Mh9No4)%Zsv z8HUsRiT23~#+ctjj>t{oD&8dSf{o&)-pOmaN{+okIb0Vh+Mit|!??Wx1%e*JO{-)+ z09_f1?f-&nsJ?H5hu|eBg6-VOEpQv$0e8c_V4>}|2icm=JR@yDBkdOxj2?1}w|q#( zkmJ@N896s1u5`D!>)#X)av;SmgK2IV%5c-3a7#~!oBGzx^X!)XWp2I!>lWvsA>J3a zbQ7-a#0c{hZsu!-n6Ge)0X8*iXeRe_zY9km_bUsYWT|I?7|*w71-hZ@JOOdQ)32CgWLd+BXCuH zE*YHl$dGSXhEI6NPY?6I!{SDIraa95dc=$S*ijE_=vfe1Z;`Bs0wvpFVg9R4e#&#J z^;#s)K>fG6Kq)Y26`je#{I^Alsx4Bywm{KG6(}Xh(s>K@pH0dWEmEUeomO0nG@*$ zSf!4z_2(SYfNbkj`CN#mV(`#hsAuL7mc*nXB~1Tct036l$m#|5%zFm5d<7*3*u~RnM z3_68vKQtb_O%o=D-wDBHMWLK^%mN2 zHffKr(WkMIc9fy9HmQlRNp+BocRgRJtS?k5g5Q#I)&D@A(&DtCE7{JtB6AdxJVQJ$ 
z4bDQP-dm{DA@ehBlE?Y9yEx}Eo8)+HlD#ZX$y#iull_*IF0@NYf=%>C3zcH7v*=u* zQpovq={lt#%|>Npm#npR$&R%%?q-+Vy>_YNTzR|fd_RG<|J^!esz9d{I{qj{33e%t zw~OA?fu50FN~=52|NNsQtYiGAo@*zbs(L&9KfBbNuuJV6ah|6iaqIPB7xR-`nY zwln`zq%_ai(c$Y5V@AGWinOzy;E!U~7AcmpJ%aCMguj9m+KqL zr2W58tZ2L%B`eD$*)9|Fe7S%Jy<2K@IZ7>Vbq@0Xlvx_)%+h$mO#ja;hQ(r~*=1&2zEm-Wn92WArIquU z>$}na$Wg3uX4b#`N$l07N?T~Q;#g~zcEWY6Hls6GsyKT~l`dpA*VPkmVm!etebe3O zf14@)%`!OGP5;j6SN!YW@%n$ z%;QDm`C1VJr2`+Ahhmc1rfHcWKXcO0KC>w3GpP$ozDp6r{c@Iv?fNdGvRpUSs?c z{T&8=OBlp0Jt~y{Cdo=%CE05k|D79Aa%0;?3wgm_C4cWK#`hWjpB_;Pr+EJ(JxX!P zD$&pAm69~Re{*1!lw~mg?;24mc6pV`1dmb`v`VTOL;Q`) zR?*(|DhBO{(wyN@T680dG0Ur%GDehEHo-Diuj2jh602^N*yBqYO-Wl)?E`GE_aP49~5i{9h$5a33~_$GYlY z`hOuK%Gil^^#33MT|n^0TqUFNOq#ydA z7kU_@>rNU`x)K?aOT7x+B*y3xuVSp$DUSH7q%HAM=^~F#kcZaHtHc6kQx{`B*@`JM zTQMSAV!Na{ri(sn7j45*rLjI+X$XE#>Z7`(?m&rBJJ>2U%ev61%vP!hSJ|7bRHS#& z2gp*&BD+{K*Ci#1U98FIl48OYdAp==S&5=sukTcMQmrciT$e^#3Eb8`VU?-lz_5mpJXQoOU}Mv$=whj+S6Vok7p!* z^_WtS87#VlVDujYq-e&g6bE&So?}W5G5=5avRxxe`Jk8Qe@v-#d6lZYBTBWytJLgT zF0~w2=jf37j9}*f8J9^6=J^j0L)w_qjJ*ZrBcmf&Owj5I7Bg6M0b)(+5SuGN?AkG< z%{!(z48hWV%B6Ia2TSL>GOr&7W=533`e52W0Wuuv zRYsPzp>Hv!xQW}dg!aERnDLol*1rVE_!6(8S>odR7voAc6EZnzD`@{MW87<0$&2Wc z{EQWpAB=w|vj655XEyX}LRYU#={~nYdd{Jr7Bi~!MLOvJj4K0KD`YU1_P>6G3~N`& z$PDArA)|`>v`g`v8dpZ0D`@|%VEmKu>cCOO73pM+fKxmPE5*Ict$61M!`#9+$Bfa2 z?hReZ7`sR5Pg_a9V5JOZtfU;U$?){JqUu;U(pR!ZZKZT*9n|y8P3+O6BRyT+B8LARo-Ux|(?d!xpTD}@W}zcfQ0O}mv+7vCt$S}9ph zCcEa{N=^uQJj?z|=SG#>;CA}9g-V|47uP9SDOX5#JoD}cu8`aikD}#y$qS+UKh5*M z)}!cV#*{*oAc~j(*Kgn*JWc&H)lU6Uq?8$olyYm4QbAr;Le-3eYjjA>qC;w-uA1_{ zyhv#nEK(ZvwEuPOVt{7)ge_oP>kt#PX10qtgZ6)7kz!3RLjSW}?9jHsLHm!i4Ro;n zoBm%yyEx;El&%9sN_TRR(nDR<3w`nAKMWjl$RG?ww#)E_BFgY0#kH(RadXce7|n2q z7siqtG7i}kT4|IMhd>Ww+*^MAI&_*D^TPf*_9EAH;=rt1yiQ80*D)@+PI}ewy6ez; zTPMMoT~I!^PRyUvr}M3os&ngP4!3&ZX)IqSi>i5cot#Iu1V&0A<(WZ0O}~Gg7zh)O z-L$Yy5|H+gb&UUBCux}5=dY6tWLxMuab#R4U8~ngH^+1aUMEiIi(ALI7-M!(ToZ-t z(7tstoUo2`SjRf>b>dE1$9;1@gmGaX(?v=(=1~?s{)KzfxFRGgGhDK>!l@g=CD$1t 
z+L;K+bA?NOdAJl*hqK35gw#+c6!k_(aaM%rp~Mg_r5sa6xboBCQZX1Vl_$cbYB8Mt zU%1rFhD+^KxYS{<$KHTnV|}`4_1rKI=Rl+1MEhxzWII{^>fOZr-zMh&HZlLRN%ChmNx`X2qMP5u z{!g1^k?>2X=axYbtcJ33zAXq9PzhDNo1_|Q1~*auZDDVkEmFU43;RE8k;afM(p0~N z=YI?D_GW3>wVD3kW-%SzO#hF1iUnhqsm)?#BYWGa&5ZwSmbSqtak!$SeLhM$dK;wk zM2$FmcSzUZ4hiA>J@Hkve`{#}Hqid9q5WG!`?rDiZw>9=8rr`#w0~=8|JKm{t)cx} zL;JUe_HPaC-x~3$>7N-Utyy8Rf%q)W^%9AU1$%jzBqJT?*7N;^^_+jbbY`s=Cv+LY zq?==UTw&6CI!yWo!z6{U1B+qI_k}Uv7be3~VUmEG3wt8cQy<1$1MeE7Lk4JK*Gty< zFv(sLCOIKtl7^c$X}#pdhA|HlCIzd*(DPYO`md+`x}I^A_4Gg2qkk4Ar5sa6xboC6 zsfY=q{t1(+j4-L*7l!^-81rvoQir`Bdjo!rAz_rQ8^y5f9lpP?QCdLb+$mXuJLyC1 zr2otQ7udC~*U^7x|BEU1zgS@Zi}UP%QU5w)C+t_SL^aKjv*|LKxx@4JnVGPDW{kUOP40=~_|I)ki>tQ+2%VJI>|G+f+ zpP>K1{Cgo3F&|&NcvSQm=szT(|3H0LmU@)7DD(eu)KN(P{J)R+|K$IN{xAEEG$)~t z2}TF{rqJp_{|_vftzer${}0+uqi+iBi|GHHNB;*MA1As#UC8dV3F$!xuowRQ{cOyG zQydz-R{gKQM-Q95h1vlV>gys^R0)luyV(@WKiB3O>jE zEC`QG09*q9z9?XE8J7p~;1S&nQQb zfygFg9T=ep2Ea-9N#rp&4sSy*ZkG{$5c%KOzlDFoH1*HbxW5-ef01*!@c$9^Al!Fi zzYDy${S^5Ow6Fb&@BXsyhn0OlraqCbgtO>Xvd_w{vy4%EC9MZO6f^IJ(Z#+WV5?>y z5Ci*y5Z;P(2i_vj5zh$H1;50c4?fOu82V<}=VKZB)FhsfUf!vp%(INKd_)_HeQHu~ zkx;I!l<;P-!!Z0FCg69FJ@~Es0jW99d&;|b67whUPq>(VaD$h9WzJ7Y9kQPLXij1u z5NHAeRAjL)P3md-sUJysFZk`-!QbDW`6!C z`uQ__pTgmjP9OUDQ+%HS*}cg8J`;YXIKFRzH23<%a_%y*f(`6Ym&*Q#(2&9Yh|q-D z0L`L&4%}%$u>V#Ub{!m}i^>vmXAh^|R>Rb7S-ZeK8OUnv?TjsS zB%tSmbi|<7lgyX`_D-a8A9_K_uZejHdP4Y_IL3zA?t51(==@m09m^i-Faj0$fHr?@9=V>*1F?BbOp?h1+4C zXX<|BuW;Xnd;zjB=fTUE3n3fM5v~N;3=Lp|37)+nWD#MHAght@AistN%$G8ja2d2< zw!n>;cfbO1-HKd9?ngcXPr?gu9=AfIgYY(F7jh*W!Q77=;k_L}dVy^&G|S+3n2*3Q z&@6dhvd&TeK@Q}uCSOULJY+uUTM+R+`Y-QGA!%C##pq<|p@cLn&3r@365nV1n|y}K z`NL9`NxmK+pV$6YYEyYGlF-A5IwB4GcupdDF31A|dC?rja{|T_SFnK{ z+D`CXKs$6mXB5u`bU`=t>?L2J5Bg!?5cvv2FbpGmc`m@c_!seT4Wo&yYv&rqxQ206 z&ePYW-#Ci&($91sMNbY!e7_eLatQZf{0ESO`1k42v%}tVVivs)^zhEn*M&~}oD6>Y z_o@^Izn6;2y{yWAw8Kgap-lOlg z{`&s}&w;`?-9>N(1m8&89{!B`_1Hr&e}MTEdGRI+&3b(!JQC={R;f9f*8y{Kt785TIAEnm*INM zKZFX*n~@F3+diY+PWV2|LlBSIja)^$AcinIAb~Ku;eFh{W{g=gh5qj}`oFV`{~~j- 
zYZuV}K9Bw{vH-gd_d@)O76PUCDEi+}!rlX=e5bxFVGZ-&Yor2M$y(tmWHt60WNqde zsf+&|<3DQ{|5?NM&l<*m)-e9VH`<{k<-f$}T_Yyn8rr{W(1%#V_|F=#B5m01$hPxq z#DQ$b-hu4I--+zP-i_?x_}&D*Z=bk^eH!?_{rRIZn6!rPAFbj0M{DT+tq~WvF?)RI zf9+c%-sCkhh8*XZOSh5la2bTqzqTcxqy3fY*+ z`ah%rdo!{nc7pLg+W*iBX0U)2Y+#4BIokj5@AZF}ozMl{@V~bIO{_on<%`3SrTf5QGAQj1?9T+h5qH0*#Hi&0awJ*&c~jKxepnQeHghLnT6XU$S2_- zybN00M)9{{?gJfmJq%$!jC(oe*O0%5<4}$LYy7?eg?$Dth4*kTkylpm0XIUfq} z(?Mb2VJYIdC?4eb;W;TmmSQ&&hY4E23>L704eZbcthbbQ=zvaeLKk#H5A;GGuqIOm zU=W627}WY7l}T}f`mLE!@WL3V-EXXZ!cJz_tA7o$R@U;{DdDnCu9FD4Ue=>9cSF<$*sT8fP0^}pi~6@! z{fklmwyA$NtAE?ozgYg%VA1MPYEU(#8c+?V22(?+f#TtI_%X0g;0+0IC)@>h!!Fnj z_rSf72=~EHU=Qqt`+<#vZ+H;)!G3rM9)=`%1XAEJcpRR91Mnm~1;2n)cp9F8XCVy^ z!gKIE9D*0%MR*C);br(GyaE~UD$M0-{Queh{~Z6n*8iX9|IhdT>-_&k{{LeCe~JIU z)c;@R|1bCdSMdMQh#KcdBWi3Pji|?dVNsD`rp{7{x|VKof}R$ZTx>+YgPZ73jF`-89#1nQ~!VJVKnp_D?Kh>c3UJwok0;c|Xlp!+)CJum1O$)c<}p1y1LySNK_v zdS9Ps*Qx(S7WLntUeD)t^}L_=sMq;LcBlGZovZ%WS-voSXP9sF8=BR$`ogRp^+k_* zgcSR*Bk*(^kLJR=-n>M8)6~<9CYjJ8hGwZ8&Z7J8knj-7HZz<8-s%>1Myvc8S`K z({{hpcE3}sM8)D1>vxLvJ8_E=#h-d(2rkxt)9ODL!$0TepHuSBS@K*M*oaiWt_J@x4CrLCx{mBvSPl)I(675ffXn!I^Z)L|W|FQlA zh>nqH|IJ5llW6}fqvH_mzfmg8X#ed|n??I?Dw+!;HvT!vABTE*{baN*)CgRWed^Z=#NbO$5D-PiyEc>O1H|^n}qz@ zx>>eH$<}Dux<$6)7$aM^$<~`?>vq{1D_i{$`0r?|dZ_=dVkG7!AzvASkHLSN|9`vq zZ!|^?<E3QZ}Y-l$1@6!;6yg#+W=NkEOgIkIUnm zo`ZCGJmpz=9P1PEcpATG*dkBJ6SvC~De}a#@*6r{pQ3{Du62d;3L-q)Mv#gQq3+prpPmsjtY>@{BwaFVAp)&k*dH{qoF1 z@{D>ZWZpCUr}2x1E%MAu@(d~YtUUX)JewxZ9+YRFdrF>tMV=+`(}Yk2N{g4Y{gU>O zq&+5SPf1$pvy#RSZ`EH7@Eq0`C5@E(?mt=%%E5R!c&8lPF9#ozgHOxBG&%U9JSWfX zm*?eqHQ}C*m*?-4=dnK|&p$5DzbJ>~1$iOm2^D`_-^plsQC@sOUVKSjl5}}lUQXrb zCslqaugEK@{8A-DUKKtnEtztpRgReDh*gf*G?6-DcsY9MkcakB3mYMWFl84 zv@($=6ZtYxAQL*7D3pmJnJAVCy-bwIM5#=a%S44tRLVq^OjOH6jZDt(V*CL3k4NhS?4*({SSGHH}alT5bCq**2{ zGHI1bn@rkevP~u(GTAPZ9WvP|lTMlJlF4qF?2*Y{nH-SGL75zq$zhpv$)sB*Ju*2e zlU|t|6JL(_a>b_=U!M5##aAG{BJmZAPcOa_@s*0NOnl|ys}x_A_^QRn1YxcC>cm$s zz6S9%imyq02JtnEuSI-D@tMTeDn7ILEaJ0@uT6Xo@wJPuLwud$bBeD^eBI*f5nr$P 
z`oz~Sz5($Kif>5J(D#jq&m}&$po#Ar6-p>xtJg~8wK93lD6g61wT}No)tx0ra%Jmc z-b1-l(TgS^GieBVk@TVo5I~~u`@ZjcqVIvXn@O51S!HSG6v=uaD-%Fs{}_Zm2UzDy z#$3^QxbM#+0uk=M{$TmO@e zI8{DQmya{$<81jjS3WM3kBjBwQu(-CKCYCHtL5WH`M6m=Zk3PQ<>OBIxLZE%m5=-7 z<3ag&SUy_BY590oK3P(%_)tFmP(HPkPp#!sTlw^%d}=SB zI?AWc@~NwQ>Moyp%BSA)sjqzMFP{d=r@``RsC*hOpGL~3(ei1md>SvGCd#MD@@cAk znl7Ja%BR`#X`y^tET2}&r;YMyvwYerpSH`Vo$_h7eA+9Y_RFV(^69XA`muaEDxZ$a zr<3yOqWm;eei|-6jg+6p%1`6vr|I(3Z24)f{IpnpS}s4WmY>$kPa8$^|4-ZHr=9ZC zZux1y{B&4;`my|URDL=xKb@4HPRmbc<)_Q?(^V0L__?M0+**EaD?fLapL@#Bz2)b= z^7ByndAR&MQhpvUKhKw+7t7CA<>%Y~tJu7;ZDZ5O$!Pi9Q9gH-&)wy7Px;(eJ`a@7 zgXQy3`8-@ckCe|Q7%QL0%jb#md9r+-Dxasz=b7?(wtSu|pXbZxh4Oi^d|oP_m&@nX z@_DU%UN4_F%ID4Ud8_z~_j#v$-YuW^%IE#^`JjA0ET4ZYpO4Dt1Na`TIio`*{&Z z`O;Fpw3aVJ<;!sSGE%;bmM;_K%Vha7Q@+fWFLUL~eEG6azATn6E9J{-`Lb5Ntd}nv z<;!OIvQxh7mM?qd%YONCP`(_NFF%$qN9D_L`EpXeoR%+V<;!LHa#j9g{XbjFf1Liu zdULYdoa{Fz=jA`{o&00SzyG{wPA;31TPOdREB_ewuemb(pAY39BmOm4M*VBf$NX!~ z$N%%HIdN&CIhkrsrvLM_IdN&GIhpnE?Yx=KoPW)g`SOp2=6tC+S#C~N%0E_{^R@RA z({41EHk*?z|C;mdzfS(q@^<+jt9~!Ex)dnU)Rg88~**e z*_`Y(C;QFGL347{oE$s(t-U$vXihqtldk5ZyE*A`^3Udue>Qjgv$^A+L(R!}X=b_k zTidT&&3rb0``|y*Y}%xEa$0`f@~>IqR~UMo^0%7hZ#B!`YG$$3 zEPAV%WV`&j<6kouC(TsNnR}Zv_wO{bcJe;K{`&-$uww~t6Cb=!{IxlndeC}G$(J5en+|Xek1J3+w{$Vy+BTziJZJ$ zZRUO2Xx`77DVjx`HZwhayY>AUa{2w5xt%t1J8d3*=3g@hCx4y2-P+9IteL}^^?lVm z&V}YdZ)gAQ?J4l8{crC3@LP`yzjYbnhrcHL=SVZ*+k@VgdG`KDi#zwP8T@&(Fel9& z%~^9-vrH$=n}5G-)GGun=yI!HYaA-^EQWO?jC4OdgN94?YaoP z{&rt}dnmvEP=0SKzkevdca+~d%kN#~_rCIbfBAi&{61WMpDMpEm*3aQ?;GX!?ehD6 z`Tel`epG%xDZgKr-*3wA_vN2Ilz+CAf3}u?wilm{J{$kpX@Nc-|Jhys*<1eESA0r( zh5vK3{INFv$J(v4iTAU~_p`b8v*q`*mG`r4lkL7=JbXVpa?5G?V`KS`4fEJoc|SXR zJKO9ne{A-ZKei_R*mCFA5Fc^r(D zKMs48>xZr%x$oo$)v21x?mKbcne(&e+%y+6YEI3oh4RN${~wpf<&W##Kdz_W&z8#{ zH~oKHyYbdz?tb{={>)kHANLo|`rglm-_IuA&t~7xj{dmwpgU*q&h8E6e(3Gt_akqU z+|RwAEx(^_yr1p9pB?|#V~*b+dh%a4oHRH5`NKx}vu&=}->q6yt7=mpRJ-a>ovKT9 zs~**>`c%IfP=jhn4XY6~rpDEbYUVw!7SxhjRx4^%ZK`dxtM=5sI#7q|Se=(Y+l{N; 
z^4l%1eNs)SX=PsR=GAUq?dH|Krq&ftyZN?nDf4dMQRd%n{_W=9ZvO4&-~OXIQkK(x zqE6LW`Llz&gS&&bgQtV1gQvszJ1nom{5s65!@N4oqr*Hp%%j6RI?SWPJUWfH)8jjh zxzqaT9972NY22OGMdzaOxK5ATvwrtXyJx-bnSRgodzQI(qwdO|`~3T!mwnI4spsZ& zLYdd8^?GVvr_5)jJvZh{(_WhP(sY-mzht>=Znum}W4g4QOV8n@WnFp>FD>uVb9iZ) zm!88*&*3HWrR855^CkDC=kd}qFD>)Zy1!(&^t@iqDr2}bhD+<_lIhZOa%ml1dQL7q zCzqa+OXIuroLqWNF1M8DrS*B~ zb#Qs1F4dK~R?L^T$~wBdR}baSEB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~ zEB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-6~EB-5f;qX7N_^{8#+f{MY=~ z{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{MY=~{QjW*dChsv zdChsvdChsvdChsvdChsvdChsvdChsvdChsvdChsv>CfSx*PPd!*PPd!*PPd!*PPd! z*PPd!*PPd!*PJ(;H=H+|H=H+|H=H+|H=H+|H=H+|H=H+|H=H+|H=H+|H=H+|H=H+| zH=H+|H=H+|H=H+~=Nr%SjpzBs^L*oZzVSTY@Za#?@Za#?@Za#?@Za#?@Za#?@Za#? z@cZTOKX3SN_;2`c_;2`c_;2`c_;2`c_;2`c_-{FHId3^{Id3^{Id3^{Id3^{Id3_A zto(V)dCPgrdCPgrdCPgrdCPgrdCPgrcguIncguIncguInb<1_jb<1_jb<1_jb<1_j zb<1_jb<1_jb<1_jb<1_jb<1_jb<1_jb<1_jb<1_jb=$Z+ueYApThHsQ=k?a}dh2<; zn9p5>n9p5>n9 zp5@+icF%L~IlE`N_nh5x-Fr>mv)!}Zv)!}Zv)!}Zv)!}Zv)!}Zv)!}Zv)!}Zv)!}Z zv)!}Zd+zS}?mc(+jQ5Pbp!|8ydGC3VFV0_?v;CtYEV0++tV0mD9V0ke9 z2jhP*{s-fKF#ZQ)eK6JsV|_5r2jhG&&IjXsFwO_#d@#-jV|*~42jh7#o(JQ3FrEkF zc`%lT+w!j;hSaDUQ^)@Ob@uf*e)Vf{%)#2ej=Ktkm{$Cwt(=nrF z)j|1JXTKUygXLdct?Gm7RNbmid0dyrb*-rlwd>zsXaB!T?^h1~)n%q#&DqTX znrH8_GT+`+wWEx&*L1z68^f9~&17%+*M|9Qbd-PX&i_A_a<*K){?MXYRhQ~kJ*rpr zseUz}2Gx)nRwHVxssH7xpGSAD&EemduKWJF^w>N9?@I^&zW1qn^sn2V#wY)MX>RYY zOAc56`_k-R_cn8%P?P0ri_x?gNsCdm7(t5>w79QjO3kP_wV;;Nidt8jYFquNj?{@d zRp;tLU8!qz^R)%Ixz0DG(xnzk}Kx?zq-M3{lQ+R+Gx4jS8mHn@6 z&7|(xYwGQWX0a|AQbXNu>TBEje@*DhU$^ZW{@eVTah^F6_W_A6KW&U-E=6z*(Vq1@zX|+3i zZ9P_v#aezpdwZ6!EU)M7_GYg0E}g2grkaJm4dCs{+q3ev=mzMO((0Acdhxa<{(9;z z-Owz{OQ-dwskimkJeBX4x?F0M&1>miJrvfa{yoa(X>V&)ZR&$+R~@ReskbFI!~RRy z=BaP%ep|Z@p{?g@+kmrP)u*hXx9bC6|61j3gKimoTb0echnji=d46|Y@g9DE0soKT z{`G=xCimWI8*S=+;xQM;zIvH^B{t(}X5f1Be7#??mfO5f-`)cMew{S~_Lf+ETWSLt zf4irdbfT%RMrxI|ZobcD(rwY*L)cVX zYFjmnwp_#h>-lu0xxIO$^*v^W@0YAFPgQesGn~Em0Xl2ek9XSpoSW6rpskr^jxKq- 
zws}*wc`Lpx`_#GjP+RkiG|!K>P1}uXmg4O(V(pH&eZ*~}R@InS)2zQ5v+iH;Mp-iV zW#VPmS34E?+MF_AEwk*`PR}=zO_!2Z`SHJ`z5~lBJizivwZ7WD&P8A)M@$FKV7~Jjw|Ow zz2)2RZuvH*#^=?ta(Qx6ncvh|`8IQ3zRh-)ZyO%7xlz7tAE@*4ZRfar+dC=W4hPD& zlcx67W%+hy`L^x9*|yg`sJc~;vMk&0-)xhAvyJ`D_VqX0)8A}Af3wZ}&Gzy)+sof< zBY(4f{LQxUH`}ke4$C*&h~I1*ezR?;|4@x7Cfjn~Y^Qy*?e)!e);HTu-)!ILNL0h+ zoA1!yeAoTvd+j&hW54{fkhPA#Y<<-RWS>S|M+%HzA{mF0CAZ`VzE?CwyO(|xSW$2Y3So>{f3PSk06 z?Dg1QxAz)LuX*=cey@4=UMchGW9u`YKI81Oj6VLpOJ)9j{QWKJgEC#e`ScGf>!IIt z{l?pGJpDf^%j-Aa{yXJy1J=R7d3hW({=qKQqZkJlmGKQ)hlAF`;GsHF9zSFpL#7>K z8uDBW@eUcwkg*P%-?00Jt#j|Q$B`9fS)-OQHdG$RO+P-WIL6I?!k8!6C#>@c^O>-$ z3Cox;{iO9eX?~ODF=d(4Ow;B&ZGFvJ2ea1UtYyx5-sW7NGv+zQh2HYGIG{|oWQI`Z@#dCZYjaBNlBM|>Pl^*RudbIcG(O#oRdu<-=k$JQy=FuLQ zM|(~l?Kye0cjVFDkw<$%9__(+w3p)X#%%3{c(k|S(H@0IdkP+Hg+JQ*ezax%Xv_N1 z7WAX7=f^wq^B3aLmhPi1*hgEgkG4P`ZFN4{dVGAqv}O4CVD&y+l_y&dPe06+rPrd*=`S5$P2l}b~gKEzE)PP!2Tk5(zb+~SG?#W+-r;bB)T%Nr9pX_CQ z>O3z`-u+Kq9%~&ubF(Ys2r?s*2wBA~tHVl2! z1GZe=vVa};?KK**F@9EsXP*%f%1s-@B4-DeKAPx-7ahs>Z ze&sQT=Hp%W^rOo^o|Y#c`cFrj<>?so_@O+VSV1SIJ+b;uEZffYr&EtPg*-L=sqvo~ z`vLo)vTj$A6wkJE6p6o<<@twMwVb}v9@;v4-V{RK;RBOt( z$JocrbL>P}2V=%PW<8A?@3`fSyMMy6Cq~M%x5M)k%hbFwkLgxrnbW+}*4>QtGs8S< znLd=n9n_|ByEnq~T#GVxA5hQpyz{2>A@#gqnnlwun!gwP^Ro53Y+bJOsp0axYPwaI zSB-6LM_D(HbT+U5XP-3B>#Xa>W z^QL8OSx4K(wC%QS(`_5CUu}Ng@%SC`UphgEY^+WIj9S@@%K?vmLw7cF;cCY5II;Ty~Z|+bQ~NC+M@Co6kNHp6@*` zcIJJzv+4T}?W#|WsFU*Dm+$X(+k9_1R6nYF^-#XIcB@{sua3)i2Q0sTa9`(?T2)(W zN4dY#d^&H-_b&6d2k*N*ci+2A=Y8?LYq5N{tL?j8ZQp(A`)>Ezcdz~LJ?7mrtmf5` zGLN3S^1XLmZIthQZtt_4zVq_E-&p%cmFokRH(F>T^eP1@VW$Sj?xK~@%pfaY_ z1!dY*>tfa8)+}$`BpJ$BE0_Ka!Y`uEN0`+;@z<3;&?WS)+*e?J;mraiWPkB#A&>6qih<4%mxG5GI} z!GAxozE7R|TmAjaxX!#b{B8bzZatn`kN!4)zp%b9-0lF>cVB0|JIM3hfsF4CUVL{T z;=2P7-+f*AZfE*;JJG+}0sh@V`R{gmzx=SOF3XF*zb|$ZzO)>x+w#&nsD{;8d9i2Z z#peHu4ceEsyYk}T_sfUb^5XCCi-XQDHcnm~5Ps?GP<_f{J5A#&$4i%a`!)BMZu9J& zR8!@p??ZX%x4glt@-j4DUdAuV%ft_=SM4j;r{>f_d6{;b*TT#6nKG|w^O>=n8RMQY z-x<@+xP8XFX4}+?a^LKha{sLR=UkuTn(tD^<-`5Ohx^OCY3G?2EyLfum&GMz`Tp*` 
zEIyPMuc4QvcIEyh<6biD(oK2sW&Oox`^&1w_>%szYCfxn%GlQ2?wj|E*UZa?^|G<4 z_R7m{Q`W!F4+J>Eg^m%Vjm9(%^AUE^hcM!9}uy&O%J7oXcNC)WL$ z`JG#@7uV&*!RQwUO<#PqeR0t8<=VU)oP4=9Mh6RD90+`IAn?WK@r#3OFE<|VYwe2z zYcCF*z4&~7@zwU_)>s`7d+|DXaWLw|!KW7onqC~Vd2!I@#Q~TX2VPzrc&T%nrOqFg zx_43P{zwssQ!T1hwW$xPU3I8V)up;skLp!@s$UJLK{cd?RWq+qHKxYZgql=SYFf>x zSv9BT)q+}7#`NQ|T2ZTNO|7d9WvoALscp5RcGaHRR|ksY#~;;^I#ws@RGq1F#b^tr z{&=OX)s4DUcj{g}lU(vy^;-SSuKs4%6aEwa6aEu^ zf5WT4;q`=HFICk`RecYyzK2&kz3K`73I7Sd?fs$KjA;&KjA;& zKjA;&KjA;&KjA;&_r0c`@cUa)PxyVuslMaX6aEwa6aEu^JKO3B{|WyI{|WyI{|WyI z{|WyI{|UeENA-mNg#U#9gx}wxdcuFg@4Hhy<=3!NPx(*zPx(*zPx*az)l>dc{!{)_ z{!{)_{!@P6yXqe0fl;2L$YFntD@}Kgb@}Kgb@}Kgb z@}Kgb^4ngjr~GI9wwtQ&jMW~Ddd7dof5v~tf5v~tuZOPcp{si6>KXqT{~5nM9`%g> zjQ@=PjQ@<^j?j9>f5v~tf5z`SX+7ijzgVwl{Ac`U{0?B&Gk)J!tM3T)jQ@<^XGJ~Z zw@p`V(^Y#`>KXqTzdbDVjNkW(dd7dof5v~tf5v~t?|VkI4Oq|k&-gW8ReNRX8UGpo z8UH!|IsZBTIsZAoZN_@ef6jl-f6jl-f6jl-f6jl-f6jl-f6jl-f6jl-f6i}bbv@@l z=RfB^=RfB^=l8#vujl;d{Jx*pbN+MwbN+MwbN+MwbN+MwbN+MwbN+MwbN+MwbN+Mw zbAEf8s_#_QXKOv@Kj%N^Kj%N^w->6O^ZU+K&-rZ=)N}p|{tNyK{tNyK{tNyKe%lQ7 zg8zd5g5P(wdcl9ef5Csjf5Csjf5Csjf5Csjf5Csjf5CsjZ~MF23s&`BR=t;1?`6H< zzu>oLtlBeHFZeI`?IEl7kkt$R3;qj!+wS#(|APO5|AOE5#_D@xwI{7!@L%xT#;F(l z7yK9e7yP#QtM422g8zcwUbkwmTfN}7v%cC{Uu_pvJM61Qys8neYQ(D=@v26=YI~{L ziC^u+uNv{H9r;xwUe$Ar4z^T{c=eiJBVN^rS2f~Q2Vkm3ys8ne zYQ(D=@v26=su8bh#H$X@RE>C5BVP5Lwra$y8u98i|24mTP4$}pn&0=`>OfA_pjS2M z)oXqSb*k^c)vn&EL9c4is~Ys`HNWkn>Ht{PpjRFEsT%aE-N99ZUe%yi?G~;Y^r{BE zYWHx}pjRC%s~YsG{ae-d>Z(DnYS611^r{BE>cCOepjS2MRo}U*2ED35uWHb%_J>ss zdev^`YP+o3H&z{Fs=k+3-^;58y{bX4`hH&Rg06N!SKDq?gI?93S2gHW4SLmX>8e4m z+NV}E=v57RRfAsDpjS2MRSkOehF^nTbpWes(5o8sss_D!!>>WFYS611^s3$4RfAsD zpjS2MRSkMogI?93S2gHWyS}Riy{bX4YS611^r~%#dc%Lif5UG(qS}tAH~bp+ssm>A zmS5vuwadKPzNi}as>Z#laj)L;Yuu}LrB{u6RpVZ@{ZVayRE>Mp!M3V#ui6%=_7_%- zd(}2cz2(=yS2gfe4Sdx>%c_B|YT&E4{I~qJTdGFBY9C{@kFjdxs~Y*L?cb`Auio-& z^@oVU-wyCOyzN(?GYUrzX{CE5g zDOCH7s_m`n&_dPNS2gz4JARFQ^^RY2U%lhk;8*YXHTl&$evN+BLCWfIM74ciHTqSJ 
zepRDi)#z6>`c;j7)nSUN(XTpGQ8oHijega>*m}pW(XZa|+iz7J#;6+p>K(tPzk0{7 z@vqvSRqfBJcl;Uv>mC0c{~f;rq*XIu)eKlQ16Iv|)qbz)plQ_%STzGy&4AT@u<8&= zb%>ygjEA!)j(Ju_#gNm_%#z&hm5Lb!m63D zY9_1?{15yO{15yO{2C0a2E(esus-l>GOU^mt0u#$$*^iNteOm~Cc|pKd(~)IANU{m zANU{mANVyN)(3vght*-KssXWTK&%=Nt3y`Re#`p6uW6t@@Y{c1A0A46_(8R(R@J6H zsCLz%I#rkIRz0d$^{IX}pa#{D8df7}RE?=|HK8Wel$usEYF5pud9|Px)sk9PD{57( zsdcrXHr1BeRy%4}?WuirpbpiK>PQ`{6LqT2)VaD)m+DGgs~dHz?$o{FZ{cs@Z{cs@ z_oFpw;kQ3GE&MI~E&TScMSoqi@TG-c8(&)ZweqEfUprq~__g$}!s; zz}W8>t%1=R7>6aJJur^Lq=jFLU^F_Uh2OrxI5rder(^$gTKOHqOe=pYe=C10e=C10 ze=EOz*wIWF`?1r?-^$<0Zy#e?`R!*+D}O7${f%km*NBo<{#Jg+fuaF1t^BS0t^Ar1 z)5_n<-^$<0-^#DYFq#R|%HPW0%CAc>x&)&mFs=Np{H^@0{QBzB%5UF#G|NRtTy(@m z(_1vXMGI9lR7Ia#^tnY(TQproPh0e~rHx-FTiW>B_;s>HCtKS19ny_sSJAr`O>5Du zmNx!2e*09@#;-Fin$eqeteG%BTyzm30* zU&B%~EJedoG%Q8KQZy{ZKHz9tN*lj0RfMS`Oclo_B21MJ{K8Zbriw$^5vIxqeqpKz zQ$?66!c-BaiZE5|PmVBEKJW`vMW8Al_&@M};QzocU={nEBV-jJs|Z=;1HYhE92d+7 zenG2z;Qzq?fnVq5ID{Yj$a$R*!D=IOdxUej&AV@OSVFtEGd#gI`NTI`}*IJNP^JJNSj!qIoKs zry|gn4t}i@5p0WKTeMI_uq}>bL~~U-_%&FigTI5ngTI5ngTI5ngTI5nlfRR{lfRR{ zlfRSSv5*MIMK~^+w9?7n$?y0`blb;q>o`sl$FAd8Njmw3=hDgF$=}H@NEgTXB21S~ z{!V_4UJ_xMZ;G*`8)X?4@@V2Cx0h@C%@x^>Ew5eFrECJ{Eii-lfRR{lV3nD zT1VpeVLJIWkVP9wI{7>KJNY~LJNX@7OecRQe<#0IlQ`ZO&4STz7RMda#oxuR`7GK{ zB1Moc{w{vUB-6#;#oxv6xP7DxB3+O!{x1G5{w{uvYjONCUHo1Aj$@{aUo%^}_#M+s z7k?Lj7k?MOghIObr4*uhG5P_b@hy4+q9-6-{9XK7V$#Lm#oxu>#oxv6m{s%*MBhNV z_#MAW7r(~2=p#rMe;0ojzn+40^NTm6o4=c1%pu+U8tkIqAl>{T59#LDdXsK`jd$th z@8;K!kZ%5Ner-AF=I`e3=I`e3=GU)~Zhk!r>E_qElWzWQ{%-zme!UFo=I`e3=I`e3 z=I`e3=I`e3=I`bgyNF}KaV$99{9+ezOgP>A;ujIWh+}rq7#PQi<2Z4;`MdcY$4fVV zH-9(3-ie53q=&zUUrZxn8tLKh;TPA4=EC&wJJy^Y{vQ4w{vQ4we#f8F!{5W-!><=3 zJ^VfVJ^VfVJ^UIJ)5Gsrb$a-F_E-X`@8uVVi8xHeVImHbUjAPGUjAPGUjAPGUVcrU>E-X` z@8#F<84aK5<<}RJUViOi>E-X^*BqQa{yu)kcB3&g8bhOfEPeca{C)g={C)g={C)g= z{C)g={C)g={C)fyO4G-$sWg53ef*kB)5qV(uO}gW{C)g={C)g={Q4Bq$KS`_$KS`_ z$KS`VmnVJvef)j=ef)j=ef)j=ef)j=nw-WTG{#16Mf6rggKV_grJui_ zzn@>zbu`VUpI_r_`uY3$`}zC%`}zC%^<|`=zn{OKzn@>rUNqXKpTD2KpTD2KpTD19 
z18(~H`}sBErk`KWQ~LS)`TP0%`Sn0W>tFi$HR`6HU$bro_y_nk?M7Q*^hsrae}G@V zR0jCSr^e}I30UjuRm_y_n0 z_y_n0_y_p)y<~uYfPavGkbjV0!*aAkMmuB%`3LzmFh`F}^vGn8U!P0{`3Lz2`3Lz2 z`3Lz2`3Lzm*k_P`kbjV0vwa5n2l@5bWRPFqSu{jvkbjVWkY96jv|&brbO!nL;$)D2 zkbjV0({u*;2l)s2_331gU$0IE`885!kbjVWkbjVWkYArh2Kfj12l)s22l)s2hxmv1 zhxmv1hxmv1^~7a}UsHDU#$||KU?4;MnzS>-Kg2)8Kg2)8Kg2)8uO}o!{6qXh{6qXh z{6qX&OryCwL;OShL;OShdhasCKg2)8Kg2)8Kg2)8Kg2)8Kg2)8Kg2)8Kg2)8Kg2)8 zKg2)8Kg2)8uZcXG$TP%0#6QHZuPHVg6zMVg6x$?YkM~ALbwCALbwCALiHA zn_>PD{t^BW{t^BWem##F;UD22;n)9|5&jW=jk+1(AK@S2AK@S2AK@S2AK@S2AK@S2 z*Eg9Fel5fq;UD22;n&EY5&jYW5q?em8Q~w{AK@S2AK@S2AK@S2AK@S2AK@S2AK@S2 zAK@S2AK@S2AK@S2AK@S27tF~Bzi>`Q_=N{D!au@4%0J3K%0J4l#W|z=qx_@%qx_@% zdOtJDKgvJKKgvJKKgvJKKgvJKKgvJKKgvJKKgvJKKgvJKuYWY7{GG5#@r@!E{>kMWQ3>u=2%{}}%m{}}%m{}}%m{}}%m{}}%m{}}%m{}{jU zQ}hRBj9-sz#`wqh$N0zi$N0zi_0DFDe~f>Oe~e!*ZN~V=_{aG5)n<%;j9*YGWBgALrNqn{obe{&9X`tBmuH z^N;fjUS*troPV5OuW-is$N9(k$N9(k$N9C}XPkeWf1H1uf1H1uf1H1uf1H1uUteCv z`N#Rk`N#Rk`N#Rk`N#S7?PY?0f`5WvcqSA46Z~5GGr>Q>Kf$lRFBAL|{1f~W{1f~W z{Cb)*!9T%2!9T$-4v-0ck$_C_Pw-Fh3*}^je}aF4e}aF4e}Z3hAQSu({1f~W{1f~W z{1f~W{CW&C!9T$-W{?T~34S4;Oz=-5Hi6(!9T%2!9T%2!9U4A$uAU? zN&ZQGk%dh1Px4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4RlPx4Rl zPx4RlPx4RlPx9*rk9b5T`6u}&`6u}&`6u}&`6u}&`6u}&`6u}&`6u}&`6u}&`6u}& z`6u}&`6u}&`6u}&`6u}&`6v0O_^0@%_^0@%_^0^w^Ja>FihqiKihqiKihqiKihqh< z4|}Hg_5MblBUAiS{8Ri>{8Ri>{8Ri>{Q5$p=RH&W`rae|ktzNu{we+`{we+`{we+` z{we+`{we+`{waRFrJ3TN;-BK5;-BK5;ul=Z6#o?e6#o?e6#o?e6#o?e6u&;@O!H6k z>%Y%5|1|$J|1|$J|1`f~N~ZbsvSylJKWnD>r}_0bM~E!b{L}o?{L}o?{L}o?{L}o? 
z{L}pUp)<`t%|FdQ%|FdQ%|FdQ%|FdQ&984d!fDYvooRmk)0yU<=AY)D=AY)D=AY)D z<`jM4`n5B|Kf^!6 zKf^!6Kf^!6Kf^DqlNtUQ{u%xm{u%xm{u%xm{u%xmexaVs@Xzqi@Xzqi@Xzqi@Xzqi z@Xzqi@Xzq;7taj;4F3%O48MFtX833LXZUCMXZQt%GQ+R8IC_htw>Wx>Gs8c_Kf^!6 zFW4ISip=uQ^3U?m^3U?m@(UehmVcIimVcIimVcIimVcIimVcIimVcIimVcIimVcIi zmVcIimVcIimVcIimVcIimVcIimS5j<^gU;mf0loiU*047pfk%q%RkFM%RkF62a;KS z!Klpg&+^am%Z6l@U+;8g`30sj%RkFM%RkFM%RkFM%P&k7S(420&+*Uk&+*Uk&+!Xd zMbIj9{B!(s{B!(z_cO;o$3MqE$3MqE$3Mp}KpEkz%<<3h&+!XtWsZN2Uv?$3E1BaL z-pU-mKJU!&&+*Uk>s5|?K<4=8_~-cN_~-cN_~-bAGc(7pKRk2%bNqAsbNqAsbNqAs zbNqAsbNqAsbNqAs^ZfJt^ZfJt^ZfJt^ZfJtdd@S?KhHnUKhHnUFKCu|{&{{q>6zyj zJj*=)Jii|G%=6Fl&-2go&-2go&-3eF&piJ;|2)55_RRCo^Uw3o^Uw3o^XqZXJinYo z=K1IO=lS(^_!syW_!syW_!syW_!syW_!syW_=WDWz`wx1z`wx1z`wx1 zz`wx1z`wx1z`wx1z`wx1z%R>`1^xy81^xy81^xy81^xy81^xy81^xy81^xy81^xy8 z1^xwoxsojKFYqt$FYqt$FY+()FY+()FY+()FY+()FY+()FY?P7WRZW7Ul=%x{EPgH z{EPgH{EPgH{EPgH{EPgH{EPfD3R&b|hLf&JzC;{}TTa{}TTa{}TTa{}R8TW#lfh#J|M9#J|M9 z#J|KZ-;*W&CH^J;CH^J;CH^J;CH^ITxs5FGFYzz&FYzz&`%#4~@e5-|7&}Y+OZ-dx zOZ-dxOZ-dxOZ>7QS>j*fU*cclmoLgP|1$qF|1$qF|1$qF|1$qF|1$qFzbsOg`Q_@e z%)iXP%r85dW&UOUW&UOUW&UOUWqu*?Eb}k(FZ0WwWSL*CDa-uJ{PIm%=3nMt=3nMt z=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt=3nMt;g@;M z3jYfK3cvhoR`^%=SNK=>SNK=> zg@1*Ag@1*Ag@1*Ag@1*Ag@1*Ag@2WQm0zwbtNg3{tNg3{tNg3{tNe14S><2lU*%ur zU*%urU*%urU*%urU*%urmr=_q|0@3~zr0aa`B(W@`DKr?$}htf8KkW8ukx?*ukx?* zukx?*ukx?*ukx?*uky<*#SaT)mEVsGWR-uFf0cig-;WJMHZH6D@^O**jEqxO`B(W@ z`PcZ@`28qB*7(=>*ZBQFLDu-!_}BQ?_}BQ?_}BQ?_}BPl?y|kTw1_ z{x$wJem`=MHGV&IkTw1_{x$wJ{xyC-evmc(HU2gJHGV&a5P7V~V?|aqYy4~cYy4~c zYy4~cYy4~cYy4~cYy4~cYy4~cYy5JqS>u;=%^Lq2{~G@~zsz7{ytB@~&cDvT&cDvT z&cDvT&cDvT&cDvT&cDttD;vMgkahlbe!tR?b$W2LA^C2LA^C2LA^C2LA@XjD0rvH~2UBW$&`Vzrnx3 zzrinymkoZ|<81J6@Ne*M@Ne*M@Ne*M@Ne*M@Ne*M@Ne+@Mdobq%PePuf0KWcU%oG! 
z{G0rn{G0rn{G0rn{PNA&?P+x*-7+x#-8+2-Ho-{#-u-{#-u-{#-u-{zOK z&o=)y|2F?N|2Dt;eYW|x`M3GE`M3FZ_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs z_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs z_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs_;>hs`FHtu z`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu z`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu`FHtu z`FHu{+2eN>vdh2AzstYNFXNtF{ylzK_w4cS@%t^4?D6mM@A3OJq3rSd(eLc>`{D2G z@%!=b?D6mM@A1n(XODl6e~*8Ue~*8U-w&E)kAIJUkAIJUkAIKf??1%vreu$QkAII} zmO6XS{CoU+{CoU+{PNb>g@CT z{g>?X@AL2T`~86I^UH&0pMRf!pMRf!pMRf!pWkoBWS@VZf1lqkHf5iGpMRf!pMRf! zpMRf!pI^Q_`~3U-`~3U-`~3U-`}}@;Cj0z;eYIp9CwKj1&$ zKj8Pnb~)hpn>ab(Kj1&$Kj1&$Kj1&$Kj1&$Kj1&$Kj1&$Kj8OUIyvA!;6LC$;P=}) zIp9CwKj1&$Kj1&$_nSNMn>#t+Kj1&$Kj8PfBRS+hHpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*{lHpLJr~gm? zpZ-7nfBOIQ|NYi}{eSxZ^#AGq)BpEd`}P0n|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ z^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ z|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ{~h)p_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i_8;~i z_8;~i_8;~i_8;~i_8;~i_8;~i_Ur%C|EK>?|DXOp{eSxZ^#2|9ANC*iANK42)Bkta zf7pN6f7pN6f7pN6f7pN6f7pN6f7pN6f7pN6f7pN6f7q}8Pye6(KmC9D|MdUq|I`2X zu>WEI!~Tc;`v3I*J?z*2_ptwA|HJ-={SW&e_CM@@*#EHqVgJMahy4%xAND`&f7t)9 z|6%{b{)hb!`yci{?0?w*u>WEI!~Tc;5BneXKkR?l|FHjI|HJ-={SW&e_CM@@*#EHq zVgJMahy4%xAND`&f7pN8f7*ZAf7*ZAf7*ZAf7*ZAum4Z~pZ>qo{?q=`{?mT_fBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(zsvs1{>%Q${>%Q${>%Q${>%Q$e*J&?|1SG4`!D-1`}P0n|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ z^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ z|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ z^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ z|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ z^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ z|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I* z>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq z|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq z)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ z|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJ zr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c z|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUc zPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>? z|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm? zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v) z{y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6( zKmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp z{eSxZ^#AGq)BmUcPye6(KmC9D|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7n zfBOIQ|LOnJ|EK>?|DXOp{eSxZezyN-`}P0n|I`1c|4;v){y+VH`v3I*>HpLJr~gm? 
zpZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(zc>5e?AQON|4;v){y+VH z`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|DXOp{eSxZ^#AGq)BmUcPye6(KmC9D z|MdUq|I`1c|4;v){y+VH`v3I*>HpLJr~gm?pZ-7nfBOIQ|LOnJ|EK>?|L^S&-a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I z0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy z!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a z0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1Da zgaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!- z0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K; z2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu z0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx z5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S z1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rX zAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv z3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L& zKp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST z7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhl zfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuw zFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp229 z0AT>a0Q6!1hy5S+f7p)!2m{cE{U7#!*#BYwhy5S+f7t(F|A+k__J7#_VgHByANGIP z|6%`!{U7#Y0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv z3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L& zKp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST z7=SPUecJzN|EK+*_G19T0E7Vu1JI}apZ0&+|7riH{h#)K+W%?)r~RMyf7<_P|EK+* z_J7*{Y5&vyr~Oa+pY}iPf7<`F|7riz{-^y<`=9nd?SI<;wEt=U)BdOZPy3(tKka|o z|Fr*U|I_}b{ZIR!_CM`^+W)lwY5&vyr~Oa+pY}iPf7<`F|7riz{-^yIfG_|(?SI<; zwEt;81|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu1JKLa0E7Vu z1JKLa0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rX zAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv z3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L& 
zKp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST z7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuwFaTiy!T^K;2m=rXAPhhl zfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C$L&Kp2290AT>a0E7Vu0}uuv3_uuw zFaTiy!T^K;2m=rXAPhhlfG_}I0Kx!-0SE&S1|SST7=SPUVF1DagaHTx5C*{8f2gga z0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?W zL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz z1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaN1*ei}eD zfM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCF zXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks118 z0MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT z(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz8J)f1fmfXaLaw zq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V z0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?W zL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz z1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$ zhz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c z1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh z5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC? 
z4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1 zAR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ( z8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2 zKs1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4Immo zG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4 zfM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCF zXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks118 z0MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT z(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G z0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLaw zq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V z0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?W zL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz z1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$ zhz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c z1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh z5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC? 
z4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1 zAR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ( z8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2 zKs1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4Immo zG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4 zfM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rJ(8bCCF zXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT(Ey?WL<5Kh5Dg$2Ks118 z0MP)V0Yn3c1`rJ(8bCCFXaLawq5(t$hz1Z1AR0h4fM@{G0HOgz1BeC?4ImmoG=OLT z(Ey?WL<5Kh5Dg$2Ks1180MP)V0Yn3c1`rKkG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR z7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|n zMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y z(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifp zG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C z4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU( z0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy z07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y{lyQ07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy z07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=F zfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfP zU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR z7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|n zMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y z(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifp zG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C z4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU( z0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy z07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=F zfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfP zU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR z7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|n 
zMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y z(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifp zG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C z4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU( z0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy z07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=F zfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfP zU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=LrU(*Q;T7!6=FfYAU(0~ifpG=R|n zMgtfPU^IZy07e5C4PZ2Y(EvsR7!6=FfYAU(0~ifpG=R|nMgtfPU^IZy07e5C4PX!Z zX#k@Ej0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP z8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn z1~3}HXaGCyKkYy5KkcUhj0P|oz-RzF?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5 z?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5?LX~5 z?LX~5?LX~5?LX~5?LY0m?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4 z?7!^4?7!^4?7!^4?7!^4?7!^4?7!^4?7!@%0gMJP8o+1(qXCQtFdD#U0HXnn2C&Qi z%YGWbXaJ)Dj0UjF{>%Q${>%Q${>%Q${>%Q${>%Q${>%Q${>%Q${>y$Ez-R!Y0gMJP z8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn z1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y z0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U z0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|o zz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQt zFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)D zj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1( zqXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}H zXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP z8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn z1~3}HXaJ)Dj0P|oz-R!Y0gMJP8o+1(qXCQtFdD#U0HXnn1~3}HXaJ)Dj0Uhjx1R-&S0^Fwug=Qc zU%gDYzxsc7f5idZU)_X%e|1av{nfo44*u|OuB_kRT&8sRX9pJr9e%v$^!okHIf46| zZ~FH)-zn~Iz69UjB&+u~E!h3-@BJCy+^T+mbHDZd&CMzIyDKX9yUPRjyYuw-yOXT< 
zyR&-tyC1dgcRx$q?_NCK?@S2?pW6G=^S%H0)ZX2peZRY%_I`Ii(f#fQlKZ Date: Wed, 29 Jan 2020 20:17:54 +0800 Subject: [PATCH 57/59] fix pylint --- scripts/bert/finetune_squad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bert/finetune_squad.py b/scripts/bert/finetune_squad.py index be3e520262..5fcf818ceb 100644 --- a/scripts/bert/finetune_squad.py +++ b/scripts/bert/finetune_squad.py @@ -467,7 +467,7 @@ def set_new_lr(step_num, batch_id): (accumulate if accumulate else 1)) == 0: toc = time.time() log.info( - 'Epoch: {}, Batch: {}/{}, Loss={:.4f}, lr={:.7f} Time cost={:.1f} Thoughput={:.2f} samples/s' # pylint: disable=line-too-long + 'Epoch: {}, Batch: {}/{}, Loss={:.4f}, lr={:.7f} Time cost={:.1f} Thoughput={:.2f} samples/s' .format(epoch_id, batch_id, len(train_dataloader), step_loss / log_interval, trainer.learning_rate, toc - tic, log_num / (toc - tic))) From 92e044bc9cdd201f0e18847d74c9cb9b2d944a97 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 29 Jan 2020 21:00:03 +0800 Subject: [PATCH 58/59] fix pylint --- scripts/bert/finetune_classifier.py | 137 ++++++-------- scripts/bert/finetune_squad.py | 269 ++++++++++++---------------- 2 files changed, 176 insertions(+), 230 deletions(-) diff --git a/scripts/bert/finetune_classifier.py b/scripts/bert/finetune_classifier.py index e1cd187225..c267d37395 100644 --- a/scripts/bert/finetune_classifier.py +++ b/scripts/bert/finetune_classifier.py @@ -1,11 +1,8 @@ """ Sentence Pair Classification with Bidirectional Encoder Representations from Transformers - ========================================================================================= - This example shows how to implement finetune a model with pre-trained BERT parameters for sentence pair classification, with Gluon NLP Toolkit. 
- @article{devlin2018bert, title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding}, author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, @@ -105,57 +102,52 @@ type=float, default=0.1, help='ratio of warmup steps used in NOAM\'s stepsize schedule') -parser.add_argument('--log_interval', - type=int, - default=10, - help='report interval') -parser.add_argument('--max_len', - type=int, - default=128, - help='Maximum length of the sentence pairs') +parser.add_argument( + '--log_interval', + type=int, + default=10, + help='report interval') +parser.add_argument( + '--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') parser.add_argument( '--seed', type=int, default=2, help='Random seed') parser.add_argument( '--accumulate', type=int, default=None, - help= - 'The number of batches for gradients accumulation to simulate large batch size. ' - 'Default is None') -parser.add_argument('--gpu', - type=int, - default=None, - help='Which gpu for finetuning.') + help='The number of batches for gradients accumulation to simulate large batch size. ' + 'Default is None') +parser.add_argument( + '--gpu', type=int, default=None, help='Which gpu for finetuning.') parser.add_argument( '--task_name', type=str, choices=tasks.keys(), help='The name of the task to fine-tune. 
Choices include MRPC, QQP, ' - 'QNLI, RTE, STS-B, CoLA, MNLI, WNLI, SST.') -parser.add_argument('--bert_model', - type=str, - default='bert_12_768_12', - choices=[ - 'bert_12_768_12', 'bert_24_1024_16', - 'roberta_12_768_12', 'roberta_24_1024_16' - ], - help='The name of pre-trained BERT model to fine-tune') -parser.add_argument('--bert_dataset', - type=str, - default='book_corpus_wiki_en_uncased', - choices=[ - 'book_corpus_wiki_en_uncased', - 'book_corpus_wiki_en_cased', - 'openwebtext_book_corpus_wiki_en_uncased', - 'wiki_multilingual_uncased', 'wiki_multilingual_cased', - 'wiki_cn_cased', - 'openwebtext_ccnews_stories_books_cased' - ], - help='The dataset BERT pre-trained with.') -parser.add_argument('--pretrained_bert_parameters', - type=str, - default=None, - help='Pre-trained bert model parameter file.') + 'QNLI, RTE, STS-B, CoLA, MNLI, WNLI, SST.') +parser.add_argument( + '--bert_model', + type=str, + default='bert_12_768_12', + choices=['bert_12_768_12', 'bert_24_1024_16', 'roberta_12_768_12', 'roberta_24_1024_16'], + help='The name of pre-trained BERT model to fine-tune') +parser.add_argument( + '--bert_dataset', + type=str, + default='book_corpus_wiki_en_uncased', + choices=['book_corpus_wiki_en_uncased', 'book_corpus_wiki_en_cased', + 'openwebtext_book_corpus_wiki_en_uncased', 'wiki_multilingual_uncased', + 'wiki_multilingual_cased', 'wiki_cn_cased', + 'openwebtext_ccnews_stories_books_cased'], + help='The dataset BERT pre-trained with.') +parser.add_argument( + '--pretrained_bert_parameters', + type=str, + default=None, + help='Pre-trained bert model parameter file.') parser.add_argument( '--model_parameters', type=str, @@ -171,20 +163,19 @@ parser.add_argument( '--only_inference', action='store_true', - help= - 'If set, we skip training and only perform inference on dev and test data.' 
-) -parser.add_argument('--dtype', - type=str, - default='float32', - choices=['float32', 'float16'], - help='The data type for training.') + help='If set, we skip training and only perform inference on dev and test data.') +parser.add_argument( + '--dtype', + type=str, + default='float32', + choices=['float32', 'float16'], + help='The data type for training.') parser.add_argument( '--early_stop', type=int, default=None, help='Whether to perform early stopping based on the metric on dev set. ' - 'The provided value is the patience. ') + 'The provided value is the patience. ') args = parser.parse_args() @@ -282,10 +273,7 @@ cast_dtype=True) if model_parameters: logging.info('loading model params from %s', model_parameters) - nlp.utils.load_parameters(model, - model_parameters, - ctx=ctx, - cast_dtype=True) + nlp.utils.load_parameters(model, model_parameters, ctx=ctx, cast_dtype=True) nlp.utils.mkdir(output_dir) logging.debug(model) @@ -416,16 +404,14 @@ def test(loader_test, segment): if use_roberta: out = model(input_ids, valid_length) else: - out = model(input_ids, segment_ids.as_in_context(ctx), - valid_length) + out = model(input_ids, segment_ids.as_in_context(ctx), valid_length) if not task.class_labels: # regression task for result in out.asnumpy().reshape(-1).tolist(): results.append('{:.3f}'.format(result)) else: # classification task - indices = mx.nd.topk(out, k=1, ret_typ='indices', - dtype='int32').asnumpy() + indices = mx.nd.topk(out, k=1, ret_typ='indices', dtype='int32').asnumpy() for index in indices: results.append(task.class_labels[int(index)]) @@ -445,8 +431,7 @@ def test(loader_test, segment): f.write(u'%d\t%s\n' % (i, str(pred))) -def log_train(batch_id, batch_num, metric, step_loss, log_interval, epoch_id, - learning_rate): +def log_train(batch_id, batch_num, metric, step_loss, log_interval, epoch_id, learning_rate): """Generate and print out the log message for training. 
""" metric_nm, metric_val = metric.get() if not isinstance(metric_nm, list): @@ -472,8 +457,7 @@ def log_eval(batch_id, batch_num, metric, step_loss, log_interval): def train(metric): """Training function.""" if not only_inference: - logging.info('Now we are doing BERT classification training on %s!', - ctx) + logging.info('Now we are doing BERT classification training on %s!', ctx) all_model_params = model.collect_params() optimizer_params = {'learning_rate': lr, 'epsilon': epsilon, 'wd': 0.01} @@ -529,8 +513,7 @@ def train(metric): new_lr = lr * step_num / num_warmup_steps else: non_warmup_steps = step_num - num_warmup_steps - offset = non_warmup_steps / (num_train_steps - - num_warmup_steps) + offset = non_warmup_steps / (num_train_steps - num_warmup_steps) new_lr = lr - offset * lr trainer.set_learning_rate(new_lr) @@ -538,14 +521,12 @@ def train(metric): with mx.autograd.record(): input_ids, valid_length, segment_ids, label = seqs input_ids = input_ids.as_in_context(ctx) - valid_length = valid_length.as_in_context(ctx).astype( - 'float32') + valid_length = valid_length.as_in_context(ctx).astype('float32') label = label.as_in_context(ctx) if use_roberta: out = model(input_ids, valid_length) else: - out = model(input_ids, segment_ids.as_in_context(ctx), - valid_length) + out = model(input_ids, segment_ids.as_in_context(ctx), valid_length) ls = loss_function(out, label).mean() if args.dtype == 'float16': with amp.scale_loss(ls, trainer) as scaled_loss: @@ -568,9 +549,8 @@ def train(metric): label = label.reshape((-1)) metric.update([label], [out]) if (batch_id + 1) % (args.log_interval) == 0: - log_train(batch_id, len(train_data), metric, step_loss, - args.log_interval, epoch_id, - trainer.learning_rate) + log_train(batch_id, len(train_data), metric, step_loss, args.log_interval, + epoch_id, trainer.learning_rate) step_loss = 0 if step_num >= num_train_steps: logging.info('Finish training step: %d', step_num) @@ -608,8 +588,7 @@ def train(metric): ckpt_name = 
'model_bert_{0}_{1}.params'.format(task_name, epoch_id) params_saved = os.path.join(output_dir, ckpt_name) nlp.utils.load_parameters(model, params_saved) - metric_str = 'Best model at epoch {}. Validation metrics:'.format( - epoch_id) + metric_str = 'Best model at epoch {}. Validation metrics:'.format(epoch_id) metric_str += ','.join([i + ':%.4f' for i in metric_nm]) logging.info(metric_str, *metric_val) @@ -640,15 +619,13 @@ def evaluate(loader_dev, metric, segment): label = label.reshape((-1)) metric.update([label], [out]) if (batch_id + 1) % (args.log_interval) == 0: - log_eval(batch_id, len(loader_dev), metric, step_loss, - args.log_interval) + log_eval(batch_id, len(loader_dev), metric, step_loss, args.log_interval) step_loss = 0 metric_nm, metric_val = metric.get() if not isinstance(metric_nm, list): metric_nm, metric_val = [metric_nm], [metric_val] - metric_str = 'validation metrics:' + ','.join( - [i + ':%.4f' for i in metric_nm]) + metric_str = 'validation metrics:' + ','.join([i + ':%.4f' for i in metric_nm]) logging.info(metric_str, *metric_val) mx.nd.waitall() @@ -659,4 +636,4 @@ def evaluate(loader_dev, metric, segment): if __name__ == '__main__': - train(task.metrics) + train(task.metrics) \ No newline at end of file diff --git a/scripts/bert/finetune_squad.py b/scripts/bert/finetune_squad.py index 5fcf818ceb..ad7774a3ba 100644 --- a/scripts/bert/finetune_squad.py +++ b/scripts/bert/finetune_squad.py @@ -78,35 +78,31 @@ default=None, help='Model parameter file') -parser.add_argument( - '--bert_model', - type=str, - default='bert_12_768_12', - help='BERT model name. options are bert_12_768_12 and bert_24_1024_16.') - -parser.add_argument( - '--bert_dataset', - type=str, - default='book_corpus_wiki_en_uncased', - help='BERT dataset name.' - 'options are book_corpus_wiki_en_uncased and book_corpus_wiki_en_cased.') - -parser.add_argument( - '--pretrained_bert_parameters', - type=str, - default=None, - help='Pre-trained bert model parameter file. 
default is None') +parser.add_argument('--bert_model', + type=str, + default='bert_12_768_12', + help='BERT model name. options are bert_12_768_12 and bert_24_1024_16.') + +parser.add_argument('--bert_dataset', + type=str, + default='book_corpus_wiki_en_uncased', + help='BERT dataset name.' + 'options are book_corpus_wiki_en_uncased and book_corpus_wiki_en_cased.') + +parser.add_argument('--pretrained_bert_parameters', + type=str, + default=None, + help='Pre-trained bert model parameter file. default is None') parser.add_argument('--uncased', action='store_false', help='if not set, inputs are converted to lower case.') -parser.add_argument( - '--output_dir', - type=str, - default='./output_dir', - help='The output directory where the model params will be written.' - ' default is ./output_dir') +parser.add_argument('--output_dir', + type=str, + default='./output_dir', + help='The output directory where the model params will be written.' + ' default is ./output_dir') parser.add_argument('--epochs', type=int, @@ -116,12 +112,10 @@ type=int, help='training steps, epochs will be ignored ' 'if trainin_steps is specified.') -parser.add_argument( - '--batch_size', - type=int, - default=32, - help='Batch size. Number of examples per gpu in a minibatch. default is 32' -) +parser.add_argument('--batch_size', + type=int, + default=32, + help='Batch size. Number of examples per gpu in a minibatch. default is 32') parser.add_argument('--test_batch_size', type=int, @@ -133,100 +127,84 @@ default='bertadam', help='optimization algorithm. default is bertadam') -parser.add_argument( - '--accumulate', - type=int, - default=None, - help='The number of batches for ' - 'gradients accumulation to simulate large batch size. Default is None') +parser.add_argument('--accumulate', + type=int, + default=None, + help='The number of batches for ' + 'gradients accumulation to simulate large batch size. 
Default is None') parser.add_argument('--lr', type=float, default=5e-5, help='Initial learning rate. default is 5e-5') -parser.add_argument( - '--warmup_ratio', - type=float, - default=0.1, - help='ratio of warmup steps that linearly increase learning rate from ' - '0 to target learning rate. default is 0.1') +parser.add_argument('--warmup_ratio', + type=float, + default=0.1, + help='ratio of warmup steps that linearly increase learning rate from ' + '0 to target learning rate. default is 0.1') parser.add_argument('--log_interval', type=int, default=50, help='report interval. default is 50') -parser.add_argument( - '--max_seq_length', - type=int, - default=384, - help='The maximum total input sequence length after WordPiece tokenization.' - 'Sequences longer than this will be truncated, and sequences shorter ' - 'than this will be padded. default is 384') - -parser.add_argument( - '--doc_stride', - type=int, - default=128, - help='When splitting up a long document into chunks, how much stride to ' - 'take between chunks. default is 128') - -parser.add_argument( - '--max_query_length', - type=int, - default=64, - help='The maximum number of tokens for the question. Questions longer than ' - 'this will be truncated to this length. default is 64') - -parser.add_argument( - '--n_best_size', - type=int, - default=20, - help='The total number of n-best predictions to generate in the ' - 'nbest_predictions.json output file. default is 20') - -parser.add_argument( - '--max_answer_length', - type=int, - default=30, - help='The maximum length of an answer that can be generated. This is needed ' - 'because the start and end predictions are not conditioned on one another.' 
- ' default is 30') - -parser.add_argument( - '--version_2', - action='store_true', - help='SQuAD examples whether contain some that do not have an answer.') - -parser.add_argument( - '--null_score_diff_threshold', - type=float, - default=0.0, - help= - 'If null_score - best_non_null is greater than the threshold predict null.' - 'Typical values are between -1.0 and -5.0. default is 0.0') - -parser.add_argument( - '--gpu', - type=int, - default=None, - help='which gpu to use for finetuning. CPU is used if not set.') - -parser.add_argument( - '--sentencepiece', - type=str, - default=None, - help= - 'Path to the sentencepiece .model file for both tokenization and vocab.') +parser.add_argument('--max_seq_length', + type=int, + default=384, + help='The maximum total input sequence length after WordPiece tokenization.' + 'Sequences longer than this will be truncated, and sequences shorter ' + 'than this will be padded. default is 384') + +parser.add_argument('--doc_stride', + type=int, + default=128, + help='When splitting up a long document into chunks, how much stride to ' + 'take between chunks. default is 128') + +parser.add_argument('--max_query_length', + type=int, + default=64, + help='The maximum number of tokens for the question. Questions longer than ' + 'this will be truncated to this length. default is 64') + +parser.add_argument('--n_best_size', + type=int, + default=20, + help='The total number of n-best predictions to generate in the ' + 'nbest_predictions.json output file. default is 20') + +parser.add_argument('--max_answer_length', + type=int, + default=30, + help='The maximum length of an answer that can be generated. This is needed ' + 'because the start and end predictions are not conditioned on one another.' 
+ ' default is 30') + +parser.add_argument('--version_2', + action='store_true', + help='SQuAD examples whether contain some that do not have an answer.') + +parser.add_argument('--null_score_diff_threshold', + type=float, + default=0.0, + help='If null_score - best_non_null is greater than the threshold predict null.' + 'Typical values are between -1.0 and -5.0. default is 0.0') + +parser.add_argument('--gpu', + type=int, + default=None, + help='which gpu to use for finetuning. CPU is used if not set.') + +parser.add_argument('--sentencepiece', + type=str, + default=None, + help='Path to the sentencepiece .model file for both tokenization and vocab.') parser.add_argument('--debug', action='store_true', help='Run the example in test mode for sanity checks') -parser.add_argument('--load_feature_from_pickle', - action='store_true', - help='load features from file if set') args = parser.parse_args() output_dir = args.output_dir @@ -255,6 +233,7 @@ 'BertForQA model parameters.') lower = args.uncased +epochs = args.epochs batch_size = args.batch_size test_batch_size = args.test_batch_size lr = args.lr @@ -263,12 +242,13 @@ accumulate = args.accumulate log_interval = args.log_interval if accumulate: - log.info('Using gradient accumulation. Effective batch size = {}'.format( - accumulate * batch_size)) + log.info('Using gradient accumulation. Effective batch size = {}'. 
+ format(accumulate*batch_size)) optimizer = args.optimizer warmup_ratio = args.warmup_ratio + version_2 = args.version_2 null_score_diff_threshold = args.null_score_diff_threshold @@ -279,37 +259,33 @@ max_answer_length = args.max_answer_length if max_seq_length <= max_query_length + 3: - raise ValueError( - 'The max_seq_length (%d) must be greater than max_query_length ' - '(%d) + 3' % (max_seq_length, max_query_length)) + raise ValueError('The max_seq_length (%d) must be greater than max_query_length ' + '(%d) + 3' % (max_seq_length, max_query_length)) # vocabulary and tokenizer if args.sentencepiece: - logging.info('loading vocab file from sentence piece model: %s', - args.sentencepiece) + logging.info('loading vocab file from sentence piece model: %s', args.sentencepiece) if dataset_name: - warnings.warn( - 'Both --dataset_name and --sentencepiece are provided. ' - 'The vocabulary will be loaded based on --sentencepiece.') + warnings.warn('Both --dataset_name and --sentencepiece are provided. 
' + 'The vocabulary will be loaded based on --sentencepiece.') vocab = nlp.vocab.BERTVocab.from_sentencepiece(args.sentencepiece) dataset_name = None else: vocab = None pretrained = not model_parameters and not pretrained_bert_parameters and not args.sentencepiece -bert, vocab = nlp.model.get_model(name=model_name, - dataset_name=dataset_name, - vocab=vocab, - pretrained=pretrained, - ctx=ctx, - use_pooler=False, - use_decoder=False, - use_classifier=False) +bert, vocab = nlp.model.get_model( + name=model_name, + dataset_name=dataset_name, + vocab=vocab, + pretrained=pretrained, + ctx=ctx, + use_pooler=False, + use_decoder=False, + use_classifier=False) if args.sentencepiece: - tokenizer = nlp.data.BERTSPTokenizer(args.sentencepiece, - vocab, - lower=lower) + tokenizer = nlp.data.BERTSPTokenizer(args.sentencepiece, vocab, lower=lower) else: tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower) @@ -317,7 +293,8 @@ nlp.data.batchify.Stack(), nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), - nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32'), + nlp.data.batchify.Stack('float32'), + nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32')) net = BertForQA(bert=bert) @@ -326,11 +303,8 @@ nlp.utils.load_parameters(net, model_parameters, ctx=ctx, cast_dtype=True) elif pretrained_bert_parameters: # only load BertModel parameters - nlp.utils.load_parameters(bert, - pretrained_bert_parameters, - ctx=ctx, - ignore_extra=True, - cast_dtype=True) + nlp.utils.load_parameters(bert, pretrained_bert_parameters, ctx=ctx, + ignore_extra=True, cast_dtype=True) net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx) elif pretrained: # only load BertModel parameters @@ -442,10 +416,9 @@ def set_new_lr(step_num, batch_id): log_num += len(inputs) total_num += len(inputs) - out = net( - inputs.astype('float32').as_in_context(ctx), - 
token_types.astype('float32').as_in_context(ctx), - valid_length.astype('float32').as_in_context(ctx)) + out = net(inputs.astype('float32').as_in_context(ctx), + token_types.astype('float32').as_in_context(ctx), + valid_length.astype('float32').as_in_context(ctx)) ls = loss_function(out, [ start_label.astype('float32').as_in_context(ctx), @@ -463,14 +436,13 @@ def set_new_lr(step_num, batch_id): step_loss += ls.asscalar() - if (batch_id + 1) % (log_interval * - (accumulate if accumulate else 1)) == 0: + if (batch_id + 1) % (log_interval * (accumulate if accumulate else 1)) == 0: toc = time.time() - log.info( - 'Epoch: {}, Batch: {}/{}, Loss={:.4f}, lr={:.7f} Time cost={:.1f} Thoughput={:.2f} samples/s' - .format(epoch_id, batch_id, len(train_dataloader), - step_loss / log_interval, trainer.learning_rate, - toc - tic, log_num / (toc - tic))) + log.info('Epoch: {}, Batch: {}/{}, Loss={:.4f}, lr={:.7f} ' + 'Time cost={:.1f} Thoughput={:.2f} samples/s' + .format(epoch_id, batch_id, len(train_dataloader), + step_loss / log_interval, + trainer.learning_rate, toc - tic, log_num/(toc - tic))) tic = time.time() step_loss = 0.0 log_num = 0 @@ -568,15 +540,12 @@ def evaluate(): all_predictions[example_qas_id] = prediction with io.open(os.path.join(output_dir, 'predictions.json'), - 'w', - encoding='utf-8') as fout: + 'w', encoding='utf-8') as fout: data = json.dumps(all_predictions, ensure_ascii=False) fout.write(data) if version_2: - log.info( - 'Please run evaluate-v2.0.py to get evaluation results for SQuAD 2.0' - ) + log.info('Please run evaluate-v2.0.py to get evaluation results for SQuAD 2.0') else: F1_EM = get_F1_EM(dev_data, all_predictions) log.info(F1_EM) From d34ff72c4c99c11e5d4a7018294cc5d9854cb73f Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 29 Jan 2020 21:05:42 +0800 Subject: [PATCH 59/59] fix pylint --- scripts/bert/finetune_classifier.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/bert/finetune_classifier.py 
b/scripts/bert/finetune_classifier.py index c267d37395..5d0e7f108c 100644 --- a/scripts/bert/finetune_classifier.py +++ b/scripts/bert/finetune_classifier.py @@ -1,8 +1,11 @@ """ Sentence Pair Classification with Bidirectional Encoder Representations from Transformers + ========================================================================================= + This example shows how to implement finetune a model with pre-trained BERT parameters for sentence pair classification, with Gluon NLP Toolkit. + @article{devlin2018bert, title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding}, author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, @@ -636,4 +639,4 @@ def evaluate(loader_dev, metric, segment): if __name__ == '__main__': - train(task.metrics) \ No newline at end of file + train(task.metrics)