diff --git a/python/tvm/hago/quantize.py b/python/tvm/hago/quantize.py index e683b6f0ff63..48869ddecf0f 100644 --- a/python/tvm/hago/quantize.py +++ b/python/tvm/hago/quantize.py @@ -22,7 +22,7 @@ from .. import relay from .base import * from .hardware import * -from .topology import Topology +from .topology import Topology, analyze_topology import tvm from tvm.tir import expr @@ -670,4 +670,5 @@ def create_quantizer(graph, hardware, strategy): # check model hash model_hash = tvm.ir.structural_hash(graph) assert model_hash == strategy.model_hash - return Quantizer(graph, hardware, strategy.topology, strategy.bits, strategy.thresholds) + topology = analyze_topology(graph, hardware) + return Quantizer(graph, hardware, topology, strategy.bits, strategy.thresholds) diff --git a/python/tvm/hago/record.py b/python/tvm/hago/record.py index 0728a2034604..e123b90fb6bc 100644 --- a/python/tvm/hago/record.py +++ b/python/tvm/hago/record.py @@ -3,20 +3,21 @@ import json from json import JSONEncoder from enum import IntEnum -from .topology import Topology -HAGO_LOG_VERSION = 0.11 +HAGO_LOG_VERSION = 0.1 class Strategy(object): - def __init__(self, model_hash, topology, bits, thresholds): + def __init__(self, model_hash, bits, thresholds): self.model_hash = model_hash - self.topology = topology self.bits = bits - self.thresholds = thresholds + self.thresholds = [float(th) for th in thresholds] + + def to_json(self): + json_dict = self.__dict__.copy() + return json_dict def __str__(self): return 'Strategy(model_hash=' + str(self.model_hash) + \ - ', topology=' + str(self.topology) + \ ', bits=' + str(self.bits) + \ ', thresholds=' + str(self.thresholds) + ')' @@ -42,6 +43,10 @@ def __init__(self, accuracy=None, kl_distance=None): self.accuracy = accuracy self.kl_distance = kl_distance + def to_json(self): + json_dict = self.__dict__.copy() + return json_dict + def __str__(self): keys = self.__dict__.keys() pairs = [key + '=' + str(getattr(self, key)) for key in keys] @@ -53,6 +58,10 @@ def __init__(self, strategy, result): self.strategy = strategy self.result = result + def to_json(self): + json_dict = self.__dict__.copy() + return json_dict + def __str__(self): return 'Measure(version=' + str(self.version) + \ ', strategy=' + str(self.strategy) + \ @@ -72,36 +81,28 @@ def compare_key(m): def serialize(obj): class Encoder(JSONEncoder): def default(self, obj): - print('serialize: {}'.format(obj)) - if hasattr(obj, '__dict__'): - return obj.__dict__ + if hasattr(obj, 'to_json'): + return obj.to_json() return json.JSONEncoder.default(self, obj) - return json.dumps(str(obj), cls=Encoder) + return json.dumps(obj, cls=Encoder, sort_keys=True) def deserialize(json_str): - def decode_topology(obj): - node_conds = obj['node_conds'] - edge_conds = obj['edge_conds'] - return Topology(node_conds, edge_conds) - def decode_strategy(obj): model_hash = obj['model_hash'] - topology = decode_topology(obj['topology']) bits = obj['bits'] thresholds = obj['thresholds'] - return Strategy(model_hash, topology, bits, thresholds) + return Strategy(model_hash, bits, thresholds) def decode_result(obj): - sim_acc = obj['accuracy'] + accuracy = obj['accuracy'] kl_distance = obj['kl_distance'] return MeasureResult(accuracy, kl_distance) json_data = json.loads(json_str) - measure = {} - measure['strategy'] = decode_strategy(json_data['strategy']) - measure['result'] = decode_result(json_data['result']) - return measure + strategy = decode_strategy(json_data['strategy']) + result = decode_result(json_data['result']) + return Measure(strategy, result) def load_from_file(fname): @@ -117,7 +118,7 @@ def load_from_file(fname): def pick_best(fname, key): records = load_from_file(fname) records.sort(key=lambda rec: getattr(rec['result'], key)) - if key in ['sim_acc', 'quant_acc']: + if key in ['accuracy']: return records[-1] elif key in ['kl_divergence']: return records[0] diff --git a/python/tvm/hago/search.py b/python/tvm/hago/search.py index b67b118f7333..4a68ded17bcf 100644 --- a/python/tvm/hago/search.py +++ b/python/tvm/hago/search.py @@ -129,75 +129,7 @@ def generate_search_space(graph, hardware): # # make new guess # guess = neighbour(previous_guess, portion) # return best_guess, best_cost -# -# -# def greedy_squash(fcost, domains, args, tolerance=0.0, max_iter=3000): -# cfg = qtz.current_qconfig() -# best_guess, best_cost = None, float("inf") -# num_iter = 0 -# # init with maximum bit setting -# guess = [choices[0] for choices in domains] -# stop_guess = [choices[-1] for choices in domains] -# dim_idx = 0 -# last_update_idx = 0 -# while num_iter < max_iter: -# cost = fcost(guess, *args) -# if cost <= best_cost: -# # stored as best guess -# best_guess = guess -# best_cost = cost -# -# if (cost - best_cost) <= tolerance: -# previous_guess = guess -# previous_cost = cost -# last_update_idx = dim_idx -# else: -# # move to next dimension -# dim_idx += 1 -# -# if dim_idx - last_update_idx > len(domains): -# # early stopping -# break -# -# # make new guess -# guess = previous_guess.copy() -# while guess != stop_guess: -# dim = dim_idx % len(domains) -# if guess[dim] == min(domains[dim]): -# dim_idx += 1 -# else: -# break -# guess[dim] -= 1 -# print('niter: {}, acc: {}, best acc: {}'.format(num_iter, cost, best_cost)) -# num_iter += 1 -# return best_guess, best_cost -# -# -# def search_bits_strategy(eval_func, bit_choices, graph, hardware, topology, dataset): -# cfg = qtz.current_qconfig() -# -# args = (graph, hardware, topology, dataset) -# if cfg.search_strategy == 'random_search': -# best_bits, best_acc = random_search(eval_func, bit_choices, args) -# elif cfg.search_strategy == 'default_setting': -# best_bits = [choices[0] for choices in bit_choices] -# # sim acc: 71.1, qtz acc: 71.1, imagenet: 68.7 -# # best_bits = [6, 8, 24, 21, 24, 24, 8, 8, 21, 18, 21, 8, 7, 27, 23, 30, 32, 26, 8, 8, 22, 20, 22, 8, 8, 22, 24, 32, 32, 32, 8, 8, 32, 32, 8, 8, 32, 32, 32, 8, 8, 32, 32, 32, 32, 32, 8, 8, 32, 32, 32, 8, 8, 32, 32, 32, 32, 32, 8, 8, 32, 32, 8, 8, 32, 32, 32, 8, 8, 32, 32, 32, 32, 32, 8, 8, 32, 32, 32, 8, 8, 32, 32, 32, 32, 32, 8, 8, 32, 32, 8, 8, 32, 32, 32, 8, 8, 32, 32, 32, 32, 32, 8, 8, 32, 32, 32, 8, 8, 32, 32, 32, 32, 32] -# # sim acc: 71.9 qtz acc: 71.9, imagenet: 68.7 -# # best_bits = [6, 8, 24, 21, 24, 24, 8, 8, 21, 18, 21, 8, 7, 27, 23, 30, 32, 26, 8, 8, 22, 20, 22, 8, 8, 22, 19, 22, 21, 22, 8, 7, 21, 19, 8, 8, 23, 21, 23, 8, 8, 22, 20, 31, 22, 22, 8, 8, 21, 19, 20, 8, 8, 24, 21, 24, 23, 24, 8, 8, 17, 16, 8, 8, 22, 20, 22, 8, 8, 23, 20, 29, 23, 23, 8, 8, 19, 16, 18, 8, 8, 18, 16, 18, 16, 18, 8, 8, 13, 11, 8, 8, 30, 32, 32, 8, 8, 32, 32, 32, 32, 32, 8, 8, 32, 32, 32, 8, 8, 32, 32, 32, 32, 32] -# best_acc = eval_func(best_bits, *args) -# return best_bits, best_acc -# elif cfg.search_strategy == 'grid_search': -# best_bits, best_acc = grid_search(eval_func, bit_choices, args) -# elif cfg.search_strategy == 'simulated_annealing': -# best_bits, best_acc = simulated_annealing(eval_func, bit_choices, args) -# elif cfg.search_strategy == 'greedy_squash': -# best_bits, best_acc = greedy_squash(eval_func, bit_choices, args) -# else: -# raise ValueError('unknown search strategy: {}'.format(cfg.search_strategy)) -# -# return best_bits, best_acc -# +# # # def softmax_with_temperature(x, temp=1.0, axis=1): # e_x = np.exp((x - np.amax(x, axis=axis, keepdims=True)) / temp) @@ -215,54 +147,6 @@ def generate_search_space(graph, hardware): # kl = kl / num_samples # return kl # -# -# -# def old_search_quantize_strategy(mod, hardware, dataset=None): -# graph = mod['main'] -# fout = open(current_qconfig().log_file, 'w+', buffering=1) -# origin_out, origin_acc = eval_acc(graph, dataset) -# print('original acc: {}'.format(origin_acc)) -# topology = analyze_topology(graph, hardware) -# choices = generate_choices(graph, hardware, topology) -# # search_space = create_search_space(graph, topology, choices) -# model_hash = tvm.ir.structural_hash(graph) -# -# -# # search for bits settings with learning method -# def eval_func(bits, graph, hardware, topology, dataset): -# edge2bit = build_edge_dict(graph, bits, topology.edge_conds) -# print('bits') -# print_edge_dict(graph, edge2bit) -# # coarse-grained threshold estimate -# thresholds = threshold_estimate(graph, topology, bits, dataset) -# -# strategy = Strategy(model_hash, topology, bits, thresholds) -# quantizer = qtz.create_quantizer(graph, hardware, strategy) -# simulated_graph = quantizer.simulate() -# # print('simulated_graph') -# # print(simulated_graph) -# simulated_out, simulated_acc = eval_acc(simulated_graph, dataset) -# # [optional] calibrate threshold estimation -# quantized_graph = quantizer.quantize() -# quantized_out, quantized_acc = eval_acc(quantized_graph, dataset) -# -# kl_divergence = calculate_kl(origin_out, quantized_out) -# # logging -# print('simulated_acc: {}, quantized_acc: {}, kl_divergence: {}\n\n'.format(simulated_acc, quantized_acc, kl_divergence)) -# result = MeasureResult(sim_acc=simulated_acc, quant_acc=quantized_acc, kl_divergence=kl_divergence) -# measure = Measure(strategy, result) -# fout.write(serialize(measure)) -# fout.write('\n') -# return kl_divergence -# -# best_bits, best_acc = search_bits_strategy(eval_func, choices, graph, hardware, topology, dataset) -# print('finished search') -# print('best_acc: {0}'.format(best_acc)) -# best_thresholds = threshold_estimate(graph, topology, best_bits, dataset) -# best_strategy = Strategy(model_hash, topology, best_bits, best_thresholds) -# fout.close() -# return best_strategy, best_acc - def _accuracy_as_measure(func, dataset, outputs, ctx, target): # return a MeasureResult @@ -384,8 +268,7 @@ def _update_best_measure(self, measures): print(m) print('best_measure') print(self.best_measure) - updated = (self.best_measure == old_measure) - return updated, self.best_measure + return self.best_measure def _measure(self, bits_list): # support single sample measure and batched measure @@ -401,7 +284,7 @@ def _measure(self, bits_list): simulated_out = simulator.eval(bits, thresholds, self.dataset, self.ctx, self.target) measure_result = self.measure_func(self.graph, self.dataset, simulated_out, self.ctx, self.target) - strategy = Strategy(self.model_hash, self.topology, bits, thresholds) + strategy = Strategy(self.model_hash, bits, thresholds) results.append(Measure(strategy, measure_result)) return results else: @@ -459,7 +342,7 @@ def next_trials(self): return trials def update(self, measures): - updated, best_measure = self._update_best_measure(measures) + best_measure = self._update_best_measure(measures) self.bit_idx += 1 if measures[0].result.accuracy < best_measure.result.accuracy or \ self.bit_idx >= len(self.space[self.dim_idx]): @@ -498,7 +381,7 @@ def _measure(self, bits_list): out = runtime(**inputs) outputs.append(out) measure_result = self.measure_func(self.graph, self.dataset, outputs, self.ctx, self.target) - strategy = Strategy(self.model_hash, self.topology, bits, thresholds) + strategy = Strategy(self.model_hash, bits, thresholds) result = Measure(strategy, measure_result) print(result) return [result] @@ -520,7 +403,7 @@ def next_trials(self): return trials def update(self, measures): - updated, ms = self._update_best_measure(measures) + ms = self._update_best_measure(measures) best_bit = ms.strategy.bits[self.dim_idx] self.decided.append(best_bit) self.dim_idx += 1 diff --git a/tests/python/nightly/quantization/search_int16_activation.py b/tests/python/nightly/quantization/search_int16_activation.py index 5f117e5c294f..5dbdaa2dd829 100644 --- a/tests/python/nightly/quantization/search_int16_activation.py +++ b/tests/python/nightly/quantization/search_int16_activation.py @@ -24,6 +24,10 @@ ctx = tvm.context(target) # target = 'cuda' # ctx = tvm.gpu(3) +# best configuration for resnet18_v1 +""" +bits = [6, 7, 16, 14, 4, 16, 7, 7, 16, 14, 4, 8, 8, 16, 13, 15, 16, 4, 7, 8, 16, 12, 4, 8, 8, 16, 14, 16, 16, 4, 8, 8, 15, 14, 8, 8, 16, 14, 4, 6, 8, 15, 13, 16, 16, 4, 8, 8, 16, 15, 4, 8, 8, 16, 14, 16, 16, 4, 8, 8, 16, 15, 8, 8, 16, 14, 4, 8, 7, 13, 11, 15, 16, 4, 7, 8, 14, 12, 4, 8, 8, 11, 9, 16, 14, 4, 7, 8, 15, 14, 8, 8, 16, 11, 4, 7, 8, 14, 11, 12, 11, 4, 8, 8, 12, 9, 4, 7, 8, 12, 9, 6, 7, 5] +""" ##################### # Dataset prepartions @@ -65,9 +69,16 @@ def create_hardware(): act_dtype = "int16" hardware.add_op_desc("add", OpDesc(in_dtypes="float32", out_dtypes="float32")) hardware.add_op_desc("add", OpDesc(in_dtypes=act_dtype, out_dtypes=act_dtype)) + hardware.add_op_desc("concatenate", OpDesc(in_dtypes="float32", out_dtypes="float32")) + hardware.add_op_desc("concatenate", OpDesc(in_dtypes="int8", out_dtypes="int8")) + hardware.add_op_desc("concatenate", OpDesc(in_dtypes="int32", out_dtypes="int32")) hardware.add_op_desc("nn.conv2d", OpDesc(in_dtypes="int8", out_dtypes=act_dtype)) hardware.add_op_desc("nn.relu", OpDesc(in_dtypes=act_dtype, out_dtypes=act_dtype)) + hardware.add_op_desc("clip", OpDesc(in_dtypes=act_dtype, out_dtypes=act_dtype)) hardware.add_op_desc("nn.max_pool2d", OpDesc(in_dtypes=act_dtype, out_dtypes=act_dtype)) + hardware.add_op_desc("nn.dropout", OpDesc(in_dtypes="float32", out_dtypes="float32")) + hardware.add_op_desc("nn.avg_pool2d", OpDesc(in_dtypes="float32", out_dtypes="float32")) + hardware.add_op_desc("nn.global_avg_pool2d", OpDesc(in_dtypes="float32", out_dtypes="float32")) return hardware ############################################################################### @@ -85,7 +96,7 @@ def main(): # val_path = '/home/ubuntu/tensorflow_datasets/downloads/manual/imagenet2012/val.rec' val_path = '/home/ziheng/datasets1/imagenet/rec/val.rec' if args.run_all: - models = ['vgg16', 'densenet161'] + models = ['resnet18_v1', 'squeezenet1.1'] else: models = [args.model] for model_name in models: @@ -101,13 +112,13 @@ def main(): # Quantize calib_dataset = get_calibration_dataset(val_data, batch_fn, var_name='data') fp32_mod, params = get_model(model_name) + print(fp32_mod) qconfig = hago.qconfig(use_channel_quantize=is_per_channel, round_scale_to_pot=False, - log_file='search.log') + log_file='strategy_{}.log'.format(model_name)) hardware = create_hardware() - bits = [6, 7, 16, 14, 4, 16, 7, 7, 16, 14, 4, 8, 8, 16, 13, 15, 16, 4, 7, 8, 16, 12, 4, 8, 8, 16, 14, 16, 16, 4, 8, 8, 15, 14, 8, 8, 16, 14, 4, 6, 8, 15, 13, 16, 16, 4, 8, 8, 16, 15, 4, 8, 8, 16, 14, 16, 16, 4, 8, 8, 16, 15, 8, 8, 16, 14, 4, 8, 7, 13, 11, 15, 16, 4, 7, 8, 14, 12, 4, 8, 8, 11, 9, 16, 14, 4, 7, 8, 15, 14, 8, 8, 16, 11, 4, 7, 8, 14, 11, 12, 11, 4, 8, 8, 12, 9, 4, 7, 8, 12, 9, 6, 7, 5] - quantized_func = quantize_hago(fp32_mod, params, calib_dataset, qconfig, hardware, bits, target, ctx) + quantized_func = quantize_hago(fp32_mod, params, calib_dataset, qconfig, hardware, 'greedy', target, ctx) acc = eval_acc(quantized_func, val_data, batch_fn, args, var_name='data', target=target, ctx=ctx) channel_or_tensor = "per_channel" if is_per_channel else "per_tensor" print("quantized_accuracy", model_name, channel_or_tensor, acc, sep=',') diff --git a/tests/python/nightly/quantization/test_hago_mxnet.py b/tests/python/nightly/quantization/test_hago_mxnet.py index 5f15089839d9..f28aa8b0148f 100644 --- a/tests/python/nightly/quantization/test_hago_mxnet.py +++ b/tests/python/nightly/quantization/test_hago_mxnet.py @@ -67,9 +67,10 @@ def get_model(model_name): return mod, params def main(): - val_path = '/home/ubuntu/tensorflow_datasets/downloads/manual/imagenet2012/val.rec' + # val_path = '/home/ubuntu/tensorflow_datasets/downloads/manual/imagenet2012/val.rec' + val_path = '/home/ziheng/datasets1/imagenet/rec/val.rec' if args.run_all: - models = ['resnet50_v1', 'inceptionv3', 'mobilenetv2_1.0', 'mobilenet1.0', 'resnet18_v1', + models = ['squeezenet1.1', 'resnet50_v1', 'inceptionv3', 'mobilenetv2_1.0', 'mobilenet1.0', 'resnet18_v1', 'vgg16', 'densenet161'] else: models = [args.model]