Skip to content

Commit

Permalink
Merge pull request #239 from automl/development
Browse files Browse the repository at this point in the history
Prepare v1.1.6
  • Loading branch information
shukon authored Apr 10, 2019
2 parents e57a718 + 6418d90 commit e1cc671
Show file tree
Hide file tree
Showing 13 changed files with 276 additions and 116 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
*.swp
*.sh
data/
output/
results/
vespy/
_run_number.txt
Expand Down
21 changes: 8 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,20 +55,17 @@ We are currently working on the [documentation](https://automl.github.io/CAVE/st
You can analyze multiple folders (generated with the same scenario) in a single analysis: simply provide the paths to all the individual results in `--folders`.

Commandline arguments:
- `--folders`: path(s) to folder(s) containing the SMAC3-output (works with
`output/run_*`)
- `--folders`: path(s) to folder(s) containing the configurator-output (works with `output/run_*`)

Optional:
- `--output`: where to save the CAVE-output
- `--file_format`: of results to be analyzed, choose from [SMAC3](https://github.com/automl/SMAC3), [SMAC2](https://www.cs.ubc.ca/labs/beta/Projects/SMAC), [CSV](https://automl.github.io/CAVE/stable/quickstart.html#csv) or [BOHB](https://github.com/automl/HpBandSter)
- `--validation_format`: of (optional) validation data (to enhance epm-quality where appropriate), choose from [SMAC3](https://github.com/automl/SMAC3), [SMAC2](https://www.cs.ubc.ca/labs/beta/Projects/SMAC), [CSV](https://automl.github.io/CAVE/stable/quickstart.html#csv) or NONE
- `--ta_exec_dir`: target algorithm execution directory, this should be a path to
the directory from which SMAC was run initially. used to find instance-files and
if necessary execute the `algo`-parameter of the SMAC-scenario (DEFAULT:
current working directory)
- `--ta_exec_dir`: target algorithm execution directories. This should be one or multiple path(s) to
the directories from which the configurator was run initially. Not necessary for all configurators (BOHB doesn't need it). Used to find instance-files and,
if necessary, execute the `algo`-parameter of the SMAC-scenario (DEFAULT: current working directory)
- `--parameter_importance`: calculating parameter importance is expensive, so you can
specify which plots you desire: `ablation`, `forward_selection`, `fanova`
and/or `lpi`.
specify which plots you desire: `ablation`, `forward_selection`, `fanova` and/or `lpi`.
Either provide a combination of those or use `all` or `none`.
- `--feature_analysis`: analyzing features is expensive, so you can specify which
algorithm to run: `box_violin`, `clustering`, `importance` and/or `feature_cdf`.
Expand All @@ -79,8 +76,8 @@ Optional:
- `--no_parallel_coordinates`: toggles the parallel-coordinates plot
- `--no_configurator_footprint`: toggles the configurator-footprints
- `--no_algorithm_footprints`: toggles the algorithm-footprints
- `--cfp_time_slider`: how to display the over-time development of the configurator footprint, choose from `off` (which yields only the final interactive plot), `static` (which yields a number of `.png`s to click through), `online` (which generates a time-slider-widget - might be slow interaction on big data) and `prerender` (which also generates time-slider, but large file with low interaction time)
- `--cfp_number_quantiles`: if time-slider for configurator footprints is not `off`, determines the number of different quantiles to look at
- `--cfp_time_slider`: `on` adds a time-slider to the interactive configurator footprint, which results in longer loading times; `off` generates static PNGs at the desired quantiles
- `--cfp_number_quantiles`: determines how many time-steps to prerender for the configurator footprint

For further information on how to use CAVE, see:
`python scripts/cave.py -h
Expand Down Expand Up @@ -111,6 +108,4 @@ and then you can use CAVE as usual, specifying the file_format as BOHB:
```
cave --folders examples/bohb --file_format BOHB --output CAVE_BOHB_results
```
There is an [example
jupyter-notebook](https://github.com/automl/BOHBsCAVE/blob/master/notebook_mlp_on_digits.ipynb) on how to use
CAVE with BOHB.
There is an [example jupyter-notebook](https://github.com/automl/HpBandSter/blob/add_docu/hpbandster/examples/Workflow.ipynb) on how to use CAVE with BOHB.
2 changes: 1 addition & 1 deletion cave/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.5"
__version__ = "1.1.6"
81 changes: 58 additions & 23 deletions cave/analyzer/budget_correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from smac.scenario.scenario import Scenario

from bokeh.models import ColumnDataSource, CustomJS, Range1d
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.models.widgets import DataTable, TableColumn, Select
from bokeh.embed import components
from bokeh.plotting import show, figure
from bokeh.io import output_notebook
from bokeh.layouts import column
from bokeh.layouts import column, row
from bokeh.transform import jitter

from cave.analyzer.base_analyzer import BaseAnalyzer
Expand Down Expand Up @@ -59,7 +59,7 @@ def _get_table(self, runs):
else:
table[-1].append("{:.2f} ({} samples)".format(rho, len(costs[0])))

budget_names = [os.path.basename(run.folder) for run in runs]
budget_names = [os.path.basename(run.folder).replace('_', ' ') for run in runs] # TODO
return DataFrame(data=table, columns=budget_names, index=budget_names)

def plot(self):
Expand All @@ -77,24 +77,30 @@ def _plot(self, runs):
"""
df = self._get_table(runs)
# Create CDS from pandas dataframe
columns = list(df.columns.values)
data = dict(df[columns])
budget_names = list(df.columns.values)
data = dict(df[budget_names])
data["Budget"] = df.index.tolist()
table_source = ColumnDataSource(data)
# Create bokeh-datatable
columns = [TableColumn(field='Budget', title="Budget", sortable=False, width=20)] + [
TableColumn(field=header, title=header, default_sort='descending', width=10) for header in columns
TableColumn(field=header, title=header, default_sort='descending', width=10) for header in budget_names
]
bokeh_table = DataTable(source=table_source, columns=columns, index_position=None, sortable=False,
height=20 + 30 * len(data["Budget"]))
height=20 + 30 * len(data["Budget"]))

# Create CDS for scatter-plot
all_configs = set([a for b in [run.original_runhistory.get_all_configs() for run in runs] for a in b])
data = {os.path.basename(run.folder) : [run.original_runhistory.get_cost(c) if c in
run.original_runhistory.get_all_configs() else
None for c in all_configs] for run in runs}
data = {os.path.basename(run.folder).replace('_', ' ') : [run.original_runhistory.get_cost(c) if c in # TODO
run.original_runhistory.get_all_configs() else
None for c in all_configs] for run in runs}
data['x'] = []
data['y'] = []
# Default scatter should be lowest vs highest:
for x, y in zip(data[budget_names[0]], data[budget_names[-1]]):
if x is not None and y is not None:
data['x'].append(x)
data['y'].append(y)


with warnings.catch_warnings(record=True) as list_of_warnings:
# Catch unmatching column lengths warning
Expand All @@ -113,15 +119,15 @@ def _plot(self, runs):
match_aspect=True,
y_range=Range1d(start=min_val, end=max_val, bounds=(min_val, max_val)),
x_range=Range1d(start=min_val, end=max_val, bounds=(min_val, max_val)),
x_axis_label='budget', y_axis_label='budget')
x_axis_label=budget_names[0], y_axis_label=budget_names[-1])
p.circle(x='x', y='y',
#x=jitter('x', 0.1), y=jitter('y', 0.1),
source=scatter_source, size=5, color="navy", alpha=0.5)

code = 'var budgets = ' + str(list(df.columns.values)) + ';'
code += 'console.log(budgets);'
code += """
try {
code_budgets = 'var budgets = ' + str(budget_names) + '; console.log(budgets);'

code_try = 'try {'
code_get_selected_cell = """
// This first part only extracts selected row and column!
var grid = document.getElementsByClassName('grid-canvas')[0].children;
var row = '';
Expand All @@ -138,7 +144,19 @@ def _plot(self, runs):
console.log('row', row, budgets[row]);
console.log('col', col, budgets[col]);
table_source.selected.indices = []; // Reset, so gets triggered again when clicked again
"""

code_selected = """
row = budgets.indexOf(select_x.value);
col = budgets.indexOf(select_y.value);
"""

code_update_selection_values = """
select_x.value = budgets[row];
select_y.value = budgets[col];
"""

code_update_plot = """
// This is the actual updating of the plot
if (row => 0 && col > 0) {
// Copy relevant arrays
Expand Down Expand Up @@ -170,25 +188,42 @@ def _plot(self, runs):
xr.end = max + padding;
yr.end = max + padding;
}
"""

code_catch = """
} catch(err) {
console.log(err.message);
}
"""

callback = CustomJS(args=dict(table_source=table_source,
scatter_source=scatter_source,
xaxis=p.xaxis[0], yaxis=p.yaxis[0],
xr=p.x_range, yr=p.y_range,
), code=code)
table_source.selected.js_on_change('indices', callback)

layout = column(bokeh_table, p)
code_selected = code_budgets + code_try + code_selected + code_update_plot + code_catch
select_x = Select(title="X-axis:", value=budget_names[0], options=budget_names)
select_y = Select(title="Y-axis:", value=budget_names[-1], options=budget_names)
callback_select = CustomJS(args=dict(scatter_source=scatter_source,
select_x=select_x, select_y=select_y,
xaxis=p.xaxis[0], yaxis=p.yaxis[0],
xr=p.x_range, yr=p.y_range,
), code=code_selected)
select_x.js_on_change('value', callback_select)
select_y.js_on_change('value', callback_select)

code_table_cell = code_budgets + code_try + code_get_selected_cell + code_update_selection_values + code_update_plot + code_catch
callback_table_cell = CustomJS(args=dict(table_source=table_source,
scatter_source=scatter_source,
select_x=select_x, select_y=select_y,
xaxis=p.xaxis[0], yaxis=p.yaxis[0],
xr=p.x_range, yr=p.y_range,
), code=code_table_cell)
table_source.selected.js_on_change('indices', callback_table_cell)

layout = column(bokeh_table, row(p, column(select_x, select_y)))
return layout

# Render the figure returned by self.plot() with bokeh.embed.components and
# return the resulting (script, div) pair.
# If a dict `d` is passed, the pair is stored under d["bokeh"] and the
# `tooltip` argument under d["tooltip"].
# NOTE(review): indentation is lost in this view; the d["tooltip"] assignment
# is presumably inside the `if d is not None` branch -- confirm against the file.
def get_html(self, d=None, tooltip=None):
script, div = components(self.plot())
if d is not None:
d["bokeh"] = script, div
d["tooltip"] = tooltip
return script, div

def get_jupyter(self):
Expand Down
Loading

0 comments on commit e1cc671

Please sign in to comment.