
Feature: on node user creation #303

Merged: 14 commits, Jan 27, 2018 (changes shown from 12 commits)
11 changes: 10 additions & 1 deletion aztk/models.py
@@ -23,6 +23,13 @@ def __init__(self, name: str = None, script: str = None, run_on=None):
self.run_on = run_on


class UserConfiguration:
def __init__(self, username: str, ssh_key: str = None, password: str = None):
Contributor:
Will this get auto-populated from the user's secrets.yaml?

Member Author:
On the CLI, yeah -- that happens here:
https://github.com/jafreck/aztk/blob/06f85c2538db98e168fed8c38fe9b88524f95e5f/cli/spark/endpoints/cluster/cluster_create.py#L87

For the SDK, no the secrets.yaml file is not used at all.

self.username = username
self.ssh_key = ssh_key
self.password = password


class ClusterConfiguration:
def __init__(
self,
@@ -33,7 +40,8 @@ def __init__(
vm_low_pri_count=None,
vm_size=None,
subnet_id=None,
docker_repo: str=None):
docker_repo: str = None,
user_configuration: UserConfiguration = None):

self.custom_scripts = custom_scripts
self.file_shares = file_shares
@@ -43,6 +51,7 @@ def __init__(
self.vm_low_pri_count = vm_low_pri_count
self.subnet_id = subnet_id
self.docker_repo = docker_repo
self.user_configuration = user_configuration


class RemoteLogin:
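To illustrate the new model, here is a minimal SDK-side sketch. The class and parameter names come from this diff; the values and the `aztk.spark.models` import path are illustrative, and secrets setup is elided:

```python
import aztk.spark

# Attach a user to the cluster configuration so it is created on each node
# at provisioning time, rather than via a separate create_user() call later.
user_conf = aztk.spark.models.UserConfiguration(
    username="spark",
    ssh_key="ssh-rsa AAAA...",  # or password="..." -- one of the two
)

cluster_conf = aztk.spark.models.ClusterConfiguration(
    cluster_id="my-cluster",
    vm_count=2,
    vm_size="standard_d2_v2",
    user_configuration=user_conf,
)
```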
15 changes: 12 additions & 3 deletions aztk/spark/client.py
@@ -10,6 +10,7 @@
from aztk.spark.helpers import job_submission as job_submit_helper
from aztk.spark.helpers import get_log as get_log_helper
from aztk.spark.utils import upload_node_scripts, util
import yaml


class Client(BaseClient):
@@ -21,10 +22,18 @@ def __init__(self, secrets_config):
'''
def create_cluster(self, cluster_conf: models.ClusterConfiguration, wait: bool = False):
try:
if cluster_conf.user_configuration:
user_conf = yaml.dump({'username': cluster_conf.user_configuration.username,
'password': cluster_conf.user_configuration.password,
Contributor:
We may want to omit password until we support encryption.

Member Author:
If we disable password here, that means we would no longer support adding a user with a password at cluster creation time. That's a potentially breaking change, so maybe this PR should wait for or include the encryption feature.

Member Author (@jafreck, Dec 24, 2017):
If the goal is just to not have the password in cleartext in user.yaml, we could also hash it, and create the user using the hash.

That would break ssh'ing outside of AZTK, though.

'ssh-key': cluster_conf.user_configuration.ssh_key,
'cluster_id': cluster_conf.cluster_id})
else:
user_conf = None
zip_resource_files = upload_node_scripts.zip_scripts(self.blob_client,
cluster_conf.cluster_id,
cluster_conf.custom_scripts,
cluster_conf.spark_configuration)
cluster_conf.spark_configuration,
user_conf)

start_task = create_cluster_helper.generate_cluster_start_task(self,
zip_resource_files,
@@ -137,7 +146,7 @@ def get_application_status(self, cluster_id: str, app_name: str):
return task.state._value_
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))

'''
job submission
'''
@@ -180,7 +189,7 @@ def submit_job(self, job_configuration):
else:
raise error.AztkError("Jobs do not support both dedicated and low priority nodes." \
" JobConfiguration fields max_dedicated_nodes and max_low_pri_nodes are mutually exclusive values.")

job = self.__submit_job(
job_configuration=job_configuration,
start_task=start_task,
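The client dumps the user configuration to YAML before zipping it into the node scripts. A standalone sketch of that serialization (field names taken from the diff above; values illustrative):

```python
import yaml

user_conf = yaml.dump({
    'username': 'spark',
    'password': None,  # cleartext today -- see the encryption discussion above
    'ssh-key': 'ssh-rsa AAAA...',
    'cluster_id': 'my-cluster',
})
print(user_conf)
# cluster_id: my-cluster
# password: null
# ssh-key: ssh-rsa AAAA...
# username: spark
```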
26 changes: 16 additions & 10 deletions aztk/spark/models.py
@@ -62,6 +62,10 @@ class FileShare(aztk.models.FileShare):
pass


class UserConfiguration(aztk.models.UserConfiguration):
pass


class ClusterConfiguration(aztk.models.ClusterConfiguration):
def __init__(
self,
@@ -72,17 +76,19 @@ def __init__(
vm_low_pri_count=None,
vm_size=None,
subnet_id=None,
docker_repo: str=None,
spark_configuration: SparkConfiguration = None):
docker_repo: str = None,
spark_configuration: SparkConfiguration = None,
user_configuration: UserConfiguration = None):
super().__init__(custom_scripts=custom_scripts,
cluster_id=cluster_id,
vm_count=vm_count,
vm_low_pri_count=vm_low_pri_count,
vm_size=vm_size,
docker_repo=docker_repo,
subnet_id=subnet_id,
file_shares=file_shares
)
cluster_id=cluster_id,
vm_count=vm_count,
vm_low_pri_count=vm_low_pri_count,
vm_size=vm_size,
docker_repo=docker_repo,
subnet_id=subnet_id,
file_shares=file_shares,
user_configuration=user_configuration
)
self.spark_configuration = spark_configuration
self.gpu_enabled = helpers.is_gpu_enabled(vm_size)

6 changes: 5 additions & 1 deletion aztk/spark/utils/upload_node_scripts.py
@@ -101,7 +101,7 @@ def __add_str_to_zip(zipf, payload, zipf_file_path=None):
zipf.writestr(zipf_file_path, payload)
return zipf

def zip_scripts(blob_client, container_id, custom_scripts, spark_configuration):
def zip_scripts(blob_client, container_id, custom_scripts, spark_configuration, user_conf=None):
zipf = __create_zip()
if custom_scripts:
zipf = __add_custom_scripts(zipf, custom_scripts)
@@ -117,8 +117,12 @@ def zip_scripts(blob_client, container_id, custom_scripts, spark_configuration):
for jar in spark_configuration.jars:
zipf = __add_file_to_zip(zipf, jar, 'jars', binary=True)

if user_conf:
zipf = __add_str_to_zip(zipf, user_conf, 'user.yaml')
Contributor:
Why __add_str_to_zip? Isn't this a file?

Member Author:
The user_conf is a byte string that is written to a file in node_scripts called user.yaml. user.yaml is not a file on the client.

__add_str_to_zip() is probably not the best name for what this method does, though.


# add helper file to node_scripts/submit/
zip_file_to_dir(file=os.path.join(constants.ROOT_PATH, 'aztk', 'utils', 'command_builder.py'), directory='', zipf=zipf, binary=False)

zipf.close()

return __upload(blob_client, container_id)
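To clarify the naming discussion above: `zipfile.ZipFile.writestr` (which `__add_str_to_zip` wraps) writes an in-memory string or bytes payload into the archive under a given path, so `user.yaml` never exists as a file on the client. A minimal sketch:

```python
import zipfile

with zipfile.ZipFile('node_scripts.zip', 'w') as zipf:
    # The payload exists only in memory; 'user.yaml' is created inside the
    # archive and later extracted on the node under $DOCKER_WORKING_DIR.
    zipf.writestr('user.yaml', 'username: spark\n')
```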
4 changes: 0 additions & 4 deletions cli/config.py
@@ -224,10 +224,6 @@ def merge(self, uid, username, size, size_low_pri, vm_size, subnet_id, password,
raise aztk.error.AztkError(
"Please supply a value for wait in either the cluster.yaml configuration file or with a parameter (--wait or --no-wait)")

if self.username is not None and self.wait is False:
raise aztk.error.AztkError(
"You cannot create a user '{0}' if wait is set to false. By default, we create a user in the cluster.yaml file. Please either the configure your cluster.yaml file or set the parameter (--wait)".format(self.username))


class SshConfig:

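The check removed above is the crux of this PR: since the user is now created by the node itself during setup, the CLI no longer has to stay attached to add it, so an invocation along the lines of `aztk spark cluster create --id my-cluster --username spark --no-wait` (flag names assumed from the surrounding CLI) becomes valid.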
40 changes: 19 additions & 21 deletions cli/spark/endpoints/cluster/cluster_create.py
@@ -53,13 +53,6 @@ def execute(args: typing.NamedTuple):
password=args.password,
docker_repo=args.docker_repo)

print_cluster_conf(cluster_conf)

spinner = utils.Spinner()

log.info("Please wait while your cluster is being provisioned")
spinner.start()

if cluster_conf.custom_scripts:
custom_scripts = []
for custom_script in cluster_conf.custom_scripts:
@@ -86,6 +79,23 @@
else:
file_shares = None

if cluster_conf.username:
ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
cluster_conf.username,
cluster_conf.password,
spark_client.secrets_config)
user_conf = aztk.spark.models.UserConfiguration(
username=cluster_conf.username,
password=password,
ssh_key=ssh_key
)
else:
user_conf = None

print_cluster_conf(cluster_conf)
spinner = utils.Spinner()
spinner.start()

# create spark cluster
cluster = spark_client.create_cluster(
aztk.spark.models.ClusterConfiguration(
@@ -97,24 +107,12 @@
custom_scripts=custom_scripts,
file_shares=file_shares,
docker_repo=cluster_conf.docker_repo,
spark_configuration=load_aztk_spark_config()
spark_configuration=load_aztk_spark_config(),
user_configuration=user_conf
),
wait=cluster_conf.wait
)

if cluster_conf.username:
ssh_key = spark_client.secrets_config.ssh_pub_key

ssh_key, password = utils.get_ssh_key_or_prompt(
ssh_key, cluster_conf.username, cluster_conf.password, spark_client.secrets_config)

spark_client.create_user(
cluster_id=cluster_conf.uid,
username=cluster_conf.username,
password=password,
ssh_key=ssh_key
)

spinner.stop()

if cluster_conf.wait:
7 changes: 3 additions & 4 deletions config/cluster.yaml
@@ -24,9 +24,8 @@ docker_repo: aztk/base:spark2.2.0
# - script: <./relative/path/to/other/script.sh or ./relative/path/to/other/script/directory/>
# runOn: <master/worker/all-nodes>

# To add your cluster to a virtual network, provide the full ARM resource id below
# subnet_id: /subscriptions/********-****-****-****-************/resourceGroups/********/providers/Microsoft.Network/virtualNetworks/*******/subnets/******

# wait: <true/false>
wait: true

# To add your cluster to a virtual network, provide the full ARM resource id below
# subnet_id:
wait: false
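Note that the shipped default flips from `wait: true` to `wait: false`, presumably because on-node user creation removes the main reason the CLI previously had to wait for the cluster.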
33 changes: 33 additions & 0 deletions node_scripts/install/create_user.py
@@ -0,0 +1,33 @@
import os
import yaml
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from datetime import datetime, timezone, timedelta
'''
Creates a user if the user configuration file at $DOCKER_WORKING_DIR/user.yaml exists
'''

def create_user(batch_client):
path = os.path.join(os.environ['DOCKER_WORKING_DIR'], "user.yaml")

if not os.path.isfile(path):
print("No user to create.")
return

with open(path) as file:
user_conf = yaml.load(file.read())

try:
batch_client.compute_node.add_user(
pool_id=os.environ['AZ_BATCH_POOL_ID'],
node_id=os.environ['AZ_BATCH_NODE_ID'],
user=batch_models.ComputeNodeUser(
name=user_conf['username'],
is_admin=True,
password=user_conf['password'],
ssh_public_key=str(user_conf['ssh-key']),
expiry_time=datetime.now(timezone.utc) + timedelta(days=365)
)
)
except batch_error.BatchErrorException as e:
print(e)
4 changes: 3 additions & 1 deletion node_scripts/install/install.py
@@ -1,11 +1,13 @@
import os
from core import config
from install import pick_master, spark, scripts
from install import pick_master, spark, scripts, create_user


def setup_node():
client = config.batch_client

create_user.create_user(batch_client=client)

spark.setup_conf()

is_master = pick_master.find_master(client)