From 168c155a068dc26f7fa3e5aea6b8d17af23627c4 Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Thu, 4 May 2017 15:02:49 -0500 Subject: [PATCH] Updated tech report --- .gitignore | 2 + tech-report/Makefile | 15 +- tech-report/auto_generated/.gitignore | 1 + tech-report/report.tex | 284 ++++++++++++++++++++++++-- 4 files changed, 277 insertions(+), 25 deletions(-) create mode 100644 tech-report/auto_generated/.gitignore diff --git a/.gitignore b/.gitignore index bb7b7516..6d59b892 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,6 @@ tech-report/*.toc tech-report/*.bbl tech-report/*.blg tech-report/*.out +tech-report/*.fdb_latexmk +tech-report/*.fls tech-report/auto/ diff --git a/tech-report/Makefile b/tech-report/Makefile index 920a77d9..ede36e3e 100644 --- a/tech-report/Makefile +++ b/tech-report/Makefile @@ -4,7 +4,7 @@ XDG_OPEN:=$(shell which xdg-open 2> /dev/null) MAC_OPEN:=$(shell which open 2> /dev/null) OPEN:=$(if $(XDG_OPEN),$(XDG_OPEN),$(MAC_OPEN)) -OKULAR:=$(strip $(shell qdbus | grep okular)) +OKULAR:=$(strip $(shell qdbus 2> /dev/null | grep okular)) show: report.pdf @@ -16,12 +16,15 @@ update: report.pdf all: clean show -%.pdf: $(wildcard *.tex) $(wildcard *.bib) - xelatex $* && bibtex $* && xelatex $* && xelatex $* +%.pdf: $(wildcard *.tex) $(wildcard *.bib) | auto_generated + xelatex --shell-escape $* && bibtex $* && xelatex --shell-escape $* && xelatex --shell-escape $* -quick: - xelatex report && ${OPEN} report.pdf +quick: | auto_generated + xelatex --shell-escape report && ${OPEN} report.pdf + +auto_generated: + mkdir auto_generated clean: - rm -rf auto *.aux *.bbl *.blg *.log *.out *~ *.pdf + rm -rf auto *.aux *.bbl *.blg *.log *.out *~ *.pdf auto_generated diff --git a/tech-report/auto_generated/.gitignore b/tech-report/auto_generated/.gitignore new file mode 100644 index 00000000..72e8ffc0 --- /dev/null +++ b/tech-report/auto_generated/.gitignore @@ -0,0 +1 @@ +* diff --git a/tech-report/report.tex b/tech-report/report.tex index 64872b32..ce13c94c 100644 --- a/tech-report/report.tex +++ b/tech-report/report.tex @@ -1,3 +1,6 @@ +% !TEX TS-program = xelatexmk +% !TEX encoding = UTF-8 Unicode +% % Compile this document using xelatex \documentclass{report} @@ -26,6 +29,19 @@ \usepackage{fontspec} \defaultfontfeatures{Mapping=tex-text} +% redefine max depth of itemize +\usepackage{enumitem} +\setlistdepth{10} +\renewlist{itemize}{itemize}{10} + +% graphviz +% +% NOTE: The directory auto_generated must already exist, +% or compiling this file is likely to fail. +% +\usepackage[forceshell,outputdir={auto_generated/}]{dot2texi} +\usepackage{tikz} +\usetikzlibrary{shapes,arrows,shadows,shadows.blur,positioning,fit} \title{Ducttape Technical Report} \author{Lane Schwartz \and Jonathan Clark} @@ -65,39 +81,83 @@ \section{PhantomMetaHyperDAG --- directed acyclic hypergraphs with epsilon and p \chapter{Building a workflow} -\section{Abstract syntax trees} +\section{Parsing a tape file} -\begin{enumerate} -\item Parse the ducttape text file(s) into an abstract syntax tree (AST) -\end{enumerate} +Parsing is initiated by the ducttape main method through a call to \texttt{ducttape.syntax.\-GrammarParser.readWorkflow()}. +% +This causes the ducttape text file(s) to be parsed, ultimately resulting in an abstract syntax tree (AST). +% +\texttt{GrammarParser.readWorkflow()} calls \texttt{GrammarParser.parseAll()} to perform the actual parsing. +% +The result of is a sequence of objects directly representing the elements in the tape file; +the type of each element is \texttt{ducttape.syntax.ASTType}. -\section{Branch points and branches} +This list of elements is used to construct a \texttt{ducttape.syntax.WorkflowDefinition} object. +% +The ducttape main method similarly reads and parses any config files specified by the user, resulting in other \texttt{WorkflowDefinition} object(s). +% +All \texttt{WorkflowDefinition} objects are concatenated together into a single object --- \texttt{val wd: WorkflowDefinition}. +% +This object is error-checked by \texttt{StaticChecker} and by \texttt{WorkflowChecker}. -\begin{enumerate} +Finally, the \texttt{WorkflowDefinition} is used to construct a \texttt{ducttape.workflow.\-builder.WorkflowBuilder}. +% +At construction time, the \texttt{ducttape.workflow.builder.WorkflowBuilder} simply creates a \texttt{BranchPointFactory}, a \texttt{BranchFactory}, and a \texttt{PhantomMetaHyperDagBuilder}. +% +Later, the \texttt{WorkflowBuilder.build()} method is responsible for converting the AST into a \texttt{HyperWorkflow}. -\item Traverse the AST to identify all branch points used in the workflow - (see \texttt{findBranchPoints()} in \texttt{WorkflowBuilder.build()}; also - see \texttt{BranchPoint} and \texttt{BranchPointFactory}) +%\begin{enumerate} +%\item +%\end{enumerate} -\item For each branch point, identify all branch names associated with that branch point - (see \texttt{Branch} and \texttt{BranchFactory}, especially \texttt{BranchFactory.getAll()}) -\end{enumerate} +\section{Branch points and branches} +\label{sec:branchpoints_branches} +The \texttt{WorkflowBuilder.build()} method begins by traversing the abstract syntax tree represented by the \texttt{WorkflowDefinition}, looking for branch points and branches. +% +\texttt{WorkflowBuilder} has member variables \texttt{branchPointFactory} and \texttt{branchFactory} that maintain maps from branch point names to \texttt{BranchPoint} objects and branch names to \texttt{Branch} objects, respectively. +% +This traversal is performed by the inner method \texttt{findBranchPoints}. +% +After \texttt{findBranchPoints} is run, \texttt{branchPointFactory} and \texttt{branchFactory} have been populated with all branch points and branches in the workflow. + +% +%\begin{enumerate} +% +%\item Traverse the AST to identify all branch points used in the workflow +% (see \texttt{findBranchPoints()} in \texttt{WorkflowBuilder.build()}; also +% see \texttt{BranchPoint} and \texttt{BranchPointFactory}) +% +%\item For each branch point, identify all branch names associated with that branch point +% (see \texttt{Branch} and \texttt{BranchFactory}, especially \texttt{BranchFactory.getAll()}) +% +%\end{enumerate} \section{Building task templates} +\label{sec:buildTaskTemplates} + +After identifying branch points and branches (\S\ref{sec:branchpoints_branches}), the \texttt{WorkflowBuilder.build()} method next constructs a \texttt{TaskTemplateBuilder} object from the \texttt{WorkflowDefinition}, the \texttt{BranchPointFactory}, and the \texttt{BranchFactory}. +% + + +\subsection{Find tasks} -\subsection{Found tasks} +After creating a \texttt{TaskTemplateBuilder} (\S\ref{sec:buildTaskTemplates}), the \texttt{WorkflowBuilder.build()} method calls \texttt{TaskTemplateBuilder.findTasks()}. +% +The \texttt{TaskTemplateBuilder.findTasks()} method is responsible for identifying temporal and structural dependencies among tasks and store those dependencies as an edge map; this method is also responsible for pre-resolving any non-temporal dependencies such as parameters. -See \texttt{TaskTemplateBuilder.findTasks()}, called from \texttt{WorkflowBuilder.build()} +%See \texttt{TaskTemplateBuilder.findTasks()}, called from \texttt{WorkflowBuilder.build()} + +The \texttt{TaskTemplateBuilder.findTasks()} method performs the following actions: \begin{enumerate} -\item Gather a list of all task definitions +\item The \texttt{findTasks()} method first gather a list of all task definitions present in the \texttt{WorkflowDefinition}. This includes regular task definitions as well as task definitions constructed by function calls. -\item By iterating over this list, construct a map where +\item By iterating over this list of task definitions, the \texttt{findTasks()} method next constructs a map where the keys are globally unique task names and - the values are task definitions + the values are task definitions --- \texttt{taskMap: Map[Namespace,TaskDef]}. \item For each task, iterate over all input specifications and parameter specifications for that task (see \texttt{TaskTemplateBuilder.findTasks()}) @@ -112,19 +172,205 @@ \subsection{Found tasks} \end{enumerate} +This results in a \texttt{TaskTemplateBuilder.parents} map, with task names as keys, and values as follows: + +\begin{description} +\item[\texttt{BranchPointTreeGrafts}] The subtree for each task + \begin{description} + \item[\texttt{Seq[Branch]}] The set of grafts for this task + \item[\texttt{BranchPointTree}] Maps the branch point name to its corresponding branches + \begin{description} + \item[\texttt{Baseline}] Each task has a \texttt{Baseline} branch point. + \item[\texttt{BranchTree}] The subtree of the branch point + \begin{description} + \item[\texttt{Baseline.baseline}] Each \texttt{Baseline} branch point has a \texttt{baseline} branch + \begin{description} + \item[\texttt{Children}] An \texttt{ArrayBuffer[BranchPointTreeGrafts]} denoting nested branch points + \item[\texttt{TerminalData}] Only leaves have \texttt{TerminalData}, one for each task dependency + \begin{description} + \item[\texttt{task}] The task these specs depend on + \item[\texttt{grafts}] Any grafts that were applied to these specs + \item[\texttt{isParam}] True if the dependency is literal, false if temporal + \item[\texttt{specs}] Used to resolve literals + \begin{description} + \item[\texttt{origSpec}] The variable name + \item[\texttt{srcTask}] The source task of the value + \item[\texttt{srcSpec}] The value assigned to the variable + \end{description} + \end{description} + \end{description} + \end{description} + \end{description} + \end{description} +\end{description} + + \section{Temporal and structural dependencies} \section{Constructing a packed hyperDAG} -See \texttt{WorkflowBuilder.build()} +See \texttt{WorkflowBuilder.build()} + +For illustrative purposes, the following section discusses output for the following minimal \texttt{.tape} file: + +\begin{verbatim} + global { + foo="bar" + } + + task parent1 :: z="zazz" {} + + task parent2 :: q=(Q: q1="qiikw" q2="qikmiq" q3=(Sky: blue red=\$z@parent1)) {} + + task hello :: a="abc" b=\$foo c=\$z@parent1 myq=\$q@parent2 > out { + echo "Hello" > \${out} + } + + task world < in=\$out@hello > out2 { + echo "World" > \${out2} + } + + plan { + reach world via (Q: *) * (Sky: *) + } +\end{verbatim} \begin{enumerate} + \item For each task template, create a vertex. Store these vertices in a map, where the keys are globally-unique task names. + We create vertices by calling \texttt{addVertex} on the \texttt{PhantomMetaHyperDagBuilder}, which in turn calls \texttt{addVertex} + on the \texttt{MetaHyperDagBuilder}, which calls \texttt{addVertex} on the \texttt{HyperDagBuilder}. \texttt{HyperDagBuilder.addVertex} + then creates a \texttt{PackedVertex} and adds it to the HyperDag. A \texttt{PackedVertex} is a generic vertex type and the parent of other + vertices such as phantom and epsilon vertices. A regular \texttt{PackedVertex} contains a numeric UID, a \texttt{TaskTemplate}, and an + \texttt{Option[String]} comment to be used for GraphViz, while a phantom vertex takes \texttt{None} instead of a \texttt{TaskTemplate}, and an + epsilon vertex takes \texttt{null}. + +\item Find Hyperedges that need to be created by calling \texttt{WorkflowBuilder.traverse} on each newly-created \texttt{PackedVertex}. \texttt{traverse} + is mutually recursive with \texttt{getHyperedges}. If \texttt{getHyperedges} encounters a nested branch point in the current node's + \texttt{BranchPointTreeGrafts}, it adds a phantom vertex to the \texttt{Dags} and \texttt{traverse}s that vertex. If it does not encounter a nested branch point, + \texttt{getHyperedges} examines the vertex's \texttt{TerminalData}. For each \texttt{TerminalData} with no temporal dependencies (such as a parameter), + \texttt{getHyperedges} adds a phantom vertex to the sequence of hyperedges. For each \texttt{TerminalData} with temporal dependencies (such as an input), + \texttt{getHyperedges} adds a normal vertex to the sequence of hyperedges. In either case, no new vertices are created in the \texttt{HyperDag}, the recursion + succeeds, and we are done \texttt{traverse}-ing. + +\item Once \texttt{getHyperedges} succeeds, the resulting sequence is passed to \texttt{PhantomMetaHyperDagBuilder.addMetaEdge}, along with the current + branch point and a sink epsilon vertex. Epsilon vertices pair tasks with their respective branch points. Our minimal example produces eight epsilon + vertices: a \texttt{Baseline:task} vertex for each task, \texttt{Q:task} vertices for \texttt{hello} and \texttt{parent2}, and \texttt{Sky:task} vertices + for \texttt{hello} and \texttt{parent2}. \texttt{PhantomMetaHyperDagBuilder.addMetaEdge} calls \texttt{MetaHyperDagBuilder.addMetaEdge}, which creates a new + epsilon vertex and \texttt{MetaEdge} in the \texttt{PhantomMetaHyperDag} and calls \texttt{HyperDagBuilder.addHyperEdge} for each hyperedge in the passed + sequence, which creates a new \texttt{HyperEdge} in the \texttt{HyperDag}. A \texttt{MetaEdge} consists of a sink epsilon vertex, the associated branch point, + and the sequence of hyperedges. A \texttt{HyperEdge} consists of a numeric UID, a branch, and an array of edge labels. These edge labels are isomorphic to the + branch's source vertices. + +\item Create a new \texttt{HyperWorkFlow} by calling \texttt{PhantomMetaHyperDagBuilder.build}, which calls \texttt{MetaHyperDagBuilder.build}, which adds a hyperedge + from each epsilon vertex to its corresponding task vertex or phantom nested branch point vertex and calls \texttt{HyperDagBuilder.build}, which constructs a new + \texttt{HyperDag}, shown in Figure \ref{hyperdag}. + +\begin{figure} +\begin{tikzpicture}[>=latex, scale=0.25, transform shape] +\begin{dot2tex}[dot,tikzedgelabels,codeonly] +digraph G { +"parent1:0" [fillcolor="white",style="filled"] +"Epsilon:Baseline:parent1:5" -> "parent1:0" [label="\n"] +"Phantom:parent2.Baseline.baseline.nestedBranch[]:15" [fillcolor="gray",style="filled"] +"Epsilon:Q:parent2:18" -> "Phantom:parent2.Baseline.baseline.nestedBranch[]:15" [label="\n"] +"Phantom:hello.Baseline.baseline.nestedBranch[]:9" [fillcolor="gray",style="filled"] +"Epsilon:Q:hello:12" -> "Phantom:hello.Baseline.baseline.nestedBranch[]:9" [label="\n"] +"parent2:1" [fillcolor="white",style="filled"] +"Epsilon:Baseline:parent2:19" -> "parent2:1" [label="\n"] +"Phantom:parent2.Q.q3.nestedBranch[]:16" [fillcolor="gray",style="filled"] +"Epsilon:Sky:parent2:17" -> "Phantom:parent2.Q.q3.nestedBranch[]:16" [label="\n"] +"hello:2" [fillcolor="white",style="filled"] +"Epsilon:Baseline:hello:13" -> "hello:2" [label="\n"] +"Epsilon:Sky:parent2:17" [fillcolor="yellow",style="filled"] +"Phantom:parent2.literals:14" -> "Epsilon:Sky:parent2:17" [label="Sky.blue\n[]\nq='blue'@"] +"Phantom:parent2.literals:14" -> "Epsilon:Sky:parent2:17" [label="Sky.red\n[]\nq='zazz'@parent1"] +"world:3" [fillcolor="white",style="filled"] +"Epsilon:Baseline:world:7" -> "world:3" [label="\n"] +"Epsilon:Q:parent2:18" [fillcolor="yellow",style="filled"] +"Phantom:parent2.literals:14" -> "Epsilon:Q:parent2:18" [label="Q.q1\n[]\nq='qiikw'@"] +"Phantom:parent2.literals:14" -> "Epsilon:Q:parent2:18" [label="Q.q2\n[]\nq='qikmiq'@"] +"Phantom:parent2.Q.q3.nestedBranch[]:16" -> "Epsilon:Q:parent2:18" [label="Q.q3\n[]\n"] +"Phantom:hello.Q.q3.nestedBranch[]:10" [fillcolor="gray",style="filled"] +"Epsilon:Sky:hello:11" -> "Phantom:hello.Q.q3.nestedBranch[]:10" [label="\n"] +"Phantom:parent1.literals:4" [fillcolor="gray",style="filled"] +"Epsilon:Sky:hello:11" [fillcolor="yellow",style="filled"] +"Phantom:hello.literals:8" -> "Epsilon:Sky:hello:11" [label="Sky.blue\n[]\nmyq='blue'@parent2"] +"Phantom:hello.literals:8" -> "Epsilon:Sky:hello:11" [label="Sky.red\n[]\nmyq='zazz'@parent1"] +"Epsilon:Q:hello:12" [fillcolor="yellow",style="filled"] +"Phantom:hello.literals:8" -> "Epsilon:Q:hello:12" [label="Q.q1\n[]\nmyq='qiikw'@parent2"] +"Phantom:hello.literals:8" -> "Epsilon:Q:hello:12" [label="Q.q2\n[]\nmyq='qikmiq'@parent2"] +"Phantom:hello.Q.q3.nestedBranch[]:10" -> "Epsilon:Q:hello:12" [label="Q.q3\n[]\n"] +"Epsilon:Baseline:parent2:19" [fillcolor="yellow",style="filled"] +"Phantom:parent2.Baseline.baseline.nestedBranch[]:15" -> "Epsilon:Baseline:parent2:19" [label="Baseline.baseline\n[]\n"] +"Epsilon:Baseline:hello:13" [fillcolor="yellow",style="filled"] +"Phantom:hello.Baseline.baseline.nestedBranch[]:9" -> "Epsilon:Baseline:hello:13" [label="Baseline.baseline\n[]\n"] +"Phantom:hello.literals:8" -> "Epsilon:Baseline:hello:13" [label="Baseline.baseline\n[]\na='abc'@\nb='bar'@"] +"Phantom:hello.literals:8" -> "Epsilon:Baseline:hello:13" [label="Baseline.baseline\n[]\nc='zazz'@parent1"] +"Epsilon:Baseline:parent1:5" [fillcolor="yellow",style="filled"] +"Phantom:parent1.literals:4" -> "Epsilon:Baseline:parent1:5" [label="Baseline.baseline\n[]\nz='zazz'@"] +"Phantom:world.literals:6" [fillcolor="gray",style="filled"] +"Epsilon:Baseline:world:7" [fillcolor="yellow",style="filled"] +"hello:2" -> "Epsilon:Baseline:world:7" [label="Baseline.baseline\n[]\nin=@hello"] +"Phantom:parent2.literals:14" [fillcolor="gray",style="filled"] +"Phantom:hello.literals:8" [fillcolor="gray",style="filled"] +} +\end{dot2tex} +\end{tikzpicture} +\caption{The resulting packed HyperDag} +\label{hyperdag} +\end{figure} + \end{enumerate} +\section{Unpacking the HyperDag} + +See \texttt{Plans.getPlannedVertices} + +\begin{enumerate} + +\item Map vertices to grafts (\texttt{Set[Branch]}) by building \texttt{immediateGrafts}. For each \texttt{SpecGroup} \textit{spec} + in each \texttt{HyperEdge} \textit{he} for each \texttt{PackedVertex} \textit{v} in the \texttt{HyperDag}, if \textit{spec}'s + grafts are not empty, add the grafts to \texttt{immediateGrafts[v]}. + +\item Recursively find dependencies by calling \texttt{visitDependencies} and building \texttt{graftRelaxations}, which is another + vertex->grafts map. In \texttt{visitDependencies}, if \textit{dep} does not exist in \texttt{graftRelaxations}, we add it as a key, + set its value to \texttt{grafts}, and call \texttt{visitDependencies} on each of its parents. If \textit{dep} does exist in + \texttt{graftRelaxations}, the recursion succeeds. + +\item For each realization plan in the workflow (or the union of all plans if unspecified), find candidates by calling \texttt{getCandidates}, + which calls \texttt{Hyperworkflow.unpackedWalker}. \texttt{unpackedWalker} builds a munger by initializing the following, in order: + + \begin{description} + + \item[\texttt{EdgeStateInitializer}] adds each edge's state to a holding buffer + + \item[texttt{BranchGraftMunger}] allows the unpacker to recognize and handle grafts + + \item[\texttt{GlobalBranchPointConstraint}] enforces global branch point consistency + + \item[\texttt{EdgeStateMerger}] merges the edge state's holding buffer into the hyperedgeState + + \item[\texttt{InPlanConstraint}] checks the final hyperedge state for plan membership + + \item[\texttt{PhantomMetaHyperDag.unpackedWalker}] creates a new \texttt{UnpackedPhantomMetaDagWalker}. \texttt{UnpackedPhantomMetaDagWalker} + creates a new \texttt{UnpackedMetaDagWalker}, which creates a new \texttt{mutable.HashMap[(PackedVertex[V],Seq[D]), UnpackedVertex[V,H,E,D]]} + of epsilons and--you guessed it--\texttt{UnpackedDagWalker}. \texttt{UnpackedDagWalker} unpacks the roots by adding a new \texttt{UnpackedVertex} + to the \texttt{agenda} for each. + + \end{description} + + This brings us up through Plans.scala, line 190. + +\item Filter candidate realizations by those allowed in the cross-product of realizations for the plan. + +\item Do backward dependency resolution starting at goals + +\end{enumerate} \bibliographystyle{plainnat} \bibliography{report} -\end{document} \ No newline at end of file +\end{document} +