diff --git a/docs/changelog/101700.yaml b/docs/changelog/101700.yaml new file mode 100644 index 0000000000000..08671360688a7 --- /dev/null +++ b/docs/changelog/101700.yaml @@ -0,0 +1,5 @@ +pr: 101700 +summary: Fix `lastUnsafeSegmentGenerationForGets` for realtime get +area: Engine +type: bug +issues: [] diff --git a/docs/changelog/101815.yaml b/docs/changelog/101815.yaml new file mode 100644 index 0000000000000..511e23beb68ef --- /dev/null +++ b/docs/changelog/101815.yaml @@ -0,0 +1,5 @@ +pr: 101815 +summary: Run `TransportGetAliasesAction` on local node +area: Indices APIs +type: enhancement +issues: [] diff --git a/docs/changelog/98874.yaml b/docs/changelog/98874.yaml new file mode 100644 index 0000000000000..e3eb7b5acc63f --- /dev/null +++ b/docs/changelog/98874.yaml @@ -0,0 +1,5 @@ +pr: 98874 +summary: Estimate the memory required to deploy trained models more accurately +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/plugins/development/creating-stable-plugins.asciidoc b/docs/plugins/development/creating-stable-plugins.asciidoc index a8efc86c5beac..c9a8a1f6c7e2a 100644 --- a/docs/plugins/development/creating-stable-plugins.asciidoc +++ b/docs/plugins/development/creating-stable-plugins.asciidoc @@ -59,7 +59,7 @@ for the plugin. If you need other resources, package them into a resources JAR. [discrete] ==== Development process -Elastic provides a Grade plugin, `elasticsearch.stable-esplugin`, that makes it +Elastic provides a Gradle plugin, `elasticsearch.stable-esplugin`, that makes it easier to develop and package stable plugins. The steps in this section assume you use this plugin. However, you don't need Gradle to create plugins. @@ -128,4 +128,4 @@ extend `ESClientYamlSuiteTestCase`. [[plugin-descriptor-file-stable]] ==== The plugin descriptor file for stable plugins -include::plugin-descriptor-file.asciidoc[] \ No newline at end of file +include::plugin-descriptor-file.asciidoc[] diff --git a/docs/reference/esql/index.asciidoc b/docs/reference/esql/index.asciidoc index 2946f4e61d629..799f95751aa69 100644 --- a/docs/reference/esql/index.asciidoc +++ b/docs/reference/esql/index.asciidoc @@ -62,7 +62,7 @@ An overview of using the <>, <>, and The current limitations of {esql}. <>:: -A few examples of what you can with {esql}. +A few examples of what you can do with {esql}. include::esql-get-started.asciidoc[] diff --git a/docs/reference/esql/processing-commands/dissect.asciidoc b/docs/reference/esql/processing-commands/dissect.asciidoc index eca10c201c968..eb7ab80d6174d 100644 --- a/docs/reference/esql/processing-commands/dissect.asciidoc +++ b/docs/reference/esql/processing-commands/dissect.asciidoc @@ -4,9 +4,9 @@ **Syntax** -[source,txt] +[source,esql] ---- -DISSECT input "pattern" [ append_separator=""] +DISSECT input "pattern" [ APPEND_SEPARATOR=""] ---- *Parameters* @@ -16,9 +16,9 @@ The column that contains the string you want to structure. If the column has multiple values, `DISSECT` will process each value. `pattern`:: -A dissect pattern. +A <>. -`append_separator=""`:: +``:: A string used as the separator between appended values, when using the <>. *Description* @@ -29,7 +29,7 @@ delimiter-based pattern, and extracts the specified keys as columns. Refer to <> for the syntax of dissect patterns. 
-*Example* +*Examples* // tag::examples[] The following example parses a string that contains a timestamp, some text, and diff --git a/docs/reference/esql/processing-commands/drop.asciidoc b/docs/reference/esql/processing-commands/drop.asciidoc index 50e3b27fb1b28..4787c5f137314 100644 --- a/docs/reference/esql/processing-commands/drop.asciidoc +++ b/docs/reference/esql/processing-commands/drop.asciidoc @@ -2,7 +2,23 @@ [[esql-drop]] === `DROP` -Use `DROP` to remove columns: +**Syntax** + +[source,esql] +---- +DROP columns +---- + +*Parameters* + +`columns`:: +A comma-separated list of columns to remove. Supports wildcards. + +*Description* + +The `DROP` processing command removes one or more columns. + +*Examples* [source,esql] ---- diff --git a/docs/reference/esql/processing-commands/enrich.asciidoc b/docs/reference/esql/processing-commands/enrich.asciidoc index df402f3b1bd50..603683858b8c0 100644 --- a/docs/reference/esql/processing-commands/enrich.asciidoc +++ b/docs/reference/esql/processing-commands/enrich.asciidoc @@ -4,7 +4,7 @@ **Syntax** -[source,txt] +[source,esql] ---- ENRICH policy [ON match_field] [WITH [new_name1 = ]field1, [new_name2 = ]field2, ...] ---- @@ -15,18 +15,18 @@ ENRICH policy [ON match_field] [WITH [new_name1 = ]field1, [new_name2 = ]field2, The name of the enrich policy. You need to <> and <> the enrich policy first. -`ON match_field`:: +`match_field`:: The match field. `ENRICH` uses its value to look for records in the enrich index. If not specified, the match will be performed on the column with the same name as the `match_field` defined in the <>. -`WITH fieldX`:: +`fieldX`:: The enrich fields from the enrich index that are added to the result as new columns. If a column with the same name as the enrich field already exists, the existing column will be replaced by the new column. If not specified, each of the enrich fields defined in the policy is added -`new_nameX =`:: +`new_nameX`:: Enables you to change the name of the column that's added for each of the enrich fields. Defaults to the enrich field name. @@ -74,7 +74,7 @@ include::{esql-specs}/docs-IT_tests_only.csv-spec[tag=enrich_on-result] By default, each of the enrich fields defined in the policy is added as a column. To explicitly select the enrich fields that are added, use -`WITH , ...`: +`WITH , , ...`: [source.merge.styled,esql] ---- diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc index a0a78f2a3bf97..eb69a587014ab 100644 --- a/docs/reference/esql/processing-commands/eval.asciidoc +++ b/docs/reference/esql/processing-commands/eval.asciidoc @@ -1,7 +1,30 @@ [discrete] [[esql-eval]] === `EVAL` -`EVAL` enables you to append new columns: + +**Syntax** + +[source,esql] +---- +EVAL column1 = value1[, ..., columnN = valueN] +---- + +*Parameters* + +`columnX`:: +The column name. + +`valueX`:: +The value for the column. Can be a literal, an expression, or a +<>. + +*Description* + +The `EVAL` processing command enables you to append new columns with calculated +values. `EVAL` supports various functions for calculating values. Refer to +<> for more information. + +*Examples* [source.merge.styled,esql] ---- @@ -23,8 +46,3 @@ include::{esql-specs}/docs.csv-spec[tag=evalReplace] |=== include::{esql-specs}/docs.csv-spec[tag=evalReplace-result] |=== - -[discrete] -==== Functions -`EVAL` supports various functions for calculating values. Refer to -<> for more information. 
diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc index c95fe59f888ce..d5d58a9eaee12 100644 --- a/docs/reference/esql/processing-commands/grok.asciidoc +++ b/docs/reference/esql/processing-commands/grok.asciidoc @@ -4,7 +4,7 @@ **Syntax** -[source,txt] +[source,esql] ---- GROK input "pattern" ---- diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 3e54e5a7d1c5c..7515583b1bfd1 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -2,11 +2,25 @@ [[esql-keep]] === `KEEP` -The `KEEP` command enables you to specify what columns are returned and the -order in which they are returned. +**Syntax** -To limit the columns that are returned, use a comma-separated list of column -names. The columns are returned in the specified order: +[source,esql] +---- +KEEP columns +---- + +*Parameters* +`columns`:: +A comma-separated list of columns to keep. Supports wildcards. + +*Description* + +The `KEEP` processing command enables you to specify what columns are returned +and the order in which they are returned. + +*Examples* + +The columns are returned in the specified order: [source.merge.styled,esql] ---- @@ -27,7 +41,7 @@ include::{esql-specs}/docs.csv-spec[tag=keepWildcard] The asterisk wildcard (`*`) by itself translates to all columns that do not match the other arguments. This query will first return all columns with a name -that starts with an h, followed by all other columns: +that starts with `h`, followed by all other columns: [source,esql] ---- diff --git a/docs/reference/esql/processing-commands/limit.asciidoc b/docs/reference/esql/processing-commands/limit.asciidoc index c02b534af59e1..457d5e9e65223 100644 --- a/docs/reference/esql/processing-commands/limit.asciidoc +++ b/docs/reference/esql/processing-commands/limit.asciidoc @@ -2,12 +2,30 @@ [[esql-limit]] === `LIMIT` -The `LIMIT` processing command enables you to limit the number of rows: +**Syntax** [source,esql] ---- -include::{esql-specs}/docs.csv-spec[tag=limit] +LIMIT max_number_of_rows ---- -If not specified, `LIMIT` defaults to `500`. A single query will not return -more than 10,000 rows, regardless of the `LIMIT` value. +*Parameters* + +`max_number_of_rows`:: +The maximum number of rows to return. + +*Description* + +The `LIMIT` processing command enables you to limit the number of rows that are +returned. If not specified, `LIMIT` defaults to `500`. + +A query does not return more than 10,000 rows, regardless of the `LIMIT` value. +You can change this with the `esql.query.result_truncation_max_size` static +cluster setting. + +*Example* + +[source,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=limit] +---- diff --git a/docs/reference/esql/processing-commands/mv_expand.asciidoc b/docs/reference/esql/processing-commands/mv_expand.asciidoc index d62b28aabe440..46dc4fd0a33cf 100644 --- a/docs/reference/esql/processing-commands/mv_expand.asciidoc +++ b/docs/reference/esql/processing-commands/mv_expand.asciidoc @@ -2,7 +2,24 @@ [[esql-mv_expand]] === `MV_EXPAND` -The `MV_EXPAND` processing command expands multivalued fields into one row per value, duplicating other fields: +**Syntax** + +[source,esql] +---- +MV_EXPAND column +---- + +*Parameters* + +`column`:: +The multivalued column to expand. 
+ +*Description* + +The `MV_EXPAND` processing command expands multivalued columns into one row per +value, duplicating other columns. + +*Example* [source.merge.styled,esql] ---- diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc index 1dda424317976..773fe8b640f75 100644 --- a/docs/reference/esql/processing-commands/rename.asciidoc +++ b/docs/reference/esql/processing-commands/rename.asciidoc @@ -2,22 +2,33 @@ [[esql-rename]] === `RENAME` -Use `RENAME` to rename a column using the following syntax: +**Syntax** [source,esql] ---- -RENAME AS +RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN] ---- -For example: +*Parameters* + +`old_nameX`:: +The name of a column you want to rename. + +`new_nameX`:: +The new name of the column. + +*Description* + +The `RENAME` processing command renames one or more columns. If a column with +the new name already exists, it will be replaced by the new column. + +*Examples* [source,esql] ---- include::{esql-specs}/docs.csv-spec[tag=rename] ---- -If a column with the new name already exists, it will be replaced by the new -column. Multiple columns can be renamed with a single `RENAME` command: diff --git a/docs/reference/esql/processing-commands/sort.asciidoc b/docs/reference/esql/processing-commands/sort.asciidoc index 76a9193375932..fea7bfaf0c65f 100644 --- a/docs/reference/esql/processing-commands/sort.asciidoc +++ b/docs/reference/esql/processing-commands/sort.asciidoc @@ -1,35 +1,59 @@ [discrete] [[esql-sort]] === `SORT` -Use the `SORT` command to sort rows on one or more fields: + +**Syntax** + +[source,esql] +---- +SORT column1 [ASC/DESC][NULLS FIRST/NULLS LAST][, ..., columnN [ASC/DESC][NULLS FIRST/NULLS LAST]] +---- + +*Parameters* + +`columnX`:: +The column to sort on. + +*Description* + +The `SORT` processing command sorts a table on one or more columns. + +The default sort order is ascending. Use `ASC` or `DESC` to specify an explicit +sort order. + +Two rows with the same sort key are considered equal. You can provide additional +sort expressions to act as tie breakers. + +Sorting on multivalued columns uses the lowest value when sorting ascending and +the highest value when sorting descending. + +By default, `null` values are treated as being larger than any other value. With +an ascending sort order, `null` values are sorted last, and with a descending +sort order, `null` values are sorted first. You can change that by providing +`NULLS FIRST` or `NULLS LAST`. + +*Examples* [source,esql] ---- include::{esql-specs}/docs.csv-spec[tag=sort] ---- -The default sort order is ascending. Set an explicit sort order using `ASC` or -`DESC`: +Explicitly sorting in ascending order with `ASC`: [source,esql] ---- include::{esql-specs}/docs.csv-spec[tag=sortDesc] ---- -Two rows with the same sort key are considered equal. You can provide additional -sort expressions to act as tie breakers: +Providing additional sort expressions to act as tie breakers: [source,esql] ---- include::{esql-specs}/docs.csv-spec[tag=sortTie] ---- -[discrete] -==== `null` values -By default, `null` values are treated as being larger than any other value. With -an ascending sort order, `null` values are sorted last, and with a descending -sort order, `null` values are sorted first. 
You can change that by providing -`NULLS FIRST` or `NULLS LAST`: +Sorting `null` values first using `NULLS FIRST`: [source,esql] ---- diff --git a/docs/reference/esql/processing-commands/stats.asciidoc b/docs/reference/esql/processing-commands/stats.asciidoc index e0a9bbb52b03e..cbdb74d350fb1 100644 --- a/docs/reference/esql/processing-commands/stats.asciidoc +++ b/docs/reference/esql/processing-commands/stats.asciidoc @@ -1,8 +1,49 @@ [discrete] [[esql-stats-by]] === `STATS ... BY` -Use `STATS ... BY` to group rows according to a common value and calculate one -or more aggregated values over the grouped rows. + +**Syntax** + +[source,esql] +---- +STATS [column1 =] expression1[, ..., [columnN =] expressionN] [BY grouping_column1[, ..., grouping_columnN]] +---- + +*Parameters* + +`columnX`:: +The name by which the aggregated value is returned. If omitted, the name is +equal to the corresponding expression (`expressionX`). + +`expressionX`:: +An expression that computes an aggregated value. + +`grouping_columnX`:: +The column containing the values to group by. + +*Description* + +The `STATS ... BY` processing command groups rows according to a common value +and calculate one or more aggregated values over the grouped rows. If `BY` is +omitted, the output table contains exactly one row with the aggregations applied +over the entire dataset. + +The following aggregation functions are supported: + +include::../functions/aggregation-functions.asciidoc[tag=agg_list] + +NOTE: `STATS` without any groups is much much faster than adding a group. + +NOTE: Grouping on a single column is currently much more optimized than grouping + on many columns. In some tests we have seen grouping on a single `keyword` + column to be five times faster than grouping on two `keyword` columns. Do + not try to work around this by combining the two columns together with + something like <> and then grouping - that is not going to be + faster. + +*Examples* + +Calculating a statistic and grouping by the values of another column: [source.merge.styled,esql] ---- @@ -13,8 +54,8 @@ include::{esql-specs}/docs.csv-spec[tag=stats] include::{esql-specs}/docs.csv-spec[tag=stats-result] |=== -If `BY` is omitted, the output table contains exactly one row with the -aggregations applied over the entire dataset: +Omitting `BY` returns one row with the aggregations applied over the entire +dataset: [source.merge.styled,esql] ---- @@ -39,15 +80,3 @@ keyword family fields): ---- include::{esql-specs}/docs.csv-spec[tag=statsGroupByMultipleValues] ---- - -The following aggregation functions are supported: - -include::../functions/aggregation-functions.asciidoc[tag=agg_list] - -NOTE: `STATS` without any groups is much much faster than adding group. - -NOTE: Grouping on a single field is currently much more optimized than grouping - on many fields. In some tests we've seen grouping on a single `keyword` - field to be five times faster than grouping on two `keyword` fields. Don't - try to work around this combining the two fields together with something - like <> and then grouping - that's not going to be faster. 
diff --git a/docs/reference/esql/processing-commands/where.asciidoc b/docs/reference/esql/processing-commands/where.asciidoc index 8dd55df12b9e7..e723a977bf99c 100644 --- a/docs/reference/esql/processing-commands/where.asciidoc +++ b/docs/reference/esql/processing-commands/where.asciidoc @@ -2,8 +2,27 @@ [[esql-where]] === `WHERE` -Use `WHERE` to produce a table that contains all the rows from the input table -for which the provided condition evaluates to `true`: +**Syntax** + +[source,esql] +---- +WHERE expression +---- + +*Parameters* + +`expression`:: +A boolean expression. + +*Description* + +The `WHERE` processing command produces a table that contains all the rows from +the input table for which the provided condition evaluates to `true`. + +`WHERE` supports various <> and +<>. + +*Examples* [source,esql] ---- @@ -17,15 +36,7 @@ Which, if `still_hired` is a boolean field, can be simplified to: include::{esql-specs}/docs.csv-spec[tag=whereBoolean] ---- -[discrete] -==== Operators - -Refer to <> for an overview of the supported operators. - -[discrete] -==== Functions -`WHERE` supports various functions for calculating values. Refer to -<> for more information. +Using a function: [source,esql] ---- diff --git a/docs/reference/esql/source-commands/from.asciidoc b/docs/reference/esql/source-commands/from.asciidoc index 5718bfc27ac1c..6f54a42ddad35 100644 --- a/docs/reference/esql/source-commands/from.asciidoc +++ b/docs/reference/esql/source-commands/from.asciidoc @@ -2,10 +2,47 @@ [[esql-from]] === `FROM` -The `FROM` source command returns a table with up to 10,000 documents from a -data stream, index, or alias. Each row in the resulting table represents a -document. Each column corresponds to a field, and can be accessed by the name -of that field. +**Syntax** + +[source,esql] +---- +FROM index_pattern [METADATA fields] +---- + +*Parameters* + +`index_pattern`:: +A list of indices, data streams or aliases. Supports wildcards and date math. + +`fields`:: +A comma-separated list of <> to retrieve. + +*Description* + +The `FROM` source command returns a table with data from a data stream, index, +or alias. Each row in the resulting table represents a document. Each column +corresponds to a field, and can be accessed by the name of that field. + +[NOTE] +==== +By default, an {esql} query without an explicit <> uses an implicit +limit of 500. This applies to `FROM` too. A `FROM` command without `LIMIT`: + +[source,esql] +---- +FROM employees +---- + +is executed as: + +[source,esql] +---- +FROM employees +| LIMIT 500 +---- +==== + +*Examples* [source,esql] ---- diff --git a/docs/reference/esql/source-commands/row.asciidoc b/docs/reference/esql/source-commands/row.asciidoc index edfe5ecbf7cf3..adce844f365b8 100644 --- a/docs/reference/esql/source-commands/row.asciidoc +++ b/docs/reference/esql/source-commands/row.asciidoc @@ -2,9 +2,29 @@ [[esql-row]] === `ROW` +**Syntax** + +[source,esql] +---- +ROW column1 = value1[, ..., columnN = valueN] +---- + +*Parameters* + +`columnX`:: +The column name. + +`valueX`:: +The value for the column. Can be a literal, an expression, or a +<>. + +*Description* + The `ROW` source command produces a row with one or more columns with values that you specify. This can be useful for testing. 
+*Examples* + [source.merge.styled,esql] ---- include::{esql-specs}/row.csv-spec[tag=example] diff --git a/docs/reference/esql/source-commands/show.asciidoc b/docs/reference/esql/source-commands/show.asciidoc index 956baf628e9f3..ea8c83ceb772a 100644 --- a/docs/reference/esql/source-commands/show.asciidoc +++ b/docs/reference/esql/source-commands/show.asciidoc @@ -1,10 +1,35 @@ [discrete] [[esql-show]] -=== `SHOW ` +=== `SHOW` -The `SHOW ` source command returns information about the deployment and +**Syntax** + +[source,esql] +---- +SHOW item +---- + +*Parameters* + +`item`:: +Can be `INFO` or `FUNCTIONS`. + +*Description* + +The `SHOW` source command returns information about the deployment and its capabilities: * Use `SHOW INFO` to return the deployment's version, build date and hash. * Use `SHOW FUNCTIONS` to return a list of all supported functions and a synopsis of each function. + +*Examples* + +[source.merge.styled,esql] +---- +include::{esql-specs}/show.csv-spec[tag=showFunctionsFiltered] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/show.csv-spec[tag=showFunctionsFiltered-result] +|=== diff --git a/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc b/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc index cae5627d65b54..8c289c27a2d31 100644 --- a/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc +++ b/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc @@ -1,20 +1,21 @@ -[cols="^,^,^,^,^,^,^,^,^,^,^,^,^,^,^"] +[cols="^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^"] |==== -| 14+^h| Remote cluster version +| 15+^h| Remote cluster version h| Local cluster version - | 6.8 | 7.1–7.16 | 7.17 | 8.0 | 8.1 | 8.2 | 8.3 | 8.4 | 8.5 |8.6 |8.7 |8.8 |8.9 |8.10 -| 6.8 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} -| 7.1–7.16 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} -| 7.17 | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.0 | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.1 | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.2 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.3 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon}|{yes-icon} | {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.4 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} |{yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.5 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} |{yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.6 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| 
{yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.7 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.8 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.9 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.10 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} + | 6.8 | 7.1–7.16 | 7.17 | 8.0 | 8.1 | 8.2 | 8.3 | 8.4 | 8.5 |8.6 |8.7 |8.8 |8.9 |8.10 |8.11 +| 6.8 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} +| 7.1–7.16 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} +| 7.17 | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.0 | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.1 | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.2 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.3 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon}|{yes-icon} | {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.4 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} |{yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.5 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} |{yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.6 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.7 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.8 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.9 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.10 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {no-icon} | {no-icon} | {no-icon} | 
{yes-icon} | {yes-icon} | {yes-icon} +| 8.11 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon}| {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} |==== \ No newline at end of file diff --git a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IndicesRecoveryRestCancellationIT.java b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IndicesRecoveryRestCancellationIT.java deleted file mode 100644 index 55870bed5e851..0000000000000 --- a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IndicesRecoveryRestCancellationIT.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ - -package org.elasticsearch.http; - -import org.apache.http.client.methods.HttpGet; -import org.elasticsearch.action.admin.indices.recovery.RecoveryAction; -import org.elasticsearch.action.admin.indices.recovery.TransportRecoveryAction; -import org.elasticsearch.action.admin.indices.recovery.TransportRecoveryActionHelper; -import org.elasticsearch.action.support.PlainActionFuture; -import org.elasticsearch.client.Cancellable; -import org.elasticsearch.client.Request; -import org.elasticsearch.client.Response; -import org.elasticsearch.core.Releasable; -import org.elasticsearch.core.Releasables; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.CancellationException; -import java.util.concurrent.Semaphore; - -import static org.elasticsearch.action.support.ActionTestUtils.wrapAsRestResponseListener; -import static org.elasticsearch.test.TaskAssertions.assertAllCancellableTasksAreCancelled; -import static org.elasticsearch.test.TaskAssertions.assertAllTasksHaveFinished; -import static org.elasticsearch.test.TaskAssertions.awaitTaskWithPrefix; -import static org.hamcrest.Matchers.empty; -import static org.hamcrest.Matchers.not; - -public class IndicesRecoveryRestCancellationIT extends HttpSmokeTestCase { - - public void testIndicesRecoveryRestCancellation() throws Exception { - runTest(new Request(HttpGet.METHOD_NAME, "/_recovery")); - } - - public void testCatRecoveryRestCancellation() throws Exception { - runTest(new Request(HttpGet.METHOD_NAME, "/_cat/recovery")); - } - - private void runTest(Request request) throws Exception { - - createIndex("test"); - ensureGreen("test"); - - final List operationBlocks = new ArrayList<>(); - for (final TransportRecoveryAction transportRecoveryAction : internalCluster().getInstances(TransportRecoveryAction.class)) { - final Semaphore operationBlock = new Semaphore(1); - operationBlocks.add(operationBlock); - TransportRecoveryActionHelper.setOnShardOperation(transportRecoveryAction, () -> { - try { - operationBlock.acquire(); - } catch (InterruptedException e) { - throw new AssertionError(e); - } - operationBlock.release(); - }); - } - assertThat(operationBlocks, not(empty())); - - final List releasables = new ArrayList<>(); - try { - for (final Semaphore operationBlock : operationBlocks) { - operationBlock.acquire(); - releasables.add(operationBlock::release); - } - - final PlainActionFuture future = new PlainActionFuture<>(); - logger.info("--> sending request"); - final Cancellable cancellable = 
getRestClient().performRequestAsync(request, wrapAsRestResponseListener(future)); - - awaitTaskWithPrefix(RecoveryAction.NAME); - - logger.info("--> waiting for at least one task to hit a block"); - assertBusy(() -> assertTrue(operationBlocks.stream().anyMatch(Semaphore::hasQueuedThreads))); - - logger.info("--> cancelling request"); - cancellable.cancel(); - expectThrows(CancellationException.class, future::actionGet); - - assertAllCancellableTasksAreCancelled(RecoveryAction.NAME); - } finally { - Releasables.close(releasables); - } - - assertAllTasksHaveFinished(RecoveryAction.NAME); - } - -} diff --git a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/RestActionCancellationIT.java b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/RestActionCancellationIT.java new file mode 100644 index 0000000000000..d46868094907d --- /dev/null +++ b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/RestActionCancellationIT.java @@ -0,0 +1,144 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.http; + +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.elasticsearch.action.admin.cluster.health.ClusterHealthAction; +import org.elasticsearch.action.admin.cluster.state.ClusterStateAction; +import org.elasticsearch.action.admin.indices.alias.get.GetAliasesAction; +import org.elasticsearch.action.admin.indices.recovery.RecoveryAction; +import org.elasticsearch.action.support.CancellableActionTestPlugin; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.action.support.RefCountingListener; +import org.elasticsearch.action.support.SubscribableListener; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.rest.ObjectPath; + +import java.util.Collection; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +import static org.elasticsearch.action.support.ActionTestUtils.wrapAsRestResponseListener; +import static org.elasticsearch.test.TaskAssertions.assertAllTasksHaveFinished; +import static org.hamcrest.Matchers.greaterThan; + +public class RestActionCancellationIT extends HttpSmokeTestCase { + + public void testIndicesRecoveryRestCancellation() { + createIndex("test"); + ensureGreen("test"); + runRestActionCancellationTest(new Request(HttpGet.METHOD_NAME, "/_recovery"), RecoveryAction.NAME); + } + + public void testCatRecoveryRestCancellation() { + createIndex("test"); + ensureGreen("test"); + runRestActionCancellationTest(new Request(HttpGet.METHOD_NAME, "/_cat/recovery"), RecoveryAction.NAME); + } + + public void testClusterHealthRestCancellation() { + runRestActionCancellationTest(new Request(HttpGet.METHOD_NAME, "/_cluster/health"), ClusterHealthAction.NAME); + } + + public void testClusterStateRestCancellation() { + runRestActionCancellationTest(new Request(HttpGet.METHOD_NAME, "/_cluster/state"), 
ClusterStateAction.NAME); + } + + public void testGetAliasesCancellation() { + runRestActionCancellationTest(new Request(HttpGet.METHOD_NAME, "/_alias"), GetAliasesAction.NAME); + } + + public void testCatAliasesCancellation() { + runRestActionCancellationTest(new Request(HttpGet.METHOD_NAME, "/_cat/aliases"), GetAliasesAction.NAME); + } + + private void runRestActionCancellationTest(Request request, String actionName) { + final var node = usually() ? internalCluster().getRandomNodeName() : internalCluster().startCoordinatingOnlyNode(Settings.EMPTY); + + try ( + var restClient = createRestClient(node); + var capturingAction = CancellableActionTestPlugin.capturingActionOnNode(actionName, node) + ) { + final var responseFuture = new PlainActionFuture(); + final var restInvocation = restClient.performRequestAsync(request, wrapAsRestResponseListener(responseFuture)); + + if (randomBoolean()) { + // cancel by aborting the REST request + capturingAction.captureAndCancel(restInvocation::cancel); + expectThrows(ExecutionException.class, CancellationException.class, () -> responseFuture.get(10, TimeUnit.SECONDS)); + } else { + // cancel via the task management API + final var cancelFuture = new PlainActionFuture(); + capturingAction.captureAndCancel( + () -> SubscribableListener + + .newForked( + l -> restClient.performRequestAsync( + getListTasksRequest(node, actionName), + wrapAsRestResponseListener(l.map(ObjectPath::createFromResponse)) + ) + ) + + .andThen((l, listTasksResponse) -> { + final var taskCount = listTasksResponse.evaluateArraySize("tasks"); + assertThat(taskCount, greaterThan(0)); + try (var listeners = new RefCountingListener(l)) { + for (int i = 0; i < taskCount; i++) { + final var taskPrefix = "tasks." + i + "."; + assertTrue(listTasksResponse.evaluate(taskPrefix + "cancellable")); + assertFalse(listTasksResponse.evaluate(taskPrefix + "cancelled")); + restClient.performRequestAsync( + getCancelTaskRequest( + listTasksResponse.evaluate(taskPrefix + "node"), + listTasksResponse.evaluate(taskPrefix + "id") + ), + wrapAsRestResponseListener(listeners.acquire(HttpSmokeTestCase::assertOK)) + ); + } + } + }) + + .addListener(cancelFuture) + ); + cancelFuture.get(10, TimeUnit.SECONDS); + expectThrows(Exception.class, () -> responseFuture.get(10, TimeUnit.SECONDS)); + } + + assertAllTasksHaveFinished(actionName); + } catch (Exception e) { + fail(e); + } + } + + private static Request getListTasksRequest(String taskNode, String actionName) { + final var listTasksRequest = new Request(HttpGet.METHOD_NAME, "/_tasks"); + listTasksRequest.addParameter("nodes", taskNode); + listTasksRequest.addParameter("actions", actionName); + listTasksRequest.addParameter("group_by", "none"); + return listTasksRequest; + } + + private static Request getCancelTaskRequest(String taskNode, int taskId) { + final var cancelTaskRequest = new Request(HttpPost.METHOD_NAME, Strings.format("/_tasks/%s:%d/_cancel", taskNode, taskId)); + cancelTaskRequest.addParameter("wait_for_completion", null); + return cancelTaskRequest; + } + + @Override + protected Collection> nodePlugins() { + return CollectionUtils.appendToCopy(super.nodePlugins(), CancellableActionTestPlugin.class); + } +} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.aliases/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.aliases/10_basic.yml index 49f5958ad8da1..96998a2a6218e 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.aliases/10_basic.yml +++ 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.aliases/10_basic.yml @@ -496,3 +496,16 @@ test_alias \s+ test_index\n my_alias \s+ test_index\n $/ + +--- +"Deprecated local parameter": + - skip: + version: "- 8.11.99" + features: ["warnings"] + reason: verifying deprecation warnings from 8.12.0 onwards + + - do: + cat.aliases: + local: true + warnings: + - "the [?local=true] query parameter to cat-aliases requests has no effect and will be removed in a future version" diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.exists_alias/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.exists_alias/10_basic.yml index fba0512ca372f..bf499de8463bd 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.exists_alias/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.exists_alias/10_basic.yml @@ -37,9 +37,14 @@ --- "Test indices.exists_alias with local flag": + - skip: + features: ["allowed_warnings"] + - do: indices.exists_alias: name: test_alias local: true + allowed_warnings: + - "the [?local=true] query parameter to get-aliases requests has no effect and will be removed in a future version" - is_false: '' diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.get_alias/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.get_alias/10_basic.yml index 721c7bc709032..d765decda68a8 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.get_alias/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.get_alias/10_basic.yml @@ -291,10 +291,14 @@ setup: --- "Get alias with local flag": + - skip: + features: ["allowed_warnings"] - do: indices.get_alias: local: true + allowed_warnings: + - "the [?local=true] query parameter to get-aliases requests has no effect and will be removed in a future version" - is_true: test_index @@ -325,3 +329,17 @@ setup: - is_true: test_index - is_false: test_index_2 + + +--- +"Deprecated local parameter": + - skip: + version: "- 8.11.99" + features: ["warnings"] + reason: verifying deprecation warnings from 8.12.0 onwards + + - do: + indices.get_alias: + local: true + warnings: + - "the [?local=true] query parameter to get-aliases requests has no effect and will be removed in a future version" diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesAction.java index c05b19043e88b..a04c7c2c2af60 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesAction.java @@ -9,6 +9,7 @@ package org.elasticsearch.action.admin.indices.alias.get; import org.elasticsearch.action.ActionType; +import org.elasticsearch.common.io.stream.Writeable; public class GetAliasesAction extends ActionType { @@ -16,6 +17,6 @@ public class GetAliasesAction extends ActionType { public static final String NAME = "indices:admin/aliases/get"; private GetAliasesAction() { - super(NAME, GetAliasesResponse::new); + super(NAME, Writeable.Reader.localOnly()); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesRequest.java index d801b441fecea..ee6797ca58fb9 
100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesRequest.java @@ -10,12 +10,17 @@ import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.AliasesRequest; import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.support.TransportAction; import org.elasticsearch.action.support.master.MasterNodeReadRequest; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; import java.io.IOException; +import java.util.Map; public class GetAliasesRequest extends MasterNodeReadRequest implements AliasesRequest { @@ -33,6 +38,11 @@ public GetAliasesRequest(String... aliases) { public GetAliasesRequest() {} + /** + * NB prior to 8.12 get-aliases was a TransportMasterNodeReadAction so for BwC we must remain able to read these requests until we no + * longer need to support {@link org.elasticsearch.TransportVersions#CLUSTER_FEATURES_ADDED} and earlier. Once we remove this we can + * also make this class a regular ActionRequest instead of a MasterNodeReadRequest. + */ public GetAliasesRequest(StreamInput in) throws IOException { super(in); indices = in.readStringArray(); @@ -43,11 +53,7 @@ public GetAliasesRequest(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - out.writeStringArray(indices); - out.writeStringArray(aliases); - indicesOptions.writeIndicesOptions(out); - out.writeStringArray(originalAliases); + TransportAction.localOnly(); } @Override @@ -108,4 +114,9 @@ public ActionRequestValidationException validate() { public boolean includeDataStreams() { return true; } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new CancellableTask(id, type, action, "", parentTaskId, headers); + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesResponse.java b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesResponse.java index 91c6f49101e85..c0e26b16585c4 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesResponse.java @@ -11,7 +11,6 @@ import org.elasticsearch.action.ActionResponse; import org.elasticsearch.cluster.metadata.AliasMetadata; import org.elasticsearch.cluster.metadata.DataStreamAlias; -import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import java.io.IOException; @@ -29,12 +28,6 @@ public GetAliasesResponse(Map> aliases, Map i.readCollectionAsList(AliasMetadata::new)); - dataStreamAliases = in.readMap(in1 -> in1.readCollectionAsList(DataStreamAlias::new)); - } - public Map> getAliases() { return aliases; } @@ -43,6 +36,10 @@ public Map> getDataStreamAliases() { return dataStreamAliases; } + /** + * NB prior to 8.12 get-aliases was a TransportMasterNodeReadAction so for BwC we must remain able to write these responses until we no + * longer need to support {@link org.elasticsearch.TransportVersions#CLUSTER_FEATURES_ADDED} and earlier. 
+ */ @Override public void writeTo(StreamOutput out) throws IOException { out.writeMap(aliases, StreamOutput::writeCollection); diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/TransportGetAliasesAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/TransportGetAliasesAction.java index 456b2cc7b899f..e43d1a825c233 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/TransportGetAliasesAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/alias/get/TransportGetAliasesAction.java @@ -9,7 +9,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.master.TransportMasterNodeReadAction; +import org.elasticsearch.action.support.TransportLocalClusterStateAction; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.block.ClusterBlockLevel; @@ -26,6 +26,7 @@ import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.indices.SystemIndices; import org.elasticsearch.indices.SystemIndices.SystemIndexAccessLevel; +import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -38,32 +39,37 @@ import java.util.Map; import java.util.function.Predicate; -public class TransportGetAliasesAction extends TransportMasterNodeReadAction { +/** + * NB prior to 8.12 this was a TransportMasterNodeReadAction so for BwC it must be registered with the TransportService (i.e. a + * HandledTransportAction) until we no longer need to support {@link org.elasticsearch.TransportVersions#CLUSTER_FEATURES_ADDED} and + * earlier. 
+ */ +public class TransportGetAliasesAction extends TransportLocalClusterStateAction { private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(TransportGetAliasesAction.class); + private final IndexNameExpressionResolver indexNameExpressionResolver; private final SystemIndices systemIndices; + private final ThreadContext threadContext; @Inject public TransportGetAliasesAction( TransportService transportService, - ClusterService clusterService, - ThreadPool threadPool, ActionFilters actionFilters, + ClusterService clusterService, IndexNameExpressionResolver indexNameExpressionResolver, SystemIndices systemIndices ) { super( GetAliasesAction.NAME, - transportService, clusterService, - threadPool, + transportService, actionFilters, GetAliasesRequest::new, - indexNameExpressionResolver, - GetAliasesResponse::new, - threadPool.executor(ThreadPool.Names.MANAGEMENT) + clusterService.threadPool().executor(ThreadPool.Names.MANAGEMENT) ); + this.indexNameExpressionResolver = indexNameExpressionResolver; this.systemIndices = systemIndices; + this.threadContext = clusterService.threadPool().getThreadContext(); } @Override @@ -77,15 +83,22 @@ protected ClusterBlockException checkBlock(GetAliasesRequest request, ClusterSta } @Override - protected void masterOperation(Task task, GetAliasesRequest request, ClusterState state, ActionListener listener) { + protected void localClusterStateOperation( + Task task, + GetAliasesRequest request, + ClusterState state, + ActionListener listener + ) { assert Transports.assertNotTransportThread("no need to avoid the context switch and may be expensive if there are many aliases"); + final var cancellableTask = (CancellableTask) task; // resolve all concrete indices upfront and warn/error later final String[] concreteIndices = indexNameExpressionResolver.concreteIndexNamesWithSystemIndexAccess(state, request); final SystemIndexAccessLevel systemIndexAccessLevel = indexNameExpressionResolver.getSystemIndexAccessLevel(); Map> aliases = state.metadata().findAliases(request.aliases(), concreteIndices); + cancellableTask.ensureNotCancelled(); listener.onResponse( new GetAliasesResponse( - postProcess(request, concreteIndices, aliases, state, systemIndexAccessLevel, threadPool.getThreadContext(), systemIndices), + postProcess(request, concreteIndices, aliases, state, systemIndexAccessLevel, threadContext, systemIndices), postProcess(indexNameExpressionResolver, request, state) ) ); @@ -122,7 +135,7 @@ static Map> postProcess( } final Map> finalResponse = Collections.unmodifiableMap(mapBuilder); if (systemIndexAccessLevel != SystemIndexAccessLevel.ALL) { - checkSystemIndexAccess(request, systemIndices, state, finalResponse, systemIndexAccessLevel, threadContext); + checkSystemIndexAccess(systemIndices, state, finalResponse, systemIndexAccessLevel, threadContext); } return finalResponse; } @@ -151,7 +164,6 @@ static Map> postProcess( } private static void checkSystemIndexAccess( - GetAliasesRequest request, SystemIndices systemIndices, ClusterState state, Map> aliasesMap, diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java index 0bd51eba85ff9..c74981d475389 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java @@ -20,7 +20,6 @@ 
import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.indices.IndicesService; @@ -102,7 +101,6 @@ protected RecoveryRequest readRequestFrom(StreamInput in) throws IOException { protected void shardOperation(RecoveryRequest request, ShardRouting shardRouting, Task task, ActionListener listener) { ActionListener.completeWith(listener, () -> { assert task instanceof CancellableTask; - runOnShardOperation(); IndexService indexService = indicesService.indexServiceSafe(shardRouting.shardId().getIndex()); IndexShard indexShard = indexService.getShard(shardRouting.shardId().id()); return indexShard.recoveryState(); @@ -123,19 +121,4 @@ protected ClusterBlockException checkGlobalBlock(ClusterState state, RecoveryReq protected ClusterBlockException checkRequestBlock(ClusterState state, RecoveryRequest request, String[] concreteIndices) { return state.blocks().indicesBlockedException(ClusterBlockLevel.METADATA_READ, concreteIndices); } - - @Nullable // unless running tests that inject extra behaviour - private volatile Runnable onShardOperation; - - private void runOnShardOperation() { - final Runnable onShardOperation = this.onShardOperation; - if (onShardOperation != null) { - onShardOperation.run(); - } - } - - // exposed for tests: inject some extra behaviour that runs when shardOperation() is called - void setOnShardOperation(@Nullable Runnable onShardOperation) { - this.onShardOperation = onShardOperation; - } } diff --git a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java index 2f3266f9e0099..b56cb0ca5926c 100644 --- a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java @@ -228,27 +228,7 @@ public final void run() { skipShard(iterator); } if (shardsIts.size() > 0) { - assert request.allowPartialSearchResults() != null : "SearchRequest missing setting for allowPartialSearchResults"; - if (request.allowPartialSearchResults() == false) { - final StringBuilder missingShards = new StringBuilder(); - // Fail-fast verification of all shards being available - for (int index = 0; index < shardsIts.size(); index++) { - final SearchShardIterator shardRoutings = shardsIts.get(index); - if (shardRoutings.size() == 0) { - if (missingShards.length() > 0) { - missingShards.append(", "); - } - missingShards.append(shardRoutings.shardId()); - } - } - if (missingShards.length() > 0) { - // Status red - shard is missing all copies and would produce partial results for an index search - final String msg = "Search rejected due to missing shards [" - + missingShards - + "]. 
Consider using `allow_partial_search_results` setting to bypass this error."; - throw new SearchPhaseExecutionException(getName(), msg, null, ShardSearchFailure.EMPTY_ARRAY); - } - } + doCheckNoMissingShards(getName(), request, shardsIts); Version version = request.minCompatibleShardNode(); if (version != null && Version.CURRENT.minimumCompatibilityVersion().equals(version) == false) { if (checkMinimumVersion(shardsIts) == false) { @@ -434,7 +414,6 @@ public final void executeNextPhase(SearchPhase currentPhase, SearchPhase nextPha logger.debug(() -> format("%s shards failed for phase: [%s]", numShardFailures, currentPhase.getName()), cause); } onPhaseFailure(currentPhase, "Partial shards failure", null); - return; } else { int discrepancy = getNumShards() - successfulOps.get(); assert discrepancy > 0 : "discrepancy: " + discrepancy; @@ -449,8 +428,8 @@ public final void executeNextPhase(SearchPhase currentPhase, SearchPhase nextPha ); } onPhaseFailure(currentPhase, "Partial shards failure (" + discrepancy + " shards unavailable)", null); - return; } + return; } if (logger.isTraceEnabled()) { final String resultsFrom = results.getSuccessfulResults() @@ -840,7 +819,7 @@ void executeNext(Runnable runnable, Thread originalThread) { private static final class PendingExecutions { private final int permits; private int permitsTaken = 0; - private ArrayDeque queue = new ArrayDeque<>(); + private final ArrayDeque queue = new ArrayDeque<>(); PendingExecutions(int permits) { assert permits > 0 : "not enough permits: " + permits; diff --git a/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java index cef6bf92cc5e6..6e553f254ee8b 100644 --- a/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java @@ -31,7 +31,6 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.Transport; -import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -127,7 +126,7 @@ private static boolean assertSearchCoordinationThread() { } @Override - public void run() throws IOException { + public void run() { assert assertSearchCoordinationThread(); checkNoMissingShards(); Version version = request.minCompatibleShardNode(); @@ -159,9 +158,7 @@ private void runCoordinatorRewritePhase() { ); final ShardSearchRequest request = canMatchNodeRequest.createShardSearchRequest(buildShardLevelRequest(searchShardIterator)); if (searchShardIterator.prefiltered()) { - CanMatchShardResponse result = new CanMatchShardResponse(searchShardIterator.skip() == false, null); - result.setShardIndex(request.shardRequestIndex()); - results.consumeResult(result, () -> {}); + consumeResult(searchShardIterator.skip() == false, request); continue; } boolean canMatch = true; @@ -178,9 +175,7 @@ private void runCoordinatorRewritePhase() { if (canMatch) { matchedShardLevelRequests.add(searchShardIterator); } else { - CanMatchShardResponse result = new CanMatchShardResponse(canMatch, null); - result.setShardIndex(request.shardRequestIndex()); - results.consumeResult(result, () -> {}); + consumeResult(false, request); } } if (matchedShardLevelRequests.isEmpty()) { @@ -190,29 +185,15 @@ private void runCoordinatorRewritePhase() { } } + private void consumeResult(boolean canMatch, ShardSearchRequest request) { + 
CanMatchShardResponse result = new CanMatchShardResponse(canMatch, null); + result.setShardIndex(request.shardRequestIndex()); + results.consumeResult(result, () -> {}); + } + private void checkNoMissingShards() { assert assertSearchCoordinationThread(); - assert request.allowPartialSearchResults() != null : "SearchRequest missing setting for allowPartialSearchResults"; - if (request.allowPartialSearchResults() == false) { - final StringBuilder missingShards = new StringBuilder(); - // Fail-fast verification of all shards being available - for (int index = 0; index < shardsIts.size(); index++) { - final SearchShardIterator shardRoutings = shardsIts.get(index); - if (shardRoutings.size() == 0) { - if (missingShards.length() > 0) { - missingShards.append(", "); - } - missingShards.append(shardRoutings.shardId()); - } - } - if (missingShards.length() > 0) { - // Status red - shard is missing all copies and would produce partial results for an index search - final String msg = "Search rejected due to missing shards [" - + missingShards - + "]. Consider using `allow_partial_search_results` setting to bypass this error."; - throw new SearchPhaseExecutionException(getName(), msg, null, ShardSearchFailure.EMPTY_ARRAY); - } - } + doCheckNoMissingShards(getName(), request, shardsIts); } private Map> groupByNode(GroupShardsIterator shards) { @@ -425,7 +406,7 @@ public void onFailure(Exception e) { } @Override - protected void doRun() throws IOException { + protected void doRun() { CanMatchPreFilterSearchPhase.this.run(); } }); diff --git a/server/src/main/java/org/elasticsearch/action/search/ClearScrollResponse.java b/server/src/main/java/org/elasticsearch/action/search/ClearScrollResponse.java index 0a7b53ea8b9c4..8b1116951df82 100644 --- a/server/src/main/java/org/elasticsearch/action/search/ClearScrollResponse.java +++ b/server/src/main/java/org/elasticsearch/action/search/ClearScrollResponse.java @@ -85,7 +85,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws /** * Parse the clear scroll response body into a new {@link ClearScrollResponse} object */ - public static ClosePointInTimeResponse fromXContent(XContentParser parser) throws IOException { + public static ClosePointInTimeResponse fromXContent(XContentParser parser) { return PARSER.apply(parser, null); } diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index dca269f06a3d3..e010e840d3f2d 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -24,7 +24,6 @@ import org.elasticsearch.search.vectors.KnnScoreDocQueryBuilder; import org.elasticsearch.transport.Transport; -import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -71,7 +70,7 @@ final class DfsQueryPhase extends SearchPhase { } @Override - public void run() throws IOException { + public void run() { // TODO we can potentially also consume the actual per shard results from the initial phase here in the aggregateDfs // to free up memory early final CountedCollector counter = new CountedCollector<>( diff --git a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java index e7d6eca23498f..cadcd6ca57334 100644 --- 
a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java +++ b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java @@ -51,7 +51,6 @@ */ public class MultiSearchRequest extends ActionRequest implements CompositeIndicesRequest { private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class); - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal]" + " Specifying types in search requests is deprecated."; public static final String FIRST_LINE_EMPTY_DEPRECATION_MESSAGE = "support for empty first line before any action metadata in msearch API is deprecated " + "and will be removed in the next major version"; diff --git a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequestBuilder.java index 6f1e8d429edab..57c536f3d371e 100644 --- a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequestBuilder.java +++ b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequestBuilder.java @@ -63,11 +63,4 @@ public MultiSearchRequestBuilder setIndicesOptions(IndicesOptions indicesOptions return this; } - /** - * Sets how many search requests specified in this multi search requests are allowed to be ran concurrently. - */ - public MultiSearchRequestBuilder setMaxConcurrentSearchRequests(int maxConcurrentSearchRequests) { - request().maxConcurrentSearchRequests(maxConcurrentSearchRequests); - return this; - } } diff --git a/server/src/main/java/org/elasticsearch/action/search/OpenPointInTimeResponse.java b/server/src/main/java/org/elasticsearch/action/search/OpenPointInTimeResponse.java index c6463bcb00f67..92a2a1503aefc 100644 --- a/server/src/main/java/org/elasticsearch/action/search/OpenPointInTimeResponse.java +++ b/server/src/main/java/org/elasticsearch/action/search/OpenPointInTimeResponse.java @@ -11,27 +11,16 @@ import org.elasticsearch.action.ActionResponse; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.xcontent.ConstructingObjectParser; -import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; -import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; - public final class OpenPointInTimeResponse extends ActionResponse implements ToXContentObject { private static final ParseField ID = new ParseField("id"); - private static final ConstructingObjectParser PARSER; - - static { - PARSER = new ConstructingObjectParser<>("open_point_in_time", true, a -> new OpenPointInTimeResponse((String) a[0])); - PARSER.declareField(constructorArg(), (parser, context) -> parser.text(), ID, ObjectParser.ValueType.STRING); - } private final String pointInTimeId; public OpenPointInTimeResponse(String pointInTimeId) { @@ -60,7 +49,4 @@ public String getPointInTimeId() { return pointInTimeId; } - public static OpenPointInTimeResponse fromXContent(XContentParser parser) throws IOException { - return PARSER.parse(parser, null); - } } diff --git a/server/src/main/java/org/elasticsearch/action/search/ParsedScrollId.java b/server/src/main/java/org/elasticsearch/action/search/ParsedScrollId.java index ca68b1865495d..a9f3502bfa631 100644 
--- a/server/src/main/java/org/elasticsearch/action/search/ParsedScrollId.java +++ b/server/src/main/java/org/elasticsearch/action/search/ParsedScrollId.java @@ -16,22 +16,15 @@ public class ParsedScrollId { public static final String QUERY_AND_FETCH_TYPE = "queryAndFetch"; - private final String source; - private final String type; private final SearchContextIdForNode[] context; - ParsedScrollId(String source, String type, SearchContextIdForNode[] context) { - this.source = source; + ParsedScrollId(String type, SearchContextIdForNode[] context) { this.type = type; this.context = context; } - public String getSource() { - return source; - } - public String getType() { return type; } diff --git a/server/src/main/java/org/elasticsearch/action/search/QueryPhaseResultConsumer.java b/server/src/main/java/org/elasticsearch/action/search/QueryPhaseResultConsumer.java index f78d5f4005755..ee956b5179902 100644 --- a/server/src/main/java/org/elasticsearch/action/search/QueryPhaseResultConsumer.java +++ b/server/src/main/java/org/elasticsearch/action/search/QueryPhaseResultConsumer.java @@ -520,7 +520,7 @@ private record MergeResult( private static class MergeTask { private final List emptyResults; private QuerySearchResult[] buffer; - private long aggsBufferSize; + private final long aggsBufferSize; private Runnable next; private MergeTask(QuerySearchResult[] buffer, long aggsBufferSize, List emptyResults, Runnable next) { diff --git a/server/src/main/java/org/elasticsearch/action/search/RestOpenPointInTimeAction.java b/server/src/main/java/org/elasticsearch/action/search/RestOpenPointInTimeAction.java index 5de59cc6ce878..815deac07dfcd 100644 --- a/server/src/main/java/org/elasticsearch/action/search/RestOpenPointInTimeAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/RestOpenPointInTimeAction.java @@ -18,7 +18,6 @@ import org.elasticsearch.rest.ServerlessScope; import org.elasticsearch.rest.action.RestToXContentListener; -import java.io.IOException; import java.util.List; import static org.elasticsearch.rest.RestRequest.Method.POST; @@ -37,7 +36,7 @@ public List routes() { } @Override - public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { + public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) { final String[] indices = Strings.splitStringByCommaToArray(request.param("index")); final OpenPointInTimeRequest openRequest = new OpenPointInTimeRequest(indices); openRequest.indicesOptions(IndicesOptions.fromRequest(request, OpenPointInTimeRequest.DEFAULT_INDICES_OPTIONS)); diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchContextId.java b/server/src/main/java/org/elasticsearch/action/search/SearchContextId.java index 2b7105cffe2bb..f10650a6401d6 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchContextId.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchContextId.java @@ -41,7 +41,7 @@ public final class SearchContextId { private final Map shards; private final Map aliasFilter; - private transient Set contextIds; + private final transient Set contextIds; SearchContextId(Map shards, Map aliasFilter) { this.shards = shards; diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhase.java index 88da2fdfa3a9e..9d3eadcc42bf9 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhase.java +++ 
b/server/src/main/java/org/elasticsearch/action/search/SearchPhase.java @@ -7,6 +7,7 @@ */ package org.elasticsearch.action.search; +import org.elasticsearch.cluster.routing.GroupShardsIterator; import org.elasticsearch.core.CheckedRunnable; import java.io.IOException; @@ -37,4 +38,28 @@ public void start() { throw new UncheckedIOException(e); } } + + static void doCheckNoMissingShards(String phaseName, SearchRequest request, GroupShardsIterator shardsIts) { + assert request.allowPartialSearchResults() != null : "SearchRequest missing setting for allowPartialSearchResults"; + if (request.allowPartialSearchResults() == false) { + final StringBuilder missingShards = new StringBuilder(); + // Fail-fast verification of all shards being available + for (int index = 0; index < shardsIts.size(); index++) { + final SearchShardIterator shardRoutings = shardsIts.get(index); + if (shardRoutings.size() == 0) { + if (missingShards.isEmpty() == false) { + missingShards.append(", "); + } + missingShards.append(shardRoutings.shardId()); + } + } + if (missingShards.isEmpty() == false) { + // Status red - shard is missing all copies and would produce partial results for an index search + final String msg = "Search rejected due to missing shards [" + + missingShards + + "]. Consider using `allow_partial_search_results` setting to bypass this error."; + throw new SearchPhaseExecutionException(phaseName, msg, null, ShardSearchFailure.EMPTY_ARRAY); + } + } + } } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java index fb554232503f2..5af5c4c2ec602 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java @@ -667,7 +667,7 @@ private static void validateMergeSortValueFormats(Collection statsGroups) { - sourceBuilder().stats(statsGroups); - return this; - } - /** * Indicates whether the response should contain the stored _source for every hit */ diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java b/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java index b6a9179b1e956..56b58cd8ced6c 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java @@ -144,10 +144,6 @@ public RestStatus status() { return RestStatus.status(successfulShards, totalShards, shardFailures); } - public SearchResponseSections getInternalResponse() { - return internalResponse; - } - /** * The search hits. */ @@ -387,7 +383,7 @@ public static SearchResponse innerFromXContent(XContentParser parser) throws IOE } } else if (token == Token.START_ARRAY) { if (RestActions.FAILURES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { - while ((token = parser.nextToken()) != Token.END_ARRAY) { + while (parser.nextToken() != Token.END_ARRAY) { failures.add(ShardSearchFailure.fromXContent(parser)); } } else { @@ -479,7 +475,7 @@ public static final class Clusters implements ToXContentFragment, Writeable { private final Map clusterInfo; // not Writeable since it is only needed on the (primary) CCS coordinator - private transient Boolean ccsMinimizeRoundtrips; + private final transient Boolean ccsMinimizeRoundtrips; /** * For use with cross-cluster searches. 
@@ -985,7 +981,7 @@ public static class Builder { private List failures; private TimeValue took; private Boolean timedOut; - private Cluster original; + private final Cluster original; public Builder(Cluster copyFrom) { this.original = copyFrom; @@ -1167,7 +1163,7 @@ public static Cluster fromXContent(String clusterAlias, XContentParser parser) t } } else if (token == Token.START_ARRAY) { if (RestActions.FAILURES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { - while ((token = parser.nextToken()) != Token.END_ARRAY) { + while (parser.nextToken() != Token.END_ARRAY) { failures.add(ShardSearchFailure.fromXContent(parser)); } } else { diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchScrollAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchScrollAsyncAction.java index 35aae0764e251..df16c107a2619 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchScrollAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchScrollAsyncAction.java @@ -23,7 +23,6 @@ import org.elasticsearch.transport.RemoteClusterService; import org.elasticsearch.transport.Transport; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -230,7 +229,7 @@ protected SearchPhase sendResponsePhase( ) { return new SearchPhase("fetch") { @Override - public void run() throws IOException { + public void run() { sendResponse(queryPhase, fetchResults); } }; diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java b/server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java index d02958567a873..800ad7afbb8db 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java @@ -51,6 +51,7 @@ import org.elasticsearch.transport.TransportActionProxy; import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportRequest; +import org.elasticsearch.transport.TransportRequestHandler; import org.elasticsearch.transport.TransportRequestOptions; import org.elasticsearch.transport.TransportResponse; import org.elasticsearch.transport.TransportResponseHandler; @@ -366,7 +367,7 @@ public Map getPendingSearchRequests() { } static class ScrollFreeContextRequest extends TransportRequest { - private ShardSearchContextId contextId; + private final ShardSearchContextId contextId; ScrollFreeContextRequest(ShardSearchContextId contextId) { this.contextId = Objects.requireNonNull(contextId); @@ -390,7 +391,7 @@ public ShardSearchContextId id() { } static class SearchFreeContextRequest extends ScrollFreeContextRequest implements IndicesRequest { - private OriginalIndices originalIndices; + private final OriginalIndices originalIndices; SearchFreeContextRequest(OriginalIndices originalIndices, ShardSearchContextId id) { super(id); @@ -428,7 +429,7 @@ public IndicesOptions indicesOptions() { public static class SearchFreeContextResponse extends TransportResponse { - private boolean freed; + private final boolean freed; SearchFreeContextResponse(StreamInput in) throws IOException { freed = in.readBoolean(); @@ -541,13 +542,16 @@ public static void registerRequestHandler(TransportService transportService, Sea ); TransportActionProxy.registerProxyAction(transportService, QUERY_FETCH_SCROLL_ACTION_NAME, true, ScrollQueryFetchSearchResult::new); + TransportRequestHandler shardFetchHandler = 
(request, channel, task) -> searchService.executeFetchPhase( + request, + (SearchShardTask) task, + new ChannelActionListener<>(channel) + ); transportService.registerRequestHandler( FETCH_ID_SCROLL_ACTION_NAME, EsExecutors.DIRECT_EXECUTOR_SERVICE, ShardFetchRequest::new, - (request, channel, task) -> { - searchService.executeFetchPhase(request, (SearchShardTask) task, new ChannelActionListener<>(channel)); - } + shardFetchHandler ); TransportActionProxy.registerProxyAction(transportService, FETCH_ID_SCROLL_ACTION_NAME, true, FetchSearchResult::new); @@ -557,9 +561,7 @@ public static void registerRequestHandler(TransportService transportService, Sea true, true, ShardFetchSearchRequest::new, - (request, channel, task) -> { - searchService.executeFetchPhase(request, (SearchShardTask) task, new ChannelActionListener<>(channel)); - } + shardFetchHandler ); TransportActionProxy.registerProxyAction(transportService, FETCH_ID_ACTION_NAME, true, FetchSearchResult::new); diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchType.java b/server/src/main/java/org/elasticsearch/action/search/SearchType.java index 519f1ce98a7b6..8e6511db62136 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchType.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchType.java @@ -39,7 +39,7 @@ public enum SearchType { */ public static final SearchType[] CURRENTLY_SUPPORTED = { QUERY_THEN_FETCH, DFS_QUERY_THEN_FETCH }; - private byte id; + private final byte id; SearchType(byte id) { this.id = id; diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportMultiSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportMultiSearchAction.java index a4a35789db258..a2324010876bf 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportMultiSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportMultiSearchAction.java @@ -16,7 +16,6 @@ import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.common.util.concurrent.EsExecutors; @@ -47,13 +46,7 @@ public TransportMultiSearchAction( ActionFilters actionFilters, NodeClient client ) { - super( - MultiSearchAction.NAME, - transportService, - actionFilters, - (Writeable.Reader) MultiSearchRequest::new, - EsExecutors.DIRECT_EXECUTOR_SERVICE - ); + super(MultiSearchAction.NAME, transportService, actionFilters, MultiSearchRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); this.threadPool = threadPool; this.clusterService = clusterService; this.allocatedProcessors = EsExecutors.allocatedProcessors(settings); @@ -70,13 +63,7 @@ public TransportMultiSearchAction( LongSupplier relativeTimeProvider, NodeClient client ) { - super( - MultiSearchAction.NAME, - transportService, - actionFilters, - (Writeable.Reader) MultiSearchRequest::new, - EsExecutors.DIRECT_EXECUTOR_SERVICE - ); + super(MultiSearchAction.NAME, transportService, actionFilters, MultiSearchRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); this.threadPool = threadPool; this.clusterService = clusterService; this.allocatedProcessors = allocatedProcessors; diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java 
b/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java index aeb71a3b03d8f..ae3c735e079e9 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java @@ -268,7 +268,7 @@ public void writeTo(StreamOutput out) throws IOException { private class ShardOpenReaderRequestHandler implements TransportRequestHandler { @Override - public void messageReceived(ShardOpenReaderRequest request, TransportChannel channel, Task task) throws Exception { + public void messageReceived(ShardOpenReaderRequest request, TransportChannel channel, Task task) { searchService.openReaderContext( request.getShardId(), request.keepAlive, diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index a2739e2c2a85e..5030bd875a0f6 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -39,7 +39,6 @@ import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; -import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.Setting; @@ -159,13 +158,7 @@ public TransportSearchAction( NamedWriteableRegistry namedWriteableRegistry, ExecutorSelector executorSelector ) { - super( - SearchAction.NAME, - transportService, - actionFilters, - (Writeable.Reader) SearchRequest::new, - EsExecutors.DIRECT_EXECUTOR_SERVICE - ); + super(SearchAction.NAME, transportService, actionFilters, SearchRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); this.threadPool = threadPool; this.circuitBreaker = circuitBreakerService.getBreaker(CircuitBreaker.REQUEST); this.searchPhaseController = searchPhaseController; @@ -514,7 +507,7 @@ static void ccsRemoteReduce( clusterAlias, remoteClientResponseExecutor ); - remoteClusterClient.search(ccsSearchRequest, new ActionListener() { + remoteClusterClient.search(ccsSearchRequest, new ActionListener<>() { @Override public void onResponse(SearchResponse searchResponse) { // TODO: in CCS fail fast ticket we may need to fail the query if the cluster is marked as FAILED @@ -749,14 +742,7 @@ private static ActionListener createCCSListener( SearchResponse.Clusters clusters, ActionListener originalListener ) { - return new CCSActionListener( - clusterAlias, - skipUnavailable, - countDown, - exceptions, - clusters, - originalListener - ) { + return new CCSActionListener<>(clusterAlias, skipUnavailable, countDown, exceptions, clusters, originalListener) { @Override void innerOnResponse(SearchResponse searchResponse) { // TODO: in CCS fail fast ticket we may need to fail the query if the cluster gets marked as FAILED @@ -1417,7 +1403,6 @@ abstract static class CCSActionListener implements Acti private final AtomicReference exceptions; protected final SearchResponse.Clusters clusters; private final ActionListener originalListener; - protected final long startTime; /** * Used by both minimize_roundtrips true and false @@ -1436,7 +1421,6 @@ abstract static class CCSActionListener implements Acti this.exceptions = exceptions; this.clusters = clusters; 
this.originalListener = originalListener; - this.startTime = System.currentTimeMillis(); } @Override @@ -1454,12 +1438,12 @@ public final void onFailure(Exception e) { SearchResponse.Cluster cluster = clusters.getCluster(clusterAlias); if (skipUnavailable) { if (cluster != null) { - ccsClusterInfoUpdate(f, clusters, clusterAlias, skipUnavailable); + ccsClusterInfoUpdate(f, clusters, clusterAlias, true); } // skippedClusters.incrementAndGet(); } else { if (cluster != null) { - ccsClusterInfoUpdate(f, clusters, clusterAlias, skipUnavailable); + ccsClusterInfoUpdate(f, clusters, clusterAlias, false); } Exception exception = e; if (RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY.equals(clusterAlias) == false) { diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchHelper.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchHelper.java index 632fbafa0536b..ffaecedb62bba 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchHelper.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchHelper.java @@ -93,7 +93,7 @@ static ParsedScrollId parseScrollId(String scrollId) { if (in.available() > 0) { throw new IllegalArgumentException("Not all bytes were read"); } - return new ParsedScrollId(scrollId, type, context); + return new ParsedScrollId(type, context); } catch (Exception e) { throw new IllegalArgumentException("Cannot parse scroll id", e); } diff --git a/server/src/main/java/org/elasticsearch/action/support/TransportLocalClusterStateAction.java b/server/src/main/java/org/elasticsearch/action/support/TransportLocalClusterStateAction.java new file mode 100644 index 0000000000000..6af5a3a1e8384 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/support/TransportLocalClusterStateAction.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.support; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionRunnable; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.TransportService; + +import java.util.concurrent.Executor; + +/** + * Analogue of {@link org.elasticsearch.action.support.master.TransportMasterNodeReadAction} except that it runs on the local node rather + * than delegating to the master. 
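+ * Since it computes its response from the cluster state of the node that received the request, the result may lag slightly behind the state held by the elected master.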
+ */ +public abstract class TransportLocalClusterStateAction extends + HandledTransportAction { + + protected final ClusterService clusterService; + protected final Executor executor; + + protected TransportLocalClusterStateAction( + String actionName, + ClusterService clusterService, + TransportService transportService, + ActionFilters actionFilters, + Writeable.Reader requestReader, + Executor executor + ) { + // TODO replace DIRECT_EXECUTOR_SERVICE when removing workaround for https://github.com/elastic/elasticsearch/issues/97916 + super(actionName, transportService, actionFilters, requestReader, EsExecutors.DIRECT_EXECUTOR_SERVICE); + this.clusterService = clusterService; + this.executor = executor; + } + + protected abstract ClusterBlockException checkBlock(Request request, ClusterState state); + + @Override + protected final void doExecute(Task task, Request request, ActionListener listener) { + final var state = clusterService.state(); + final var clusterBlockException = checkBlock(request, state); + if (clusterBlockException != null) { + throw clusterBlockException; + } + + // Workaround for https://github.com/elastic/elasticsearch/issues/97916 - TODO remove this when we can + executor.execute(ActionRunnable.wrap(listener, l -> localClusterStateOperation(task, request, state, l))); + } + + protected abstract void localClusterStateOperation(Task task, Request request, ClusterState state, ActionListener listener) + throws Exception; +} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java index 625591ba8b90b..048ade3ef86c5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java @@ -261,6 +261,12 @@ private void allocateUnassigned() { // desired node no longer exists continue; } + if (routingNode.getByShardId(shard.shardId()) != null) { + // node already contains same shard. 
+ // Skipping it allows us to exclude the NO decision from SameShardAllocationDecider and log only the more relevant + // NO or THROTTLE decisions that prevent the shard from starting on its assigned node + continue; + } final var decision = allocation.deciders().canAllocate(shard, routingNode, allocation); switch (decision.type()) { case YES -> { @@ -287,10 +293,10 @@ private void allocateUnassigned() { case THROTTLE -> { nodeIdsIterator.wasThrottled = true; unallocatedStatus = AllocationStatus.DECIDERS_THROTTLED; - logger.trace("Couldn't assign shard [{}] to [{}]: {}", shard.shardId(), nodeId, decision); + logger.debug("Couldn't assign shard [{}] to [{}]: {}", shard.shardId(), nodeId, decision); } case NO -> { - logger.trace("Couldn't assign shard [{}] to [{}]: {}", shard.shardId(), nodeId, decision); + logger.debug("Couldn't assign shard [{}] to [{}]: {}", shard.shardId(), nodeId, decision); } } } @@ -505,11 +511,14 @@ private void balance() { } } - maybeLogUndesiredAllocationsWarning(allAllocations, undesiredAllocations); + maybeLogUndesiredAllocationsWarning(allAllocations, undesiredAllocations, routingNodes.size()); } - private void maybeLogUndesiredAllocationsWarning(long allAllocations, long undesiredAllocations) { - if (allAllocations > 0 && undesiredAllocations > undesiredAllocationsLogThreshold * allAllocations) { + private void maybeLogUndesiredAllocationsWarning(long allAllocations, long undesiredAllocations, int nodeCount) { + // more shards than the cluster can relocate with one reroute + final boolean nonEmptyRelocationBacklog = undesiredAllocations > 2L * nodeCount; + final boolean warningThresholdReached = undesiredAllocations > undesiredAllocationsLogThreshold * allAllocations; + if (allAllocations > 0 && nonEmptyRelocationBacklog && warningThresholdReached) { undesiredAllocationLogInterval.maybeExecute( () -> logger.warn( "[{}] of assigned shards ({}/{}) are not on their desired nodes, which exceeds the warn threshold of [{}]", diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index 9419cd7e6ab5f..141a06eff0ec6 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -1039,11 +1039,19 @@ private VersionValue getVersionFromMap(BytesRef id) { // but we only need to do this once since the last operation per ID is to add to the version // map so once we pass this point we can safely lookup from the version map. if (versionMap.isUnsafe()) { - lastUnsafeSegmentGenerationForGets.set(lastCommittedSegmentInfos.getGeneration() + 1); refreshInternalSearcher(UNSAFE_VERSION_MAP_REFRESH_SOURCE, true); + // After the refresh, the doc that triggered it must now be part of the last commit. + // In rare cases, other flush cycles could complete in between the line above + // and the line below, pushing the last commit generation further. But that's OK. + // The invariant here is that the doc is available within the generations of commits up to + // lastUnsafeSegmentGenerationForGets (inclusive). Therefore it is OK for it to be larger, + // which means the search shard needs to wait for extra generations, and these generations + // are guaranteed to happen since they are all committed. 
+ lastUnsafeSegmentGenerationForGets.set(lastCommittedSegmentInfos.getGeneration()); } versionMap.enforceSafeAccess(); } + // The versionMap can still be unsafe at this point due to archive being unsafe } return versionMap.getUnderLock(id); } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/RestGetAliasesAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/RestGetAliasesAction.java index 661ecf38c8523..b6e1240a3f85a 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/RestGetAliasesAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/RestGetAliasesAction.java @@ -16,8 +16,11 @@ import org.elasticsearch.cluster.metadata.DataStreamAlias; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.RestResponse; @@ -25,6 +28,7 @@ import org.elasticsearch.rest.Scope; import org.elasticsearch.rest.ServerlessScope; import org.elasticsearch.rest.action.RestBuilderListener; +import org.elasticsearch.rest.action.RestCancellableNodeClient; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -46,6 +50,8 @@ @ServerlessScope(Scope.PUBLIC) public class RestGetAliasesAction extends BaseRestHandler { + private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(RestGetAliasesAction.class); + @Override public List routes() { return List.of( @@ -201,16 +207,30 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC final String[] indices = Strings.splitStringByCommaToArray(request.param("index")); getAliasesRequest.indices(indices); getAliasesRequest.indicesOptions(IndicesOptions.fromRequest(request, getAliasesRequest.indicesOptions())); - getAliasesRequest.local(request.paramAsBoolean("local", getAliasesRequest.local())); + + if (request.hasParam("local")) { + // consume this param just for validation + final var localParam = request.paramAsBoolean("local", false); + if (request.getRestApiVersion() != RestApiVersion.V_7) { + DEPRECATION_LOGGER.critical( + DeprecationCategory.API, + "get-aliases-local", + "the [?local={}] query parameter to get-aliases requests has no effect and will be removed in a future version", + localParam + ); + } + } // we may want to move this logic to TransportGetAliasesAction but it is based on the original provided aliases, which will // not always be available there (they may get replaced so retrieving request.aliases is not quite the same). 
- return channel -> client.admin().indices().getAliases(getAliasesRequest, new RestBuilderListener<>(channel) { - @Override - public RestResponse buildResponse(GetAliasesResponse response, XContentBuilder builder) throws Exception { - return buildRestResponse(namesProvided, aliases, response.getAliases(), response.getDataStreamAliases(), builder); - } - }); + return channel -> new RestCancellableNodeClient(client, request.getHttpChannel()).admin() + .indices() + .getAliases(getAliasesRequest, new RestBuilderListener<>(channel) { + @Override + public RestResponse buildResponse(GetAliasesResponse response, XContentBuilder builder) throws Exception { + return buildRestResponse(namesProvided, aliases, response.getAliases(), response.getDataStreamAliases(), builder); + } + }); } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/cat/RestAliasAction.java b/server/src/main/java/org/elasticsearch/rest/action/cat/RestAliasAction.java index b8f083115614f..dc99b970864b2 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/cat/RestAliasAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/cat/RestAliasAction.java @@ -14,10 +14,14 @@ import org.elasticsearch.cluster.metadata.AliasMetadata; import org.elasticsearch.common.Strings; import org.elasticsearch.common.Table; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.RestResponse; import org.elasticsearch.rest.Scope; import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestCancellableNodeClient; import org.elasticsearch.rest.action.RestResponseListener; import java.util.List; @@ -28,6 +32,8 @@ @ServerlessScope(Scope.PUBLIC) public class RestAliasAction extends AbstractCatAction { + private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(RestAliasAction.class); + @Override public List routes() { return List.of(new Route(GET, "/_cat/aliases"), new Route(GET, "/_cat/aliases/{alias}")); @@ -49,15 +55,29 @@ protected RestChannelConsumer doCatRequest(final RestRequest request, final Node ? 
new GetAliasesRequest(Strings.commaDelimitedListToStringArray(request.param("alias"))) : new GetAliasesRequest(); getAliasesRequest.indicesOptions(IndicesOptions.fromRequest(request, getAliasesRequest.indicesOptions())); - getAliasesRequest.local(request.paramAsBoolean("local", getAliasesRequest.local())); - return channel -> client.admin().indices().getAliases(getAliasesRequest, new RestResponseListener(channel) { - @Override - public RestResponse buildResponse(GetAliasesResponse response) throws Exception { - Table tab = buildTable(request, response); - return RestTable.buildResponse(tab, channel); + if (request.hasParam("local")) { + // consume this param just for validation + final var localParam = request.paramAsBoolean("local", false); + if (request.getRestApiVersion() != RestApiVersion.V_7) { + DEPRECATION_LOGGER.critical( + DeprecationCategory.API, + "cat-aliases-local", + "the [?local={}] query parameter to cat-aliases requests has no effect and will be removed in a future version", + localParam + ); } - }); + } + + return channel -> new RestCancellableNodeClient(client, request.getHttpChannel()).admin() + .indices() + .getAliases(getAliasesRequest, new RestResponseListener<>(channel) { + @Override + public RestResponse buildResponse(GetAliasesResponse response) throws Exception { + Table tab = buildTable(request, response); + return RestTable.buildResponse(tab, channel); + } + }); } @Override diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesResponseTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesResponseTests.java index 420bde60bc168..6fde4bed97a17 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/alias/get/GetAliasesResponseTests.java @@ -10,7 +10,9 @@ import org.elasticsearch.cluster.metadata.AliasMetadata; import org.elasticsearch.cluster.metadata.AliasMetadata.Builder; +import org.elasticsearch.cluster.metadata.DataStreamAlias; import org.elasticsearch.cluster.metadata.DataStreamTestHelper; +import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.core.Tuple; import org.elasticsearch.test.AbstractWireSerializingTestCase; @@ -29,9 +31,18 @@ protected GetAliasesResponse createTestInstance() { return createTestItem(); } + /** + * NB prior to 8.12 get-aliases was a TransportMasterNodeReadAction so for BwC we must remain able to write these responses so that + * older nodes can read them until we no longer need to support {@link org.elasticsearch.TransportVersions#CLUSTER_FEATURES_ADDED} and + * earlier. The reader implementation below is the production implementation from earlier versions, but moved here because it is unused + * in production now. 
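+ * Once those older transport versions are no longer supported, this reader (and this round-trip test) can be removed as well.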
+ */ @Override protected Writeable.Reader instanceReader() { - return GetAliasesResponse::new; + return in -> new GetAliasesResponse( + in.readImmutableOpenMap(StreamInput::readString, i -> i.readCollectionAsList(AliasMetadata::new)), + in.readMap(in1 -> in1.readCollectionAsList(DataStreamAlias::new)) + ); } @Override diff --git a/server/src/test/java/org/elasticsearch/action/search/ParsedScrollIdTests.java b/server/src/test/java/org/elasticsearch/action/search/ParsedScrollIdTests.java index 6130435b4b181..a92cfdb1d02be 100644 --- a/server/src/test/java/org/elasticsearch/action/search/ParsedScrollIdTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/ParsedScrollIdTests.java @@ -26,7 +26,7 @@ public void testHasLocalIndices() { new ShardSearchContextId(randomAlphaOfLength(8), randomLong()) ); } - final ParsedScrollId parsedScrollId = new ParsedScrollId(randomAlphaOfLength(8), randomAlphaOfLength(8), searchContextIdForNodes); + final ParsedScrollId parsedScrollId = new ParsedScrollId(randomAlphaOfLength(8), searchContextIdForNodes); assertEquals(hasLocal, parsedScrollId.hasLocalIndices()); } diff --git a/server/src/test/java/org/elasticsearch/action/search/SearchScrollAsyncActionTests.java b/server/src/test/java/org/elasticsearch/action/search/SearchScrollAsyncActionTests.java index df33a5e18fce6..41e7a5c8ad1e1 100644 --- a/server/src/test/java/org/elasticsearch/action/search/SearchScrollAsyncActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/SearchScrollAsyncActionTests.java @@ -458,7 +458,7 @@ protected void onFirstPhaseResult(int shardId, SearchAsyncActionTests.TestSearch private static ParsedScrollId getParsedScrollId(SearchContextIdForNode... idsForNodes) { List searchContextIdForNodes = Arrays.asList(idsForNodes); Collections.shuffle(searchContextIdForNodes, random()); - return new ParsedScrollId("", "test", searchContextIdForNodes.toArray(new SearchContextIdForNode[0])); + return new ParsedScrollId("test", searchContextIdForNodes.toArray(new SearchContextIdForNode[0])); } private ActionListener dummyListener() { diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java index c8e6a011bc52e..b67b4ef7e5a7f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java @@ -53,6 +53,7 @@ import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.Maps; import org.elasticsearch.gateway.GatewayAllocator; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.shard.ShardId; @@ -1247,44 +1248,68 @@ public void testRebalanceDoesNotCauseHotSpots() { public void testShouldLogOnTooManyUndesiredAllocations() { - var indexMetadata = IndexMetadata.builder("index-1").settings(indexSettings(IndexVersion.current(), 1, 0)).build(); - final var index = indexMetadata.getIndex(); - final var shardId = new ShardId(index, 0); + final int shardCount = 5; + + final var dataNode1Assignments = Maps.newMapWithExpectedSize(shardCount); + final var dataNode2Assignments = Maps.newMapWithExpectedSize(shardCount); + + final var metadataBuilder = 
Metadata.builder(); + final var routingTableBuilder = RoutingTable.builder(); + for (int i = 0; i < shardCount; i++) { + final var indexMetadata = IndexMetadata.builder("index-" + i).settings(indexSettings(IndexVersion.current(), 1, 0)).build(); + final var index = indexMetadata.getIndex(); + final var shardId = new ShardId(index, 0); + metadataBuilder.put(indexMetadata, false); + routingTableBuilder.add(IndexRoutingTable.builder(index).addShard(newShardRouting(shardId, "data-node-1", true, STARTED))); + + dataNode1Assignments.put(shardId, new ShardAssignment(Set.of("data-node-1"), 1, 0, 0)); + dataNode2Assignments.put(shardId, new ShardAssignment(Set.of("data-node-2"), 1, 0, 0)); + } final var clusterState = ClusterState.builder(ClusterName.DEFAULT) .nodes(DiscoveryNodes.builder().add(newNode("data-node-1")).add(newNode("data-node-2"))) - .metadata(Metadata.builder().put(indexMetadata, true)) - .routingTable( - RoutingTable.builder() - .add(IndexRoutingTable.builder(index).addShard(newShardRouting(shardId, "data-node-2", true, STARTED))) - ) + .metadata(metadataBuilder) + .routingTable(routingTableBuilder) .build(); - final var balance = new DesiredBalance(1, Map.of(shardId, new ShardAssignment(Set.of("data-node-1"), 1, 0, 0))); - var threadPool = mock(ThreadPool.class); - when(threadPool.relativeTimeInMillis()).thenReturn(1L).thenReturn(2L); + when(threadPool.relativeTimeInMillis()).thenReturn(1L).thenReturn(2L).thenReturn(3L); var reconciler = new DesiredBalanceReconciler(createBuiltInClusterSettings(), threadPool); + var expectedWarningMessage = "[100%] of assigned shards (" + + shardCount + + "/" + + shardCount + + ") are not on their desired nodes, which exceeds the warn threshold of [10%]"; + assertThatLogger( + () -> reconciler.reconcile(new DesiredBalance(1, dataNode1Assignments), createRoutingAllocationFrom(clusterState)), + DesiredBalanceReconciler.class, + new MockLogAppender.UnseenEventExpectation( + "Should not log if all shards on desired location", + DesiredBalanceReconciler.class.getCanonicalName(), + Level.WARN, + expectedWarningMessage + ) + ); assertThatLogger( - () -> reconciler.reconcile(balance, createRoutingAllocationFrom(clusterState)), + () -> reconciler.reconcile(new DesiredBalance(1, dataNode2Assignments), createRoutingAllocationFrom(clusterState)), DesiredBalanceReconciler.class, new MockLogAppender.SeenEventExpectation( "Should log first too many shards on undesired locations", DesiredBalanceReconciler.class.getCanonicalName(), Level.WARN, - "[100%] of assigned shards (1/1) are not on their desired nodes, which exceeds the warn threshold of [10%]" + expectedWarningMessage ) ); assertThatLogger( - () -> reconciler.reconcile(balance, createRoutingAllocationFrom(clusterState)), + () -> reconciler.reconcile(new DesiredBalance(1, dataNode2Assignments), createRoutingAllocationFrom(clusterState)), DesiredBalanceReconciler.class, new MockLogAppender.UnseenEventExpectation( "Should not log immediate second too many shards on undesired locations", DesiredBalanceReconciler.class.getCanonicalName(), Level.WARN, - "[100.0%] of assigned shards (1/1) are not on their desired nodes, which exceeds the warn threshold of [10.0%]" + expectedWarningMessage ) ); } diff --git a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java index f099fa657b89c..b32e9f4db8b77 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java @@ -30,7 +30,6 @@ import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; public class ShardGetServiceTests extends IndexShardTestCase { @@ -241,7 +240,8 @@ public void testGetFromTranslog() throws IOException { .getFromTranslog("2", new String[] { "foo" }, true, 1, VersionType.INTERNAL, FetchSourceContext.FETCH_SOURCE, false); assertNull(getResult); var lastUnsafeGeneration = engine.getLastUnsafeSegmentGenerationForGets(); - assertThat(lastUnsafeGeneration, greaterThan(0L)); + // last unsafe generation is set to last committed gen after the refresh triggered by realtime get + assertThat(lastUnsafeGeneration, equalTo(engine.getLastCommittedSegmentInfos().getGeneration())); assertTrue(LiveVersionMapTestUtils.isSafeAccessRequired(map)); assertFalse(LiveVersionMapTestUtils.isUnsafe(map)); @@ -250,7 +250,7 @@ public void testGetFromTranslog() throws IOException { engine.flush(true, true, flushFuture); var flushResult = flushFuture.actionGet(); assertTrue(flushResult.flushPerformed()); - assertThat(flushResult.generation(), equalTo(lastUnsafeGeneration)); + assertThat(flushResult.generation(), equalTo(lastUnsafeGeneration + 1)); assertThat(engine.getLastUnsafeSegmentGenerationForGets(), equalTo(lastUnsafeGeneration)); // No longer in translog getResult = primary.getService() diff --git a/test/framework/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryActionHelper.java b/test/framework/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryActionHelper.java deleted file mode 100644 index 90786fb0e2915..0000000000000 --- a/test/framework/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryActionHelper.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ - -package org.elasticsearch.action.admin.indices.recovery; - -/** - * Helper methods for {@link TransportRecoveryAction}. - */ -public class TransportRecoveryActionHelper { - - /** - * Helper method for tests to call {@link TransportRecoveryAction#setOnShardOperation}. - */ - public static void setOnShardOperation(TransportRecoveryAction transportRecoveryAction, Runnable setOnShardOperation) { - transportRecoveryAction.setOnShardOperation(setOnShardOperation); - } -} diff --git a/test/framework/src/main/java/org/elasticsearch/action/support/CancellableActionTestPlugin.java b/test/framework/src/main/java/org/elasticsearch/action/support/CancellableActionTestPlugin.java new file mode 100644 index 0000000000000..115ea63fb243e --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/action/support/CancellableActionTestPlugin.java @@ -0,0 +1,155 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.support; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.plugins.ActionPlugin; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.PluginsService; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskCancelledException; +import org.elasticsearch.tasks.TaskManager; + +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.ExceptionsHelper.unwrapCause; +import static org.elasticsearch.action.support.ActionTestUtils.assertNoFailureListener; +import static org.elasticsearch.test.ESIntegTestCase.internalCluster; +import static org.elasticsearch.test.ESTestCase.asInstanceOf; +import static org.elasticsearch.test.ESTestCase.randomInt; +import static org.elasticsearch.test.ESTestCase.safeAwait; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; + +/** + * Utility plugin that captures the invocation of an action on a node after the task has been registered with the {@link TaskManager}, + * cancels it (e.g. by closing the connection used for the original REST request), verifies that the corresponding task is cancelled, then + * lets the action execution proceed in order to verify that it fails with a {@link TaskCancelledException}. This allows us to verify a few + * key aspects of the cancellability of tasks:
+ * <ul>
+ * <li>The task that the request creates is cancellable.</li>
+ * <li>The REST handler propagates cancellation to the task it starts.</li>
+ * <li>The action implementation checks for cancellation at least once.</li>
+ * </ul>
+ * However, note that because this is implemented as an {@link ActionFilter} it blocks and cancels the action before it even starts executing on the + * local node, so it does not verify that the cancellation is processed promptly at all stages of the execution of the action, nor that + * cancellations are propagated correctly to subsidiary actions. + */ +public class CancellableActionTestPlugin extends Plugin implements ActionPlugin { + + public interface CapturingAction extends Releasable { + /** + * @param doCancel callback to invoke once the specified action has started; it should cancel the action. + */ + void captureAndCancel(Runnable doCancel); + } + + /** + * Returns a {@link CapturingAction}, typically for use in a try-with-resources block, which can be used to capture and cancel exactly + * one invocation of the specified action on the specified node. + */ + public static CapturingAction capturingActionOnNode(String actionName, String nodeName) { + final var plugins = internalCluster().getInstance(PluginsService.class, nodeName) + .filterPlugins(CancellableActionTestPlugin.class) + .toList(); + assertThat("unique " + CancellableActionTestPlugin.class.getCanonicalName() + " plugin not found", plugins, hasSize(1)); + return plugins.get(0).capturingAction(actionName); + } + + private volatile String capturedActionName; + private final AtomicReference> capturedRef = new AtomicReference<>(); + + private record Captured(Runnable doCancel, CountDownLatch countDownLatch) {} + + private CapturingAction capturingAction(String actionName) { + final var captureListener = new SubscribableListener(); + capturedActionName = actionName; + assertTrue(capturedRef.compareAndSet(null, captureListener)); + + final var completionLatch = new CountDownLatch(1); + + return new CapturingAction() { + @Override + public void captureAndCancel(Runnable doCancel) { + assertFalse(captureListener.isDone()); + captureListener.onResponse(new Captured(doCancel, completionLatch)); + safeAwait(completionLatch); + } + + @Override + public void close() { + // verify that a request was indeed captured + assertNull(capturedRef.get()); + // and that it completed + assertEquals(0, completionLatch.getCount()); + } + }; + } + + @Override + public List getActionFilters() { + return List.of(new ActionFilter() { + + private final int order = randomInt(); + + @Override + public int order() { + return order; + } + + @Override + public void apply( + Task task, + String action, + Request request, + ActionListener listener, + ActionFilterChain chain + ) { + if (action.equals(capturedActionName)) { + final var capturingListener = capturedRef.getAndSet(null); + if (capturingListener != null) { + final var cancellableTask = asInstanceOf(CancellableTask.class, task); + capturingListener.addListener(assertNoFailureListener(captured -> { + cancellableTask.addListener(() -> chain.proceed(task, action, request, new ActionListener<>() { + @Override + public void onResponse(Response response) { + fail("cancelled action should not succeed, but got " + response); + } + + @Override + public void onFailure(Exception e) { + assertThat(unwrapCause(e), instanceOf(TaskCancelledException.class)); + listener.onFailure(e); + captured.countDownLatch().countDown(); + } + })); + assertFalse(cancellableTask.isCancelled()); + captured.doCancel().run(); + })); + return; + } + } + + chain.proceed(task, action, request, listener); + } + }); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java 
b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 832902f52deb2..c75a52a82caf1 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -2436,6 +2436,10 @@ protected static RestClient createRestClient() { return createRestClient(null, "http"); } + protected static RestClient createRestClient(String node) { + return createRestClient(client(node).admin().cluster().prepareNodesInfo("_local").get().getNodes(), null, "http"); + } + protected static RestClient createRestClient(RestClientBuilder.HttpClientConfigCallback httpClientConfigCallback, String protocol) { NodesInfoResponse nodesInfoResponse = clusterAdmin().prepareNodesInfo().get(); assertFalse(nodesInfoResponse.hasFailures()); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/TrainedModelAssignment.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/TrainedModelAssignment.java index f69be31939b32..d27d325a5c596 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/TrainedModelAssignment.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/TrainedModelAssignment.java @@ -9,6 +9,7 @@ import org.elasticsearch.ResourceAlreadyExistsException; import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.cluster.SimpleDiffable; import org.elasticsearch.common.Randomness; @@ -96,6 +97,10 @@ public final class TrainedModelAssignment implements SimpleDiffable 0L ? StartTrainedModelDeploymentAction.estimateMemoryUsageBytes( model.getModelId(), totalDefinitionLength, - model.getPerDeploymentMemoryBytes(), - model.getPerAllocationMemoryBytes(), + useNewMemoryFields ? model.getPerDeploymentMemoryBytes() : 0, + useNewMemoryFields ? model.getPerAllocationMemoryBytes() : 0, numberOfAllocations ) : 0L; modelSizeStatsByModelId.put( model.getModelId(), - new TrainedModelSizeStats( - totalDefinitionLength, - totalDefinitionLength > 0L - ? 
StartTrainedModelDeploymentAction.estimateMemoryUsageBytes( - model.getModelId(), - totalDefinitionLength, - model.getPerDeploymentMemoryBytes(), - model.getPerAllocationMemoryBytes(), - numberOfAllocations - ) - : 0L - ) + new TrainedModelSizeStats(totalDefinitionLength, estimatedMemoryUsageBytes) ); } else { modelSizeStatsByModelId.put(model.getModelId(), new TrainedModelSizeStats(model.getModelSize(), 0)); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentClusterService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentClusterService.java index 2caf338d2a3c7..fe4462d6556ee 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentClusterService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentClusterService.java @@ -47,6 +47,7 @@ import org.elasticsearch.xpack.core.ml.job.messages.Messages; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.core.ml.utils.MlPlatformArchitecturesUtil; +import org.elasticsearch.xpack.core.ml.utils.TransportVersionUtils; import org.elasticsearch.xpack.ml.MachineLearning; import org.elasticsearch.xpack.ml.autoscaling.NodeAvailabilityZoneMapper; import org.elasticsearch.xpack.ml.inference.assignment.planning.AllocationReducer; @@ -76,6 +77,8 @@ public class TrainedModelAssignmentClusterService implements ClusterStateListene private static final TransportVersion RENAME_ALLOCATION_TO_ASSIGNMENT_TRANSPORT_VERSION = TransportVersions.V_8_3_0; public static final TransportVersion DISTRIBUTED_MODEL_ALLOCATION_TRANSPORT_VERSION = TransportVersions.V_8_4_0; + private static final TransportVersion NEW_ALLOCATION_MEMORY_VERSION = TransportVersions.V_8_500_064; + private final ClusterService clusterService; private final ThreadPool threadPool; private final NodeLoadDetector nodeLoadDetector; @@ -644,12 +647,14 @@ private TrainedModelAssignmentMetadata.Builder rebalanceAssignments( Map nodeLoads = detectNodeLoads(nodes, currentState); TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.fromState(currentState); + boolean useNewMemoryFields = TrainedModelAssignment.useNewMemoryFields(TransportVersionUtils.getMinTransportVersion(currentState)); TrainedModelAssignmentRebalancer rebalancer = new TrainedModelAssignmentRebalancer( currentMetadata, nodeLoads, nodeAvailabilityZoneMapper.buildMlNodesByAvailabilityZone(currentState), modelToAdd, - allocatedProcessorsScale + allocatedProcessorsScale, + useNewMemoryFields ); Set shuttingDownNodeIds = currentState.metadata().nodeShutdowns().getAllNodeIds(); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index e1241dc8a93c3..6e6b447fcea3d 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -52,18 +52,22 @@ class TrainedModelAssignmentRebalancer { private final Optional deploymentToAdd; private final int allocatedProcessorsScale; + private final boolean useNewMemoryFields; + TrainedModelAssignmentRebalancer( 
TrainedModelAssignmentMetadata currentMetadata, Map<DiscoveryNode, NodeLoad> nodeLoads, Map<List<String>, Collection<DiscoveryNode>> mlNodesByZone, Optional<StartTrainedModelDeploymentAction.TaskParams> deploymentToAdd, - int allocatedProcessorsScale + int allocatedProcessorsScale, + boolean useNewMemoryFields ) { this.currentMetadata = Objects.requireNonNull(currentMetadata); this.nodeLoads = Objects.requireNonNull(nodeLoads); this.mlNodesByZone = Objects.requireNonNull(mlNodesByZone); this.deploymentToAdd = Objects.requireNonNull(deploymentToAdd); this.allocatedProcessorsScale = allocatedProcessorsScale; + this.useNewMemoryFields = useNewMemoryFields; } TrainedModelAssignmentMetadata.Builder rebalance() { @@ -138,9 +142,11 @@ private static void copyAssignments( AssignmentPlan.Node originalNode = originalNodeById.get(assignment.getKey().id()); dest.assignModelToNode(m, originalNode, assignment.getValue()); if (m.currentAllocationsByNodeId().containsKey(originalNode.id())) { + // TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder // As the node still has all its available memory, we need to manually account for the memory of models with // current allocations. - dest.accountMemory(m, originalNode); + long requiredMemory = m.estimateMemoryUsageBytes(m.currentAllocationsByNodeId().get(originalNode.id())); + dest.accountMemory(m, originalNode, requiredMemory); } } } @@ -168,11 +174,14 @@ private AssignmentPlan computePlanForNormalPriorityModels( .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().getTargetAllocations())); return new AssignmentPlan.Deployment( assignment.getDeploymentId(), - assignment.getTaskParams().estimateMemoryUsageBytes(), + assignment.getTaskParams().getModelBytes(), assignment.getTaskParams().getNumberOfAllocations(), assignment.getTaskParams().getThreadsPerAllocation(), currentAssignments, - assignment.getMaxAssignedAllocations() + assignment.getMaxAssignedAllocations(), + // in a mixed-version cluster, use the old memory fields to avoid unstable assignment plans + useNewMemoryFields ? assignment.getTaskParams().getPerDeploymentMemoryBytes() : 0, + useNewMemoryFields ? assignment.getTaskParams().getPerAllocationMemoryBytes() : 0 ); }) .forEach(planDeployments::add); @@ -181,11 +190,14 @@ private AssignmentPlan computePlanForNormalPriorityModels( planDeployments.add( new AssignmentPlan.Deployment( taskParams.getDeploymentId(), - taskParams.estimateMemoryUsageBytes(), + taskParams.getModelBytes(), taskParams.getNumberOfAllocations(), taskParams.getThreadsPerAllocation(), Map.of(), - 0 + 0, + // in a mixed-version cluster, use the old memory fields to avoid unstable assignment plans + useNewMemoryFields ? taskParams.getPerDeploymentMemoryBytes() : 0, + useNewMemoryFields ? taskParams.getPerAllocationMemoryBytes() : 0 ) ); } @@ -217,12 +229,14 @@ private AssignmentPlan computePlanForLowPriorityModels(Set<String> assignableNodeIds .map( assignment -> new AssignmentPlan.Deployment( assignment.getDeploymentId(), - assignment.getTaskParams().estimateMemoryUsageBytes(), + assignment.getTaskParams().getModelBytes(), assignment.getTaskParams().getNumberOfAllocations(), assignment.getTaskParams().getThreadsPerAllocation(), findFittingAssignments(assignment, assignableNodeIds, remainingNodeMemory), assignment.getMaxAssignedAllocations(), - Priority.LOW + Priority.LOW, + (useNewMemoryFields == false) ? assignment.getTaskParams().getPerDeploymentMemoryBytes() : 0, + (useNewMemoryFields == false) ? 
assignment.getTaskParams().getPerAllocationMemoryBytes() : 0 ) ) .forEach(planDeployments::add); @@ -231,12 +245,14 @@ private AssignmentPlan computePlanForLowPriorityModels(Set assignableNod planDeployments.add( new AssignmentPlan.Deployment( taskParams.getDeploymentId(), - taskParams.estimateMemoryUsageBytes(), + taskParams.getModelBytes(), taskParams.getNumberOfAllocations(), taskParams.getThreadsPerAllocation(), Map.of(), 0, - Priority.LOW + Priority.LOW, + (useNewMemoryFields == false) ? taskParams.getPerDeploymentMemoryBytes() : 0, + (useNewMemoryFields == false) ? taskParams.getPerAllocationMemoryBytes() : 0 ) ); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java index 4843cc43d1187..026b433a8c2d4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java @@ -35,7 +35,8 @@ private Node modifyNodePreservingAllocations(Node n) { int coresUsed = 0; for (Deployment m : deployments) { if (m.currentAllocationsByNodeId().containsKey(n.id())) { - bytesUsed += m.memoryBytes(); + int allocations = m.currentAllocationsByNodeId().get(n.id()); + bytesUsed += m.estimateMemoryUsageBytes(allocations); coresUsed += calculateUsedCores(n, m); } } @@ -58,7 +59,9 @@ Deployment modifyModelPreservingPreviousAssignments(Deployment m) { m.allocations() - calculatePreservedAllocations(m), m.threadsPerAllocation(), calculateAllocationsPerNodeToPreserve(m), - m.maxAssignedAllocations() + m.maxAssignedAllocations(), + m.perDeploymentMemoryBytes(), + m.perAllocationMemoryBytes() ); } @@ -67,28 +70,37 @@ AssignmentPlan mergePreservedAllocations(AssignmentPlan assignmentPlan) { // they will not match the models/nodes members we have in this class. // Therefore, we build a lookup table based on the ids so we can merge the plan // with its preserved allocations. - final Map, Integer> assignmentsByModelNodeIdPair = new HashMap<>(); + final Map, Integer> plannedAssignmentsByModelNodeIdPair = new HashMap<>(); for (Deployment m : assignmentPlan.models()) { Map assignments = assignmentPlan.assignments(m).orElse(Map.of()); for (Map.Entry nodeAssignment : assignments.entrySet()) { - assignmentsByModelNodeIdPair.put(Tuple.tuple(m.id(), nodeAssignment.getKey().id()), nodeAssignment.getValue()); + plannedAssignmentsByModelNodeIdPair.put(Tuple.tuple(m.id(), nodeAssignment.getKey().id()), nodeAssignment.getValue()); } } AssignmentPlan.Builder mergedPlanBuilder = AssignmentPlan.builder(nodes, deployments); - for (Deployment m : deployments) { - for (Node n : nodes) { - int allocations = assignmentsByModelNodeIdPair.getOrDefault(Tuple.tuple(m.id(), n.id()), 0); - if (m.currentAllocationsByNodeId().containsKey(n.id())) { - if (mergedPlanBuilder.getRemainingMemory(n) >= m.memoryBytes()) { - allocations += addPreservedAllocations(n, m); - // As the node has all its available memory we need to manually account memory of models with - // current allocations. - mergedPlanBuilder.accountMemory(m, n); + for (Node n : nodes) { + // TODO (#101612) Should the first loop happen in the builder constructor? 
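+ // First pass: re-apply the allocations chosen to be preserved; the second pass below then adds the newly planned allocations on top.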
+ for (Deployment deploymentAllocationsToPreserve : deployments) { + + // If the deployment already has allocations on node n, try to preserve them + int preservedAllocations = addPreservedAllocations(n, deploymentAllocationsToPreserve); + if (preservedAllocations > 0) { + long requiredMemory = deploymentAllocationsToPreserve.estimateMemoryUsageBytes(preservedAllocations); + if (mergedPlanBuilder.canAssign(deploymentAllocationsToPreserve, n, preservedAllocations, requiredMemory)) { + mergedPlanBuilder.assignModelToNode(deploymentAllocationsToPreserve, n, preservedAllocations, requiredMemory); } } - } - if (allocations > 0) { - mergedPlanBuilder.assignModelToNode(m, n, allocations); + } + for (Deployment deploymentNewAllocations : deployments) { + int newAllocations = plannedAssignmentsByModelNodeIdPair.getOrDefault( + Tuple.tuple(deploymentNewAllocations.id(), n.id()), + 0 + ); + + long requiredMemory = mergedPlanBuilder.getDeploymentMemoryRequirement(deploymentNewAllocations, n, newAllocations); + if (newAllocations > 0 && mergedPlanBuilder.canAssign(deploymentNewAllocations, n, newAllocations, requiredMemory)) { + mergedPlanBuilder.assignModelToNode(deploymentNewAllocations, n, newAllocations); } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java index 72a83d7579463..1dce7f0bb46ba 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.Tuple; +import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; import org.elasticsearch.xpack.core.ml.inference.assignment.Priority; import java.util.ArrayList; @@ -36,18 +37,32 @@ public record Deployment( int threadsPerAllocation, Map<String, Integer> currentAllocationsByNodeId, int maxAssignedAllocations, - Priority priority + Priority priority, + long perDeploymentMemoryBytes, + long perAllocationMemoryBytes ) { public Deployment( String id, - long memoryBytes, + long modelBytes, int allocations, int threadsPerAllocation, Map<String, Integer> currentAllocationsByNodeId, - int maxAssignedAllocations + int maxAssignedAllocations, + long perDeploymentMemoryBytes, + long perAllocationMemoryBytes ) { - this(id, memoryBytes, allocations, threadsPerAllocation, currentAllocationsByNodeId, maxAssignedAllocations, Priority.NORMAL); + this( + id, + modelBytes, + allocations, + threadsPerAllocation, + currentAllocationsByNodeId, + maxAssignedAllocations, + Priority.NORMAL, + perDeploymentMemoryBytes, + perAllocationMemoryBytes + ); } int getCurrentAssignedAllocations() { @@ -58,6 +73,60 @@ boolean hasEverBeenAllocated() { return maxAssignedAllocations > 0; } + public long estimateMemoryUsageBytes(int allocations) { + return StartTrainedModelDeploymentAction.estimateMemoryUsageBytes( + id, + memoryBytes, + perDeploymentMemoryBytes, + perAllocationMemoryBytes, + allocations + ); + } + + long estimateAdditionalMemoryUsageBytes(int allocationsOld, int allocationsNew) { + return StartTrainedModelDeploymentAction.estimateMemoryUsageBytes( + id, + memoryBytes, + perDeploymentMemoryBytes, + perAllocationMemoryBytes, + allocationsNew + ) - 
StartTrainedModelDeploymentAction.estimateMemoryUsageBytes( + id, + memoryBytes, + perDeploymentMemoryBytes, + perAllocationMemoryBytes, + allocationsOld + ); + } + + long minimumMemoryRequiredBytes() { + return StartTrainedModelDeploymentAction.estimateMemoryUsageBytes( + id, + memoryBytes, + perDeploymentMemoryBytes, + perAllocationMemoryBytes, + 1 + ); + } + + int findOptimalAllocations(int maxAllocations, long availableMemoryBytes) { + if (perDeploymentMemoryBytes > 0 && perAllocationMemoryBytes > 0) { + return (int) Math.max( + Math.min(maxAllocations, Math.floorDiv(availableMemoryBytes - estimateMemoryUsageBytes(0), perAllocationMemoryBytes)), + 0 + ); + } + return maxAllocations; + } + + int findExcessAllocations(int maxAllocations, long availableMemoryBytes) { + if (perDeploymentMemoryBytes > 0 && perAllocationMemoryBytes > 0) { + return (int) Math.min(maxAllocations, Math.floorDiv(availableMemoryBytes, perAllocationMemoryBytes)); + } + return maxAllocations; + } + @Override public String toString() { return id @@ -71,6 +140,8 @@ public String toString() { + currentAllocationsByNodeId + ") (max_assigned_allocations = " + maxAssignedAllocations + + ") (memory_usage = " + + ByteSizeValue.ofBytes(estimateMemoryUsageBytes(allocations)) + ")"; } }; @@ -304,19 +375,42 @@ int getRemainingAllocations(Deployment m) { } boolean canAssign(Deployment deployment, Node node, int allocations) { - return (isAlreadyAssigned(deployment, node) - || (deployment.memoryBytes() <= remainingNodeMemory.get(node)) - && (deployment.priority == Priority.LOW - || allocations * deployment.threadsPerAllocation() <= remainingNodeCores.get(node))); + long requiredMemory = getDeploymentMemoryRequirement(deployment, node, allocations); + return canAssign(deployment, node, allocations, requiredMemory); + } + + boolean canAssign(Deployment deployment, Node node, int allocations, long requiredMemory) { + return (requiredMemory <= remainingNodeMemory.get(node)) + && (deployment.priority == Priority.LOW || allocations * deployment.threadsPerAllocation() <= remainingNodeCores.get(node)); + } + + public long getDeploymentMemoryRequirement(Deployment deployment, Node node, int newAllocations) { + int assignedAllocations = getAssignedAllocations(deployment, node); + + if (assignedAllocations > 0) { + return deployment.estimateAdditionalMemoryUsageBytes(assignedAllocations, assignedAllocations + newAllocations); + } + return deployment.estimateMemoryUsageBytes(newAllocations); } public Builder assignModelToNode(Deployment deployment, Node node, int allocations) { + return assignModelToNode(deployment, node, allocations, getDeploymentMemoryRequirement(deployment, node, allocations)); + } + + public Builder assignModelToNode(Deployment deployment, Node node, int allocations, long requiredMemory) { if (allocations <= 0) { return this; } - if (isAlreadyAssigned(deployment, node) == false && deployment.memoryBytes() > remainingNodeMemory.get(node)) { + if (requiredMemory > remainingNodeMemory.get(node)) { throw new IllegalArgumentException( - "not enough memory on node [" + node.id() + "] to assign model [" + deployment.id() + "]" + "not enough memory on node [" + + node.id() + + "] to assign [" + + allocations + + "] allocations to deployment [" + + deployment.id() + + "]" ); } if (deployment.priority == Priority.NORMAL && allocations * deployment.threadsPerAllocation() > remainingNodeCores.get(node)) { @@ -333,9 +427,9 @@ public Builder assignModelToNode(Deployment 
deployment, Node node, int allocatio ); } - long additionalModelMemory = isAlreadyAssigned(deployment, node) ? 0 : deployment.memoryBytes; assignments.get(deployment).compute(node, (n, remAllocations) -> remAllocations + allocations); - remainingNodeMemory.compute(node, (n, remMemory) -> remMemory - additionalModelMemory); + accountMemory(deployment, node, requiredMemory); + if (deployment.priority == Priority.NORMAL) { remainingNodeCores.compute(node, (n, remCores) -> remCores - allocations * deployment.threadsPerAllocation()); } @@ -347,9 +441,26 @@ private boolean isAlreadyAssigned(Deployment deployment, Node node) { return deployment.currentAllocationsByNodeId().containsKey(node.id()) || assignments.get(deployment).get(node) > 0; } + private int getAssignedAllocations(Deployment deployment, Node node) { + int currentAllocations = getCurrentAllocations(deployment, node); + int assignmentAllocations = assignments.get(deployment).get(node); + return currentAllocations + assignmentAllocations; + } + + private static int getCurrentAllocations(Deployment m, Node n) { + return m.currentAllocationsByNodeId.containsKey(n.id()) ? m.currentAllocationsByNodeId.get(n.id()) : 0; + } + public void accountMemory(Deployment m, Node n) { - remainingNodeMemory.computeIfPresent(n, (k, v) -> v - m.memoryBytes()); - if (remainingNodeMemory.get(n) < 0) { + // TODO (#101612) remove or refactor unused method + long requiredMemory = getDeploymentMemoryRequirement(m, n, getCurrentAllocations(m, n)); + accountMemory(m, n, requiredMemory); + } + + public void accountMemory(Deployment m, Node n, long requiredMemory) { + // TODO (#101612) computation of required memory should be done internally + remainingNodeMemory.computeIfPresent(n, (k, v) -> v - requiredMemory); + if (remainingNodeMemory.containsKey(n) && remainingNodeMemory.get(n) < 0) { throw new IllegalArgumentException("not enough memory on node [" + n.id() + "] to assign model [" + m.id() + "]"); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java index 73b713cced32a..b1c017b1a784c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java @@ -115,8 +115,11 @@ private AssignmentPlan solveAllocatingAtLeastOnceModelsThatWerePreviouslyAllocat m.memoryBytes(), 1, m.threadsPerAllocation(), - m.currentAllocationsByNodeId(), - m.maxAssignedAllocations() + // don't rely on the current allocation + new HashMap<>(), + m.maxAssignedAllocations(), + m.perDeploymentMemoryBytes(), + m.perAllocationMemoryBytes() ) ) .toList(); @@ -145,7 +148,9 @@ private AssignmentPlan solveAllocatingAtLeastOnceModelsThatWerePreviouslyAllocat m.allocations(), m.threadsPerAllocation(), currentAllocationsByNodeId, - m.maxAssignedAllocations() + m.maxAssignedAllocations(), + m.perDeploymentMemoryBytes(), + m.perAllocationMemoryBytes() ); }).toList(); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/LinearProgrammingPlanSolver.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/LinearProgrammingPlanSolver.java index 90c5a2257d94d..bd97680e285cc 100644 --- 
a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/LinearProgrammingPlanSolver.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/LinearProgrammingPlanSolver.java @@ -68,6 +68,8 @@ class LinearProgrammingPlanSolver { private final Map normalizedMemoryPerNode; private final Map coresPerNode; private final Map normalizedMemoryPerModel; + private final Map normalizedMemoryPerAllocation; + private final Map normalizedMinimumDeploymentMemoryRequired; private final int maxNodeCores; private final long maxModelMemoryBytes; @@ -84,12 +86,17 @@ class LinearProgrammingPlanSolver { .filter(m -> m.threadsPerAllocation() <= maxNodeCores) .toList(); - maxModelMemoryBytes = this.deployments.stream().map(AssignmentPlan.Deployment::memoryBytes).max(Long::compareTo).orElse(1L); + // We use the maximum memory to deploy a model with one allocation as the normalization factor. + maxModelMemoryBytes = this.deployments.stream().map(m -> m.minimumMemoryRequiredBytes()).max(Long::compareTo).orElse(1L); normalizedMemoryPerNode = this.nodes.stream() .collect(Collectors.toMap(Function.identity(), n -> n.availableMemoryBytes() / (double) maxModelMemoryBytes)); coresPerNode = this.nodes.stream().collect(Collectors.toMap(Function.identity(), Node::cores)); normalizedMemoryPerModel = this.deployments.stream() - .collect(Collectors.toMap(Function.identity(), m -> m.memoryBytes() / (double) maxModelMemoryBytes)); + .collect(Collectors.toMap(Function.identity(), m -> m.estimateMemoryUsageBytes(0) / (double) maxModelMemoryBytes)); + normalizedMemoryPerAllocation = this.deployments.stream() + .collect(Collectors.toMap(Function.identity(), m -> m.perAllocationMemoryBytes() / (double) maxModelMemoryBytes)); + normalizedMinimumDeploymentMemoryRequired = this.deployments.stream() + .collect(Collectors.toMap(Function.identity(), m -> m.minimumMemoryRequiredBytes() / (double) maxModelMemoryBytes)); } AssignmentPlan solvePlan(boolean useBinPackingOnly) { @@ -133,8 +140,8 @@ private double weightForAllocationVar( Node n, Map, Double> weights ) { - return (1 + weights.get(Tuple.tuple(m, n)) - (m.memoryBytes() > n.availableMemoryBytes() ? 10 : 0)) - L1 * normalizedMemoryPerModel - .get(m) / maxNodeCores; + return (1 + weights.get(Tuple.tuple(m, n)) - (m.minimumMemoryRequiredBytes() > n.availableMemoryBytes() ? 10 : 0)) - L1 + * normalizedMemoryPerModel.get(m) / maxNodeCores; } private Tuple, Double>, AssignmentPlan> calculateWeightsAndBinPackingPlan() { @@ -156,9 +163,9 @@ private Tuple, Double>, AssignmentPlan> calculateWei .sorted(Comparator.comparingDouble(n -> descendingSizeAnyFitsNodeOrder(n, m, assignmentPlan))) .toList(); for (Node n : orderedNodes) { - int allocations = Math.min( - assignmentPlan.getRemainingCores(n) / m.threadsPerAllocation(), - assignmentPlan.getRemainingAllocations(m) + int allocations = m.findOptimalAllocations( + Math.min(assignmentPlan.getRemainingCores(n) / m.threadsPerAllocation(), assignmentPlan.getRemainingAllocations(m)), + assignmentPlan.getRemainingMemory(n) ); if (allocations > 0 && assignmentPlan.canAssign(m, n, allocations)) { assignmentPlan.assignModelToNode(m, n, allocations); @@ -185,7 +192,8 @@ private Tuple, Double>, AssignmentPlan> calculateWei } private double descendingSizeAnyFitsModelOrder(AssignmentPlan.Deployment m) { - return (m.currentAllocationsByNodeId().isEmpty() ? 1 : 2) * -normalizedMemoryPerModel.get(m) * m.threadsPerAllocation(); + return (m.currentAllocationsByNodeId().isEmpty() ? 
1 : 2) * -normalizedMinimumDeploymentMemoryRequired.get(m) * m + .threadsPerAllocation(); } private double descendingSizeAnyFitsNodeOrder(Node n, AssignmentPlan.Deployment m, AssignmentPlan.Builder assignmentPlan) { @@ -307,7 +315,10 @@ private boolean solveLinearProgram( List modelMemories = new ArrayList<>(); deployments.stream().filter(m -> m.currentAllocationsByNodeId().containsKey(n.id()) == false).forEach(m -> { allocations.add(allocationVars.get(Tuple.tuple(m, n))); - modelMemories.add(normalizedMemoryPerModel.get(m) * m.threadsPerAllocation() / (double) coresPerNode.get(n)); + modelMemories.add( + (normalizedMemoryPerModel.get(m) / (double) coresPerNode.get(n) + normalizedMemoryPerAllocation.get(m)) * m + .threadsPerAllocation() + ); }); model.addExpression("used_memory_on_node_" + n.id() + "_not_more_than_available") .upper(normalizedMemoryPerNode.get(n)) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveAllAllocations.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveAllAllocations.java index f10ece8f5a593..72109941ad477 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveAllAllocations.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveAllAllocations.java @@ -37,6 +37,6 @@ protected int calculatePreservedAllocations(Deployment m) { @Override protected int addPreservedAllocations(Node n, Deployment m) { - return m.currentAllocationsByNodeId().get(n.id()); + return m.currentAllocationsByNodeId().containsKey(n.id()) ? m.currentAllocationsByNodeId().get(n.id()) : 0; } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveOneAllocation.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveOneAllocation.java index 324e1a8d69a53..43b8860803596 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveOneAllocation.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveOneAllocation.java @@ -37,6 +37,6 @@ protected int calculatePreservedAllocations(AssignmentPlan.Deployment m) { @Override protected int addPreservedAllocations(Node n, AssignmentPlan.Deployment m) { - return 1; + return m.currentAllocationsByNodeId().containsKey(n.id()) ? 
1 : 0; } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java index dafc07099f850..8bdc99998a0c2 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java @@ -135,8 +135,9 @@ private void assignUnderSubscribedNodes(Collection nodeSelection) { for (AssignmentPlan.Deployment m : deployments) { Tuple assignment = Tuple.tuple(m, n); if (assignments.get(assignment) > 0) { - totalModelMemory += m.memoryBytes(); - maxTotalThreads += (int) Math.ceil(allocations.get(assignment)) * m.threadsPerAllocation(); + int roundedAllocations = (int) Math.ceil(allocations.get(assignment)); + totalModelMemory += m.estimateMemoryUsageBytes(roundedAllocations); + maxTotalThreads += roundedAllocations * m.threadsPerAllocation(); assignedDeployments.add(m); } } @@ -199,9 +200,12 @@ private void assignExcessCores(Node n) { if (resourceTracker.remainingNodeCores.get(n) <= 0) { break; } - int extraAllocations = Math.min( - resourceTracker.remainingNodeCores.get(n) / m.threadsPerAllocation(), - resourceTracker.remainingModelAllocations.get(m) + int extraAllocations = m.findExcessAllocations( + Math.min( + resourceTracker.remainingNodeCores.get(n) / m.threadsPerAllocation(), + resourceTracker.remainingModelAllocations.get(m) + ), + resourceTracker.remainingNodeMemory.get(n) ); allocations.compute(Tuple.tuple(m, n), (k, v) -> v + extraAllocations); resourceTracker.assign(m, n, extraAllocations); @@ -211,7 +215,7 @@ private void assignExcessCores(Node n) { } private static double remainingModelOrder(AssignmentPlan.Deployment m) { - return (m.currentAllocationsByNodeId().isEmpty() ? 1 : 2) * -m.memoryBytes(); + return (m.currentAllocationsByNodeId().isEmpty() ? 1 : 2) * -m.minimumMemoryRequiredBytes(); } private boolean hasSoftAssignments(Node n) { @@ -275,15 +279,17 @@ private void doRandomizedRounding(List> s int roundedAllocations = random.nextDouble() < roundUpProbability ? 
(int) Math.ceil(allocations.get(assignment)) : (int) Math.floor(allocations.get(assignment)); - - if (m.memoryBytes() > resourceTracker.remainingNodeMemory.get(n) + if (m.estimateMemoryUsageBytes(roundedAllocations) > resourceTracker.remainingNodeMemory.get(n) || m.threadsPerAllocation() > resourceTracker.remainingNodeCores.get(n) || roundedAllocations == 0 || random.nextDouble() > assignments.get(assignment)) { unassign(assignment); assignUnderSubscribedNodes(Set.of(n)); } else { - roundedAllocations = Math.min(roundedAllocations, resourceTracker.remainingNodeCores.get(n) / m.threadsPerAllocation()); + roundedAllocations = m.findOptimalAllocations( + Math.min(roundedAllocations, resourceTracker.remainingNodeCores.get(n) / m.threadsPerAllocation()), + resourceTracker.remainingNodeMemory.get(n) + ); assignModelToNode(m, n, roundedAllocations); unassignOversizedModels(n); assignExcessCores(n); @@ -294,7 +300,7 @@ private void doRandomizedRounding(List> s private void unassignOversizedModels(Node n) { for (AssignmentPlan.Deployment m : deployments) { Tuple<AssignmentPlan.Deployment, Node> assignment = Tuple.tuple(m, n); - if (assignments.get(assignment) < 1.0 && m.memoryBytes() > resourceTracker.remainingNodeMemory.get(n)) { + if (assignments.get(assignment) < 1.0 && m.minimumMemoryRequiredBytes() > resourceTracker.remainingNodeMemory.get(n)) { unassign(assignment); } } @@ -303,7 +310,11 @@ private void unassignOversizedModels(Node n) { private AssignmentPlan toPlan() { AssignmentPlan.Builder builder = AssignmentPlan.builder(nodes, deployments); for (Map.Entry<Tuple<AssignmentPlan.Deployment, Node>, Integer> assignment : tryAssigningRemainingCores().entrySet()) { - builder.assignModelToNode(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue()); + // TODO (#101612) The model should be assigned to the node only when it is possible. This means that canAssign should be + // integrated into assignModelToNode. 
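+ // Until then, skip any assignment that no longer fits instead of letting assignModelToNode throw.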
+ if (builder.canAssign(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue())) { + builder.assignModelToNode(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue()); + } } return builder.build(); } @@ -338,7 +349,7 @@ private Map, Integer> tryAssigningRemaini .toList()) { for (Node n : nodes.stream() .filter( - n -> resourceTracker.remainingNodeMemory.get(n) >= m.memoryBytes() + n -> resourceTracker.remainingNodeMemory.get(n) >= m.minimumMemoryRequiredBytes() && resourceTracker.remainingNodeCores.get(n) >= m.threadsPerAllocation() && resultAllocations.get(Tuple.tuple(m, n)) == 0 ) @@ -354,10 +365,15 @@ private Map, Integer> tryAssigningRemaini ) ) .toList()) { - int assigningAllocations = Math.min( resourceTracker.remainingNodeCores.get(n) / m.threadsPerAllocation(), - resourceTracker.remainingModelAllocations.get(m) + Math.min( + resourceTracker.remainingModelAllocations.get(m), + m.findOptimalAllocations( + resourceTracker.remainingNodeCores.get(n) / m.threadsPerAllocation(), + resourceTracker.remainingModelAllocations.get(m) + ) + ) ); resourceTracker.assign(m, n, assigningAllocations); resultAllocations.put(Tuple.tuple(m, n), assigningAllocations); @@ -427,7 +443,7 @@ private static class ResourceTracker { void assign(AssignmentPlan.Deployment m, Node n, int allocations) { if (assignments.contains(Tuple.tuple(m, n)) == false) { assignments.add(Tuple.tuple(m, n)); - remainingNodeMemory.compute(n, (k, v) -> v - m.memoryBytes()); + remainingNodeMemory.compute(n, (k, v) -> v - m.estimateMemoryUsageBytes(allocations)); } remainingNodeCores.compute(n, (k, v) -> v - allocations * m.threadsPerAllocation()); remainingModelAllocations.compute(m, (k, v) -> v - allocations); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java index 9870aa93bf6ce..8c9499ca9e00c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java @@ -126,10 +126,12 @@ private AssignmentPlan computeZonePlan( modelIdToTargetAllocations.get(m.id()), m.threadsPerAllocation(), m.currentAllocationsByNodeId(), - // Only force assigning at least once previously assigned models that have not had any allocation yet (tryAssigningPreviouslyAssignedModels && modelIdToRemainingAllocations.get(m.id()) == m.allocations()) ? 
m.maxAssignedAllocations() - : 0 + : 0, + // Only force assigning at least once previously assigned models that have not had any allocation yet + m.perDeploymentMemoryBytes(), + m.perAllocationMemoryBytes() ) ) .toList(); @@ -151,7 +153,9 @@ private AssignmentPlan computePlanAcrossAllNodes(List<AssignmentPlan> plans) { m.allocations(), m.threadsPerAllocation(), allocationsByNodeIdByModelId.get(m.id()), - m.maxAssignedAllocations() + m.maxAssignedAllocations(), + m.perDeploymentMemoryBytes(), + m.perAllocationMemoryBytes() ) ) .toList(); @@ -180,9 +184,13 @@ private AssignmentPlan swapOriginalModelsInPlan( Node originalNode = originalNodeById.get(assignment.getKey().id()); planBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); if (originalDeployment.currentAllocationsByNodeId().containsKey(originalNode.id())) { + // TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder // As the node still has all its available memory, we need to manually account for the memory of models with // current allocations. - planBuilder.accountMemory(m, originalNode); + long requiredMemory = originalDeployment.estimateMemoryUsageBytes( + originalDeployment.currentAllocationsByNodeId().get(originalNode.id()) + ); + planBuilder.accountMemory(m, originalNode, requiredMemory); } } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java index 8ccf8839cfc08..334fdfbb8b922 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java @@ -44,7 +44,8 @@ public void testRebalance_GivenNoAssignments() { Map.of(), Map.of(), Optional.empty(), - 1 + 1, + false ).rebalance().build(); assertThat(result.allAssignments().isEmpty(), is(true)); } @@ -78,7 +79,8 @@ public void testRebalance_GivenAllAssignmentsAreSatisfied_ShouldMakeNoChanges() nodeLoads, Map.of(), Optional.empty(), - 1 + 1, + false ).rebalance().build(); assertThat(currentMetadata, equalTo(result)); @@ -116,7 +118,8 @@ public void testRebalance_GivenAllAssignmentsAreSatisfied_GivenOutdatedRoutingEn nodeLoads, Map.of(List.of(), List.of(node1, node2)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); assertThat(result.allAssignments(), is(aMapWithSize(2))); @@ -140,7 +143,7 @@ public void testRebalance_GivenModelToAddAlreadyExists() { .build(); expectThrows( ResourceAlreadyExistsException.class, - () -> new TrainedModelAssignmentRebalancer(currentMetadata, Map.of(), Map.of(), Optional.of(taskParams), 1).rebalance() + () -> new TrainedModelAssignmentRebalancer(currentMetadata, Map.of(), Map.of(), Optional.of(taskParams), 1, false).rebalance() ); } @@ -154,7 +157,8 @@ public void testRebalance_GivenFirstModelToAdd_NoMLNodes() throws Exception { Map.of(), Map.of(), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(modelId); @@ -181,7 +185,8 @@ public void testRebalance_GivenFirstModelToAdd_NotEnoughProcessors() throws Exce nodeLoads, Map.of(List.of(), List.of(node)), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(modelId); @@ -217,7 +222,8 @@ public void 
testRebalance_GivenFirstModelToAdd_NotEnoughMemory() throws Exceptio nodeLoads, Map.of(), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(modelId); @@ -253,7 +259,8 @@ public void testRebalance_GivenFirstModelToAdd_ErrorDetectingNodeLoad() throws E nodeLoads, Map.of(), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(modelId); @@ -289,7 +296,8 @@ public void testRebalance_GivenProblemsOnMultipleNodes() throws Exception { nodeLoads, Map.of(List.of(), List.of(node1, node2)), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(modelId); @@ -322,7 +330,8 @@ public void testRebalance_GivenFirstModelToAdd_FitsFully() throws Exception { nodeLoads, Map.of(List.of(), List.of(node1)), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(modelId); @@ -361,7 +370,8 @@ public void testRebalance_GivenModelToAdd_AndPreviousAssignments_AndTwoNodes_All nodeLoads, Map.of(List.of(), List.of(node1, node2)), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); assertThat(result.allAssignments(), is(aMapWithSize(2))); @@ -425,7 +435,8 @@ public void testRebalance_GivenPreviousAssignments_AndNewNode() throws Exception nodeLoads, Map.of(List.of(), List.of(node1, node2, node3)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); assertThat(result.allAssignments(), is(aMapWithSize(2))); @@ -489,7 +500,8 @@ public void testRebalance_GivenPreviousAssignments_AndRemovedNode_AndRemainingNo nodeLoads, Map.of(List.of(), List.of(node1)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); assertThat(result.allAssignments(), is(aMapWithSize(2))); @@ -559,7 +571,8 @@ public void testRebalance_GivenPreviousAssignments_AndRemovedNode_AndRemainingNo nodeLoads, Map.of(List.of(), List.of(node1)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); assertThat(result.allAssignments(), is(aMapWithSize(2))); @@ -608,7 +621,8 @@ public void testRebalance_GivenFailedAssignment_RestartsAssignment() throws Exce nodeLoads, Map.of(List.of(), List.of(node1)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); assertThat(result.allAssignments(), is(aMapWithSize(1))); @@ -642,7 +656,8 @@ public void testRebalance_GivenLowPriorityModelToAdd_OnlyModel_NotEnoughMemory() nodeLoads, Map.of(), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(deploymentId); @@ -658,8 +673,8 @@ public void testRebalance_GivenLowPriorityModelToAdd_OnlyModel_NotEnoughMemory() public void testRebalance_GivenLowPriorityModelToAdd_NotEnoughMemoryNorProcessors() throws Exception { long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes(); - DiscoveryNode node1 = buildNode("node-1", nodeMemoryBytes, 1); - DiscoveryNode node2 = buildNode("node-2", nodeMemoryBytes, 1); + DiscoveryNode node1 = buildNode("node-1", nodeMemoryBytes, 8); + DiscoveryNode node2 = buildNode("node-2", nodeMemoryBytes, 8); Map nodeLoads = new HashMap<>(); nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build()); @@ -688,7 +703,8 @@ public void testRebalance_GivenLowPriorityModelToAdd_NotEnoughMemoryNorProcessor nodeLoads, Map.of(List.of("zone-1"), List.of(node1), List.of("zone-2"), List.of(node2)), Optional.of(taskParams1), - 1 + 1, + 
false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(deployment1); @@ -727,7 +743,8 @@ public void testRebalance_GivenMixedPriorityModels_NotEnoughMemoryForLowPriority nodeLoads, Map.of(List.of(), List.of(node1)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); { @@ -780,7 +797,8 @@ public void testRebalance_GivenMixedPriorityModels_TwoZones_EachNodeCanHoldOneMo nodeLoads, Map.of(List.of("zone-1"), List.of(node1), List.of("zone-2"), List.of(node2)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); List assignedNodes = new ArrayList<>(); @@ -834,7 +852,8 @@ public void testRebalance_GivenModelUsingAllCpu_FittingLowPriorityModelCanStart( nodeLoads, Map.of(List.of(), List.of(node1)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); { @@ -884,7 +903,8 @@ public void testRebalance_GivenMultipleLowPriorityModels_AndMultipleNodes() thro nodeLoads, Map.of(List.of(), List.of(node1, node2)), Optional.empty(), - 1 + 1, + false ).rebalance().build(); { @@ -934,7 +954,8 @@ public void testRebalance_GivenNormalPriorityModelToLoad_EvictsLowPriorityModel( nodeLoads, Map.of(List.of(), List.of(node1)), Optional.of(taskParams2), - 1 + 1, + false ).rebalance().build(); { @@ -986,7 +1007,8 @@ public void testRebalance_GivenNormalPriorityModelToLoad_AndLowPriorityModelCanS nodeLoads, Map.of(List.of(), List.of(node1, node2)), Optional.of(taskParams2), - 1 + 1, + false ).rebalance().build(); { @@ -1038,7 +1060,8 @@ public void testRebalance_GivenNormalPriorityModelToLoad_AndLowPriorityModelMust nodeLoads, Map.of(List.of(), List.of(node1, node2)), Optional.of(taskParams2), - 1 + 1, + false ).rebalance().build(); { @@ -1084,7 +1107,8 @@ public void testRebalance_GivenFirstModelToAdd_GivenScalingProcessorSetting() { nodeLoads, Map.of(List.of(), List.of(node)), Optional.of(taskParams), - 2 + 2, + false ).rebalance().build(); TrainedModelAssignment assignment = result.getDeploymentAssignment(modelId); @@ -1106,7 +1130,8 @@ public void testRebalance_GivenFirstModelToAdd_GivenScalingProcessorSetting() { nodeLoads, Map.of(List.of(), List.of(node)), Optional.of(taskParams), - 1 + 1, + false ).rebalance().build(); assignment = result.getDeploymentAssignment(modelId); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java index 3ecdd5000ba35..cbbb38f1d1ddd 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.inference.assignment.planning; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan.Deployment; import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan.Node; @@ -24,109 +25,248 @@ public class AssignmentPlanTests extends ESTestCase { public void testBuilderCtor_GivenDuplicateNode() { Node n = new Node("n_1", 100, 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 40, 1, 2, Map.of(), 0); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 40, 1, 2, Map.of(), 0, 0, 0); expectThrows(IllegalArgumentException.class, () -> AssignmentPlan.builder(List.of(n, 
n), List.of(m))); } public void testBuilderCtor_GivenDuplicateModel() { Node n = new Node("n_1", 100, 4); - Deployment m = new AssignmentPlan.Deployment("m_1", 40, 1, 2, Map.of(), 0); + Deployment m = new AssignmentPlan.Deployment("m_1", 40, 1, 2, Map.of(), 0, 0, 0); expectThrows(IllegalArgumentException.class, () -> AssignmentPlan.builder(List.of(n), List.of(m, m))); } public void testAssignModelToNode_GivenNoPreviousAssignment() { - Node n = new Node("n_1", 100, 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 40, 1, 2, Map.of(), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(350).getBytes(), 4); - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + { // old memory format + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(40).getBytes(), 1, 2, Map.of(), 0, 0, 0); - assertThat(builder.getRemainingCores(n), equalTo(4)); - assertThat(builder.getRemainingMemory(n), equalTo(100L)); - assertThat(builder.getRemainingAllocations(m), equalTo(1)); - assertThat(builder.getRemainingThreads(m), equalTo(2)); + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - builder.assignModelToNode(m, n, 1); + assertThat(builder.getRemainingCores(n), equalTo(4)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(350).getBytes())); + assertThat(builder.getRemainingAllocations(m), equalTo(1)); + assertThat(builder.getRemainingThreads(m), equalTo(2)); - assertThat(builder.getRemainingCores(n), equalTo(2)); - assertThat(builder.getRemainingMemory(n), equalTo(60L)); - assertThat(builder.getRemainingAllocations(m), equalTo(0)); - assertThat(builder.getRemainingThreads(m), equalTo(0)); + builder.assignModelToNode(m, n, 1); - AssignmentPlan plan = builder.build(); + assertThat(builder.getRemainingCores(n), equalTo(2)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(30).getBytes())); + assertThat(builder.getRemainingAllocations(m), equalTo(0)); + assertThat(builder.getRemainingThreads(m), equalTo(0)); - assertThat(plan.models(), contains(m)); - assertThat(plan.satisfiesCurrentAssignments(), is(true)); - assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + AssignmentPlan plan = builder.build(); + + assertThat(plan.models(), contains(m)); + assertThat(plan.satisfiesCurrentAssignments(), is(true)); + assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + } + { // new memory format + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(20).getBytes(), + 1, + 2, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(30).getBytes() + ); + + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + + assertThat(builder.getRemainingCores(n), equalTo(4)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(350).getBytes())); + assertThat(builder.getRemainingAllocations(m), equalTo(1)); + assertThat(builder.getRemainingThreads(m), equalTo(2)); + + builder.assignModelToNode(m, n, 1); + + assertThat(builder.getRemainingCores(n), equalTo(2)); + assertThat(builder.getRemainingMemory(n), equalTo(0L)); + assertThat(builder.getRemainingAllocations(m), equalTo(0)); + assertThat(builder.getRemainingThreads(m), equalTo(0)); + + AssignmentPlan plan = builder.build(); + + assertThat(plan.models(), contains(m)); + assertThat(plan.satisfiesCurrentAssignments(), is(true)); + assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + } } public void 
testAssignModelToNode_GivenNewPlanSatisfiesCurrentAssignment() { - Node n = new Node("n_1", 100, 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 40, 2, 2, Map.of("n_1", 1), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(350).getBytes(), 4); + { // old memory format + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 2, + Map.of("n_1", 1), + 0, + 0, + 0 + ); - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - builder.assignModelToNode(m, n, 1); + builder.assignModelToNode(m, n, 1); - assertThat(builder.getRemainingCores(n), equalTo(2)); - assertThat(builder.getRemainingMemory(n), equalTo(100L)); - assertThat(builder.getRemainingAllocations(m), equalTo(1)); - assertThat(builder.getRemainingThreads(m), equalTo(2)); + assertThat(builder.getRemainingCores(n), equalTo(2)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(350).getBytes())); + assertThat(builder.getRemainingAllocations(m), equalTo(1)); + assertThat(builder.getRemainingThreads(m), equalTo(2)); - AssignmentPlan plan = builder.build(); + AssignmentPlan plan = builder.build(); - assertThat(plan.models(), contains(m)); - assertThat(plan.satisfiesCurrentAssignments(), is(true)); - assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + assertThat(plan.models(), contains(m)); + assertThat(plan.satisfiesCurrentAssignments(), is(true)); + assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + } + { // new memory format + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(25).getBytes(), + 2, + 2, + Map.of("n_1", 1), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(25).getBytes() + ); + + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + + builder.assignModelToNode(m, n, 1); + + assertThat(builder.getRemainingCores(n), equalTo(2)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(325).getBytes())); + assertThat(builder.getRemainingAllocations(m), equalTo(1)); + assertThat(builder.getRemainingThreads(m), equalTo(2)); + + AssignmentPlan plan = builder.build(); + + assertThat(plan.models(), contains(m)); + assertThat(plan.satisfiesCurrentAssignments(), is(true)); + assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + + } } public void testAssignModelToNode_GivenNewPlanDoesNotSatisfyCurrentAssignment() { - Node n = new Node("n_1", 100, 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 40, 2, 2, Map.of("n_1", 2), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(300).getBytes(), 4); + { + // old memory format + Deployment m = new Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 2, 2, Map.of("n_1", 2), 0, 0, 0); - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - builder.assignModelToNode(m, n, 1); + builder.assignModelToNode(m, n, 1); - assertThat(builder.getRemainingCores(n), equalTo(2)); - assertThat(builder.getRemainingMemory(n), equalTo(100L)); - assertThat(builder.getRemainingAllocations(m), equalTo(1)); - assertThat(builder.getRemainingThreads(m), equalTo(2)); + assertThat(builder.getRemainingCores(n), equalTo(2)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(300).getBytes())); + 
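// The deployment already has allocations on this node and the old-format memory estimate does not grow with the allocation count, so the extra allocation consumes no additional memory. + 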
assertThat(builder.getRemainingAllocations(m), equalTo(1)); + assertThat(builder.getRemainingThreads(m), equalTo(2)); - AssignmentPlan plan = builder.build(); + AssignmentPlan plan = builder.build(); - assertThat(plan.models(), contains(m)); - assertThat(plan.satisfiesCurrentAssignments(), is(false)); - assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + assertThat(plan.models(), contains(m)); + assertThat(plan.satisfiesCurrentAssignments(), is(false)); + assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + } + { + // new memory format + Deployment m = new Deployment( + "m_1", + ByteSizeValue.ofMb(25).getBytes(), + 2, + 2, + Map.of("n_1", 2), + 0, + ByteSizeValue.ofMb(250).getBytes(), + ByteSizeValue.ofMb(25).getBytes() + ); + + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + + builder.assignModelToNode(m, n, 1); + + assertThat(builder.getRemainingCores(n), equalTo(2)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(275).getBytes())); + assertThat(builder.getRemainingAllocations(m), equalTo(1)); + assertThat(builder.getRemainingThreads(m), equalTo(2)); + + AssignmentPlan plan = builder.build(); + + assertThat(plan.models(), contains(m)); + assertThat(plan.satisfiesCurrentAssignments(), is(false)); + assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 1))); + } } public void testAssignModelToNode_GivenPreviouslyUnassignedModelDoesNotFit() { - Node n = new Node("n_1", 100, 4); - Deployment m = new AssignmentPlan.Deployment("m_1", 101, 2, 2, Map.of(), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); + Deployment m = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(50).getBytes(), 2, 2, Map.of(), 0, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 1)); - assertThat(e.getMessage(), equalTo("not enough memory on node [n_1] to assign model [m_1]")); + assertThat(e.getMessage(), equalTo("not enough memory on node [n_1] to assign [1] allocations to deployment [m_1]")); } public void testAssignModelToNode_GivenPreviouslyAssignedModelDoesNotFit() { - Node n = new Node("n_1", 100, 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 101, 2, 2, Map.of("n_1", 1), 0); + { // old memory format + Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(50).getBytes(), + 2, + 2, + Map.of("n_1", 1), + 0, + 0, + 0 + ); - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - builder.assignModelToNode(m, n, 2); - AssignmentPlan plan = builder.build(); + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - assertThat(plan.models(), contains(m)); - assertThat(plan.satisfiesCurrentAssignments(), is(true)); - assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 2))); + builder.assignModelToNode(m, n, 2); + AssignmentPlan plan = builder.build(); + + assertThat(plan.models(), contains(m)); + assertThat(plan.satisfiesCurrentAssignments(), is(true)); + assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 2))); + } + { // new memory format + Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 2, + Map.of("n_1", 1), + 0, + 
ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(5).getBytes() + ); + + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + + builder.assignModelToNode(m, n, 2); + AssignmentPlan plan = builder.build(); + + assertThat(plan.models(), contains(m)); + assertThat(plan.satisfiesCurrentAssignments(), is(true)); + assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 2))); + } } public void testAssignModelToNode_GivenNotEnoughCores_AndSingleThreadPerAllocation() { - Node n = new Node("n_1", 100, 4); - Deployment m = new AssignmentPlan.Deployment("m_1", 100, 5, 1, Map.of(), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(500).getBytes(), 4); + Deployment m = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(100).getBytes(), 5, 1, Map.of(), 0, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 5)); @@ -138,8 +278,8 @@ public void testAssignModelToNode_GivenNotEnoughCores_AndSingleThreadPerAllocati } public void testAssignModelToNode_GivenNotEnoughCores_AndMultipleThreadsPerAllocation() { - Node n = new Node("n_1", 100, 5); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 100, 3, 2, Map.of(), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(500).getBytes(), 5); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(100).getBytes(), 3, 2, Map.of(), 0, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 3)); @@ -151,13 +291,22 @@ public void testAssignModelToNode_GivenNotEnoughCores_AndMultipleThreadsPerAlloc } public void testAssignModelToNode_GivenSameModelAssignedTwice() { - Node n = new Node("n_1", 100, 8); - Deployment m = new AssignmentPlan.Deployment("m_1", 60, 4, 2, Map.of(), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 8); + Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(50).getBytes(), + 4, + 2, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + ); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); assertThat(builder.getRemainingCores(n), equalTo(8)); - assertThat(builder.getRemainingMemory(n), equalTo(100L)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(1000).getBytes())); assertThat(builder.getRemainingAllocations(m), equalTo(4)); assertThat(builder.getRemainingThreads(m), equalTo(8)); assertThat(builder.canAssign(m, n, 1), is(true)); @@ -165,7 +314,7 @@ public void testAssignModelToNode_GivenSameModelAssignedTwice() { builder.assignModelToNode(m, n, 1); assertThat(builder.getRemainingCores(n), equalTo(6)); - assertThat(builder.getRemainingMemory(n), equalTo(40L)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(600).getBytes())); assertThat(builder.getRemainingAllocations(m), equalTo(3)); assertThat(builder.getRemainingThreads(m), equalTo(6)); assertThat(builder.canAssign(m, n, 2), is(true)); @@ -173,7 +322,7 @@ public void testAssignModelToNode_GivenSameModelAssignedTwice() { builder.assignModelToNode(m, n, 2); assertThat(builder.getRemainingCores(n), equalTo(2)); - assertThat(builder.getRemainingMemory(n), equalTo(40L)); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(500).getBytes())); 
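+ // Assigning two more allocations to the same deployment adds only the per-allocation memory: 600MB - 2 * 50MB = 500MB.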
assertThat(builder.getRemainingAllocations(m), equalTo(1)); assertThat(builder.getRemainingThreads(m), equalTo(2)); @@ -186,7 +335,7 @@ public void testAssignModelToNode_GivenSameModelAssignedTwice() { public void testCanAssign_GivenPreviouslyUnassignedModelDoesNotFit() { Node n = new Node("n_1", 100, 5); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 101, 1, 1, Map.of(), 0); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 101, 1, 1, Map.of(), 0, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); @@ -194,17 +343,33 @@ public void testCanAssign_GivenPreviouslyUnassignedModelDoesNotFit() { } public void testCanAssign_GivenPreviouslyAssignedModelDoesNotFit() { - Node n = new Node("n_1", 100, 5); - Deployment m = new AssignmentPlan.Deployment("m_1", 101, 1, 1, Map.of("n_1", 1), 0); - - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - - assertThat(builder.canAssign(m, n, 1), is(true)); + Node n = new Node("n_1", ByteSizeValue.ofMb(300).getBytes(), 5); + { + // old memory format + Deployment m = new Deployment("m_1", ByteSizeValue.ofMb(31).getBytes(), 1, 1, Map.of("n_1", 1), 0, 0, 0); + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + assertThat(builder.canAssign(m, n, 1), is(true)); + } + { + // new memory format + Deployment m = new Deployment( + "m_1", + ByteSizeValue.ofMb(25).getBytes(), + 1, + 1, + Map.of("n_1", 1), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + assertThat(builder.canAssign(m, n, 1), is(true)); + } } public void testCanAssign_GivenEnoughMemory() { - Node n = new Node("n_1", 100, 5); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 100, 3, 2, Map.of(), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(440).getBytes(), 5); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(100).getBytes(), 3, 2, Map.of(), 0, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); @@ -216,16 +381,25 @@ public void testCanAssign_GivenEnoughMemory() { public void testCompareTo_GivenDifferenceInPreviousAssignments() { AssignmentPlan planSatisfyingPreviousAssignments; AssignmentPlan planNotSatisfyingPreviousAssignments; - Node n = new Node("n_1", 100, 5); + Node n = new Node("n_1", ByteSizeValue.ofMb(300).getBytes(), 5); { - Deployment m = new AssignmentPlan.Deployment("m_1", 100, 3, 2, Map.of("n_1", 2), 0); + Deployment m = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 3, 2, Map.of("n_1", 2), 0, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); builder.assignModelToNode(m, n, 2); planSatisfyingPreviousAssignments = builder.build(); } { - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 100, 3, 2, Map.of("n_1", 3), 0); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 3, + 2, + Map.of("n_1", 3), + 0, + 0, + 0 + ); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); builder.assignModelToNode(m, n, 2); planNotSatisfyingPreviousAssignments = builder.build(); @@ -238,8 +412,17 @@ public void testCompareTo_GivenDifferenceInPreviousAssignments() { public void testCompareTo_GivenDifferenceInAllocations() { AssignmentPlan planWithMoreAllocations; AssignmentPlan 
planWithFewerAllocations; - Node n = new Node("n_1", 100, 5); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 100, 3, 2, Map.of("n_1", 1), 0); + Node n = new Node("n_1", ByteSizeValue.ofMb(300).getBytes(), 5); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 3, + 2, + Map.of("n_1", 1), + 0, + 0, + 0 + ); { AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); @@ -259,16 +442,25 @@ public void testCompareTo_GivenDifferenceInAllocations() { public void testCompareTo_GivenDifferenceInMemory() { AssignmentPlan planUsingMoreMemory; AssignmentPlan planUsingLessMemory; - Node n = new Node("n_1", 100, 5); + Node n = new Node("n_1", ByteSizeValue.ofMb(300).getBytes(), 5); { - Deployment m = new AssignmentPlan.Deployment("m_1", 100, 3, 2, Map.of("n_1", 1), 0); + Deployment m = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 3, 2, Map.of("n_1", 1), 0, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); builder.assignModelToNode(m, n, 2); planUsingMoreMemory = builder.build(); } { - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment("m_1", 99, 3, 2, Map.of("n_1", 1), 0); + AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(29).getBytes(), + 3, + 2, + Map.of("n_1", 1), + 0, + 0, + 0 + ); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); builder.assignModelToNode(m, n, 2); planUsingLessMemory = builder.build(); @@ -279,26 +471,96 @@ public void testCompareTo_GivenDifferenceInMemory() { } public void testSatisfiesAllModels_GivenAllModelsAreSatisfied() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - AssignmentPlan.Deployment deployment1 = new AssignmentPlan.Deployment("m_1", 50, 1, 2, Map.of(), 0); - AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment("m_2", 30, 2, 1, Map.of(), 0); - AssignmentPlan.Deployment deployment3 = new AssignmentPlan.Deployment("m_3", 20, 4, 1, Map.of(), 0); - AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2, deployment3)) - .assignModelToNode(deployment1, node1, 1) - .assignModelToNode(deployment2, node2, 2) - .assignModelToNode(deployment3, node1, 2) - .assignModelToNode(deployment3, node2, 2) - .build(); - assertThat(plan.satisfiesAllModels(), is(true)); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 4); + { + // old memory format + AssignmentPlan.Deployment deployment1 = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(50).getBytes(), + 1, + 2, + Map.of(), + 0, + 0, + 0 + ); + AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment( + "m_2", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 1, + Map.of(), + 0, + 0, + 0 + ); + AssignmentPlan.Deployment deployment3 = new AssignmentPlan.Deployment( + "m_3", + ByteSizeValue.ofMb(20).getBytes(), + 4, + 1, + Map.of(), + 0, + 0, + 0 + ); + AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2, deployment3)) + .assignModelToNode(deployment1, node1, 1) + .assignModelToNode(deployment2, node2, 2) + .assignModelToNode(deployment3, node1, 2) + .assignModelToNode(deployment3, node2, 2) + .build(); + assertThat(plan.satisfiesAllModels(), is(true)); + } + { + // new memory format + AssignmentPlan.Deployment deployment1 = new 
AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(50).getBytes(), + 1, + 2, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment( + "m_2", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + AssignmentPlan.Deployment deployment3 = new AssignmentPlan.Deployment( + "m_3", + ByteSizeValue.ofMb(20).getBytes(), + 4, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2, deployment3)) + .assignModelToNode(deployment1, node1, 1) + .assignModelToNode(deployment2, node2, 2) + .assignModelToNode(deployment3, node1, 2) + .assignModelToNode(deployment3, node2, 2) + .build(); + assertThat(plan.satisfiesAllModels(), is(true)); + } } public void testSatisfiesAllModels_GivenOneModelHasOneAllocationLess() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - AssignmentPlan.Deployment deployment1 = new AssignmentPlan.Deployment("m_1", 50, 1, 2, Map.of(), 0); - AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment("m_2", 30, 2, 1, Map.of(), 0); - Deployment deployment3 = new Deployment("m_3", 20, 4, 1, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 4); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(50).getBytes(), 1, 2, Map.of(), 0, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(30).getBytes(), 2, 1, Map.of(), 0, 0, 0); + Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(20).getBytes(), 4, 1, Map.of(), 0, 0, 0); AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2, deployment3)) .assignModelToNode(deployment1, node1, 1) .assignModelToNode(deployment2, node2, 2) @@ -309,11 +571,11 @@ public void testSatisfiesAllModels_GivenOneModelHasOneAllocationLess() { } public void testArePreviouslyAssignedModelsAssigned_GivenTrue() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - AssignmentPlan.Deployment deployment1 = new AssignmentPlan.Deployment("m_1", 50, 1, 2, Map.of(), 3); - AssignmentPlan.Deployment deployment2 = new Deployment("m_2", 30, 2, 1, Map.of(), 4); - AssignmentPlan.Deployment deployment3 = new AssignmentPlan.Deployment("m_3", 20, 4, 1, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 4); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(50).getBytes(), 1, 2, Map.of(), 3, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(30).getBytes(), 2, 1, Map.of(), 4, 0, 0); + Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(20).getBytes(), 4, 1, Map.of(), 0, 0, 0); AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2, deployment3)) .assignModelToNode(deployment1, node1, 1) .assignModelToNode(deployment2, node2, 1) @@ -322,10 +584,10 @@ public void testArePreviouslyAssignedModelsAssigned_GivenTrue() { } public void testArePreviouslyAssignedModelsAssigned_GivenFalse() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - 
AssignmentPlan.Deployment deployment1 = new Deployment("m_1", 50, 1, 2, Map.of(), 3); - AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment("m_2", 30, 2, 1, Map.of(), 4); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 4); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(50).getBytes(), 1, 2, Map.of(), 3, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(30).getBytes(), 2, 1, Map.of(), 4, 0, 0); AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2)) .assignModelToNode(deployment1, node1, 1) .build(); @@ -333,12 +595,39 @@ public void testArePreviouslyAssignedModelsAssigned_GivenFalse() { } public void testCountPreviouslyAssignedThatAreStillAssigned() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - Deployment deployment1 = new AssignmentPlan.Deployment("m_1", 50, 1, 2, Map.of(), 3); - AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment("m_2", 30, 2, 1, Map.of(), 4); - AssignmentPlan.Deployment deployment3 = new AssignmentPlan.Deployment("m_3", 20, 4, 1, Map.of(), 1); - AssignmentPlan.Deployment deployment4 = new AssignmentPlan.Deployment("m_4", 20, 4, 1, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 4); + Deployment deployment1 = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(50).getBytes(), 1, 2, Map.of(), 3, 0, 0); + AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment( + "m_2", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 1, + Map.of(), + 4, + 0, + 0 + ); + AssignmentPlan.Deployment deployment3 = new AssignmentPlan.Deployment( + "m_3", + ByteSizeValue.ofMb(20).getBytes(), + 4, + 1, + Map.of(), + 1, + 0, + 0 + ); + AssignmentPlan.Deployment deployment4 = new AssignmentPlan.Deployment( + "m_4", + ByteSizeValue.ofMb(20).getBytes(), + 4, + 1, + Map.of(), + 0, + 0, + 0 + ); AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2, deployment3, deployment4)) .assignModelToNode(deployment1, node1, 1) .assignModelToNode(deployment2, node2, 1) diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlannerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlannerTests.java index 82a291a8d9fb2..6a72ccf4c4445 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlannerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlannerTests.java @@ -33,50 +33,144 @@ public class AssignmentPlannerTests extends ESTestCase { + private static long scaleNodeSize(long nodeMemory) { + // 240 Mb is the size in StartTrainedModelDeploymentAction.MEMORY_OVERHEAD + return ByteSizeValue.ofMb(240 + 2 * nodeMemory).getBytes(); + } + public void testModelThatDoesNotFitInMemory() { - List nodes = List.of(new Node("n_1", 100, 4)); - Deployment deployment = new AssignmentPlan.Deployment("m_1", 101, 4, 1, Map.of(), 0); - AssignmentPlan plan = new AssignmentPlanner(nodes, List.of(deployment)).computePlan(); - assertThat(plan.assignments(deployment).isEmpty(), is(true)); + { // Without perDeploymentMemory and perAllocationMemory specified + List nodes = List.of(new 
Node("n_1", scaleNodeSize(50), 4)); + Deployment deployment = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(51).getBytes(), 4, 1, Map.of(), 0, 0, 0); + AssignmentPlan plan = new AssignmentPlanner(nodes, List.of(deployment)).computePlan(); + assertThat(plan.assignments(deployment).isEmpty(), is(true)); + } + { // With perDeploymentMemory and perAllocationMemory specified + List nodes = List.of(new Node("n_1", scaleNodeSize(55), 4)); + Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(50).getBytes(), + 4, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(250).getBytes(), + ByteSizeValue.ofMb(51).getBytes() + ); + AssignmentPlan plan = new AssignmentPlanner(nodes, List.of(deployment)).computePlan(); + assertThat(plan.assignments(deployment).isEmpty(), is(true)); + } } public void testModelWithThreadsPerAllocationNotFittingOnAnyNode() { - List nodes = List.of(new Node("n_1", 100, 4), new Node("n_2", 100, 5)); - Deployment deployment = new AssignmentPlan.Deployment("m_1", 1, 1, 6, Map.of(), 0); + List nodes = List.of(new Node("n_1", scaleNodeSize(100), 4), new Node("n_2", scaleNodeSize(100), 5)); + Deployment deployment = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(1).getBytes(), 1, 6, Map.of(), 0, 0, 0); AssignmentPlan plan = new AssignmentPlanner(nodes, List.of(deployment)).computePlan(); assertThat(plan.assignments(deployment).isEmpty(), is(true)); } public void testSingleModelThatFitsFullyOnSingleNode() { { - Node node = new Node("n_1", 100, 4); - Deployment deployment = new AssignmentPlan.Deployment("m_1", 100, 1, 1, Map.of(), 0); + Node node = new Node("n_1", scaleNodeSize(100), 4); + Deployment deployment = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(100).getBytes(), 1, 1, Map.of(), 0, 0, 0); + AssignmentPlan plan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); + assertModelFullyAssignedToNode(plan, deployment, node); + } + { + Node node = new Node("n_1", scaleNodeSize(1000), 8); + Deployment deployment = new Deployment("m_1", ByteSizeValue.ofMb(1000).getBytes(), 8, 1, Map.of(), 0, 0, 0); AssignmentPlan plan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); assertModelFullyAssignedToNode(plan, deployment, node); } { - Node node = new Node("n_1", 1000, 8); - Deployment deployment = new Deployment("m_1", 1000, 8, 1, Map.of(), 0); + Node node = new Node("n_1", scaleNodeSize(10000), 16); + AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(10000).getBytes(), + 1, + 16, + Map.of(), + 0, + 0, + 0 + ); AssignmentPlan plan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); assertModelFullyAssignedToNode(plan, deployment, node); } { - Node node = new Node("n_1", 10000, 16); - AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 10000, 1, 16, Map.of(), 0); + Node node = new Node("n_1", scaleNodeSize(100), 4); + Deployment deployment = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(100).getBytes(), 1, 1, Map.of(), 0, 0, 0); + AssignmentPlan plan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); + assertModelFullyAssignedToNode(plan, deployment, node); + } + } + + public void testSingleModelThatFitsFullyOnSingleNode_NewMemoryFields() { + { + Node node = new Node("n_1", ByteSizeValue.ofMb(500).getBytes(), 4); + Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 1, + 1, + Map.of(), + 0, + 
ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(100).getBytes() + ); + AssignmentPlan plan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); + assertModelFullyAssignedToNode(plan, deployment, node); + } + { + Node node = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 8); + Deployment deployment = new Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 8, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(100).getBytes(), + ByteSizeValue.ofMb(100).getBytes() + ); AssignmentPlan plan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); assertModelFullyAssignedToNode(plan, deployment, node); } } public void testSingleModelThatFitsFullyOnSingleNode_GivenTwoNodes_ShouldBeFullyAssignedOnOneNode() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - AssignmentPlan.Deployment deployment = new Deployment("m_1", 100, 4, 1, Map.of(), 0); + Node node1 = new Node("n_1", scaleNodeSize(100), 4); + Node node2 = new Node("n_2", scaleNodeSize(100), 4); + AssignmentPlan.Deployment deployment = new Deployment("m_1", ByteSizeValue.ofMb(100).getBytes(), 4, 1, Map.of(), 0, 0, 0); AssignmentPlan plan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment)).computePlan(); Map assignments = plan.assignments(deployment).get(); - if (assignments.get(node1) > 0) { + if (assignments.get(node1) != null) { + assertThat(assignments.get(node1), equalTo(4)); + } else { + assertThat(assignments.get(node2), equalTo(4)); + } + } + + public void testSingleModelThatFitsFullyOnSingleNode_GivenTwoNodes_ShouldBeFullyAssignedOnOneNode_NewMemoryFields() { + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 4); + AssignmentPlan.Deployment deployment = new Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 4, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(150).getBytes() + ); + + AssignmentPlan plan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment)).computePlan(); + + Map assignments = plan.assignments(deployment).get(); + if (assignments.get(node1) != null) { assertThat(assignments.get(node1), equalTo(4)); } else { assertThat(assignments.get(node2), equalTo(4)); @@ -84,10 +178,53 @@ public void testSingleModelThatFitsFullyOnSingleNode_GivenTwoNodes_ShouldBeFully } public void testModelWithMoreAllocationsThanAvailableCores_GivenSingleThreadPerAllocation() { - AssignmentPlan.Deployment deployment = new Deployment("m_1", 30, 10, 1, Map.of(), 0); + AssignmentPlan.Deployment deployment = new Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 10, 1, Map.of(), 0, 0, 0); + // Single node + { + Node node = new Node("n_1", scaleNodeSize(100), 4); + AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); + assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); + Map assignments = assignmentPlan.assignments(deployment).get(); + assertThat(assignments.get(node), equalTo(4)); + } + // Two nodes + { + Node node1 = new Node("n_1", scaleNodeSize(100), 4); + Node node2 = new Node("n_2", scaleNodeSize(100), 2); + AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment)).computePlan(); + assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); + Map assignments = assignmentPlan.assignments(deployment).get(); + assertThat(assignments.get(node1), equalTo(4)); + 
assertThat(assignments.get(node2), equalTo(2)); + } + // Three nodes + { + Node node1 = new Node("n_1", scaleNodeSize(100), 4); + Node node2 = new Node("n_2", scaleNodeSize(100), 2); + Node node3 = new Node("n_3", scaleNodeSize(100), 3); + AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2, node3), List.of(deployment)).computePlan(); + assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); + Map assignments = assignmentPlan.assignments(deployment).get(); + assertThat(assignments.get(node1), equalTo(4)); + assertThat(assignments.get(node2), equalTo(2)); + assertThat(assignments.get(node3), equalTo(3)); + } + } + + public void testModelWithMoreAllocationsThanAvailableCores_GivenSingleThreadPerAllocation_NewMemoryFields() { + AssignmentPlan.Deployment deployment = new Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 10, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(100).getBytes() + ); // Single node { - Node node = new Node("n_1", 100, 4); + Node node = new Node("n_1", ByteSizeValue.ofMb(800).getBytes(), 4); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); Map assignments = assignmentPlan.assignments(deployment).get(); @@ -95,8 +232,8 @@ public void testModelWithMoreAllocationsThanAvailableCores_GivenSingleThreadPerA } // Two nodes { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 2); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(800).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(600).getBytes(), 2); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment)).computePlan(); assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); Map assignments = assignmentPlan.assignments(deployment).get(); @@ -105,9 +242,9 @@ public void testModelWithMoreAllocationsThanAvailableCores_GivenSingleThreadPerA } // Three nodes { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 2); - Node node3 = new Node("n_3", 100, 3); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(800).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(600).getBytes(), 2); + Node node3 = new Node("n_3", ByteSizeValue.ofMb(700).getBytes(), 3); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2, node3), List.of(deployment)).computePlan(); assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); Map assignments = assignmentPlan.assignments(deployment).get(); @@ -118,14 +255,105 @@ public void testModelWithMoreAllocationsThanAvailableCores_GivenSingleThreadPerA } public void testMultipleModelsAndNodesWithSingleSolution() { - Node node1 = new Node("n_1", 100, 7); - Node node2 = new Node("n_2", 100, 7); - Node node3 = new Node("n_3", 100, 2); - Node node4 = new Node("n_4", 100, 2); - Deployment deployment1 = new Deployment("m_1", 50, 2, 4, Map.of(), 0); - AssignmentPlan.Deployment deployment2 = new Deployment("m_2", 50, 2, 3, Map.of(), 0); - Deployment deployment3 = new AssignmentPlan.Deployment("m_3", 50, 1, 2, Map.of(), 0); - AssignmentPlan.Deployment deployment4 = new AssignmentPlan.Deployment("m_4", 50, 2, 1, Map.of(), 0); + Node node1 = new Node("n_1", 2 * scaleNodeSize(50), 7); + Node node2 = new Node("n_2", 2 * scaleNodeSize(50), 7); + Node node3 = new Node("n_3", 2 * scaleNodeSize(50), 2); + Node node4 = new Node("n_4", 2 * 
scaleNodeSize(50), 2); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(50).getBytes(), 2, 4, Map.of(), 0, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(50).getBytes(), 2, 3, Map.of(), 0, 0, 0); + Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(50).getBytes(), 1, 2, Map.of(), 0, 0, 0); + Deployment deployment4 = new Deployment("m_4", ByteSizeValue.ofMb(50).getBytes(), 2, 1, Map.of(), 0, 0, 0); + + AssignmentPlan plan = new AssignmentPlanner( + List.of(node1, node2, node3, node4), + List.of(deployment1, deployment2, deployment3, deployment4) + ).computePlan(); + + { + assertThat(plan.assignments(deployment1).isPresent(), is(true)); + Map assignments = plan.assignments(deployment1).get(); + assertThat(assignments.get(node1), equalTo(1)); + assertThat(assignments.get(node2), equalTo(1)); + assertThat(assignments.get(node3), is(nullValue())); + assertThat(assignments.get(node4), is(nullValue())); + } + { + assertThat(plan.assignments(deployment2).isPresent(), is(true)); + Map assignments = plan.assignments(deployment2).get(); + assertThat(assignments.get(node1), equalTo(1)); + assertThat(assignments.get(node2), equalTo(1)); + assertThat(assignments.get(node3), is(nullValue())); + assertThat(assignments.get(node4), is(nullValue())); + } + { + assertThat(plan.assignments(deployment3).isPresent(), is(true)); + Map assignments = plan.assignments(deployment3).get(); + assertThat(assignments.get(node1), is(nullValue())); + assertThat(assignments.get(node2), is(nullValue())); + // Will either be on node 3 or 4 + Node assignedNode = assignments.get(node3) != null ? node3 : node4; + Node otherNode = assignedNode.equals(node3) ? node4 : node3; + assertThat(assignments.get(assignedNode), equalTo(1)); + assertThat(assignments.get(otherNode), is(nullValue())); + } + { + assertThat(plan.assignments(deployment4).isPresent(), is(true)); + Map assignments = plan.assignments(deployment4).get(); + assertThat(assignments.get(node1), is(nullValue())); + assertThat(assignments.get(node2), is(nullValue())); + // Will either be on node 3 or 4 + Node assignedNode = assignments.get(node3) != null ? node3 : node4; + Node otherNode = assignedNode.equals(node3) ? 
node4 : node3; + assertThat(assignments.get(assignedNode), equalTo(2)); + assertThat(assignments.get(otherNode), is(nullValue())); + } + } + + public void testMultipleModelsAndNodesWithSingleSolution_NewMemoryFields() { + Node node1 = new Node("n_1", ByteSizeValue.ofMb(800).getBytes(), 7); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(800).getBytes(), 7); + Node node3 = new Node("n_3", ByteSizeValue.ofMb(900).getBytes(), 2); + Node node4 = new Node("n_4", ByteSizeValue.ofMb(900).getBytes(), 2); + Deployment deployment1 = new Deployment( + "m_1", + ByteSizeValue.ofMb(50).getBytes(), + 2, + 4, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + ); + Deployment deployment2 = new Deployment( + "m_2", + ByteSizeValue.ofMb(50).getBytes(), + 2, + 3, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + ); + Deployment deployment3 = new Deployment( + "m_3", + ByteSizeValue.ofMb(50).getBytes(), + 1, + 2, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + ); + Deployment deployment4 = new Deployment( + "m_4", + ByteSizeValue.ofMb(50).getBytes(), + 2, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + ); AssignmentPlan plan = new AssignmentPlanner( List.of(node1, node2, node3, node4), @@ -173,10 +401,53 @@ public void testMultipleModelsAndNodesWithSingleSolution() { } public void testModelWithMoreAllocationsThanAvailableCores_GivenThreeThreadsPerAllocation() { - Deployment deployment = new AssignmentPlan.Deployment("m_1", 30, 10, 3, Map.of(), 0); + Deployment deployment = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 10, 3, Map.of(), 0, 0, 0); + // Single node + { + Node node = new Node("n_1", scaleNodeSize(100), 4); + AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); + assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); + Map assignments = assignmentPlan.assignments(deployment).get(); + assertThat(assignments.get(node), equalTo(1)); + } + // Two nodes + { + Node node1 = new Node("n_1", scaleNodeSize(100), 4); + Node node2 = new Node("n_2", scaleNodeSize(100), 8); + AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment)).computePlan(); + assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); + Map assignments = assignmentPlan.assignments(deployment).get(); + assertThat(assignments.get(node1), equalTo(1)); + assertThat(assignments.get(node2), equalTo(2)); + } + // Three nodes + { + Node node1 = new Node("n_1", scaleNodeSize(100), 4); + Node node2 = new Node("n_2", scaleNodeSize(100), 7); + Node node3 = new Node("n_3", scaleNodeSize(100), 15); + AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2, node3), List.of(deployment)).computePlan(); + assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); + Map assignments = assignmentPlan.assignments(deployment).get(); + assertThat(assignments.get(node1), equalTo(1)); + assertThat(assignments.get(node2), equalTo(2)); + assertThat(assignments.get(node3), equalTo(5)); + } + } + + public void testModelWithMoreAllocationsThanAvailableCores_GivenThreeThreadsPerAllocation_NewMemoryFields() { + Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(50).getBytes(), + 10, + 3, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + 
); // Single node { - Node node = new Node("n_1", 100, 4); + Node node = new Node("n_1", ByteSizeValue.ofMb(800).getBytes(), 4); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); Map assignments = assignmentPlan.assignments(deployment).get(); @@ -184,8 +455,8 @@ public void testModelWithMoreAllocationsThanAvailableCores_GivenThreeThreadsPerA } // Two nodes { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 8); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(800).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(800).getBytes(), 8); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment)).computePlan(); assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); Map assignments = assignmentPlan.assignments(deployment).get(); @@ -194,9 +465,9 @@ public void testModelWithMoreAllocationsThanAvailableCores_GivenThreeThreadsPerA } // Three nodes { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 7); - Node node3 = new Node("n_3", 100, 15); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(800).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(800).getBytes(), 7); + Node node3 = new Node("n_3", ByteSizeValue.ofMb(800).getBytes(), 15); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2, node3), List.of(deployment)).computePlan(); assertThat(assignmentPlan.assignments(deployment).isPresent(), is(true)); Map assignments = assignmentPlan.assignments(deployment).get(); @@ -207,8 +478,17 @@ public void testModelWithMoreAllocationsThanAvailableCores_GivenThreeThreadsPerA } public void testModelWithPreviousAssignmentAndNoMoreCoresAvailable() { - Node node = new Node("n_1", 100, 4); - AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 30, 4, 1, Map.of("n_1", 4), 0); + Node node = new Node("n_1", scaleNodeSize(100), 4); + AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 4, + 1, + Map.of("n_1", 4), + 0, + 0, + 0 + ); AssignmentPlan plan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan(); assertThat(plan.assignments(deployment).isPresent(), is(true)); @@ -217,26 +497,117 @@ public void testModelWithPreviousAssignmentAndNoMoreCoresAvailable() { public void testFullCoreUtilization_GivenModelsWithSingleThreadPerAllocation() { List nodes = List.of( - new Node("n_1", ByteSizeValue.ofGb(6).getBytes(), 8), - new Node("n_2", ByteSizeValue.ofGb(6).getBytes(), 8), - new Node("n_3", ByteSizeValue.ofGb(6).getBytes(), 8), - new Node("n_4", ByteSizeValue.ofGb(6).getBytes(), 8), - new Node("n_5", ByteSizeValue.ofGb(16).getBytes(), 16), - new Node("n_6", ByteSizeValue.ofGb(8).getBytes(), 16) + new Node("n_1", ByteSizeValue.ofGb(18).getBytes(), 8), + new Node("n_2", ByteSizeValue.ofGb(18).getBytes(), 8), + new Node("n_3", ByteSizeValue.ofGb(18).getBytes(), 8), + new Node("n_4", ByteSizeValue.ofGb(18).getBytes(), 8), + new Node("n_5", ByteSizeValue.ofGb(64).getBytes(), 16), + new Node("n_6", ByteSizeValue.ofGb(32).getBytes(), 16) ); List deployments = List.of( - new Deployment("m_1", ByteSizeValue.ofGb(4).getBytes(), 10, 1, Map.of("n_1", 5), 0), - new AssignmentPlan.Deployment("m_2", ByteSizeValue.ofGb(2).getBytes(), 3, 1, Map.of("n_3", 2), 0), - new AssignmentPlan.Deployment("m_3", ByteSizeValue.ofGb(3).getBytes(), 3, 
1, Map.of(), 0), - new Deployment("m_4", ByteSizeValue.ofGb(1).getBytes(), 4, 1, Map.of("n_3", 2), 0), - new Deployment("m_5", ByteSizeValue.ofGb(6).getBytes(), 2, 1, Map.of(), 0), - new Deployment("m_6", ByteSizeValue.ofGb(1).getBytes(), 12, 1, Map.of(), 0), - new AssignmentPlan.Deployment("m_7", ByteSizeValue.ofGb(1).getBytes() / 2, 12, 1, Map.of("n_2", 6), 0), - new Deployment("m_8", ByteSizeValue.ofGb(2).getBytes(), 4, 1, Map.of(), 0), - new Deployment("m_9", ByteSizeValue.ofGb(1).getBytes(), 4, 1, Map.of(), 0), - new AssignmentPlan.Deployment("m_10", ByteSizeValue.ofGb(7).getBytes(), 7, 1, Map.of(), 0), - new Deployment("m_11", ByteSizeValue.ofGb(2).getBytes(), 3, 1, Map.of(), 0), - new Deployment("m_12", ByteSizeValue.ofGb(1).getBytes(), 10, 1, Map.of(), 0) + new Deployment("m_1", ByteSizeValue.ofGb(4).getBytes(), 10, 1, Map.of("n_1", 5), 0, 0, 0), + new AssignmentPlan.Deployment("m_2", ByteSizeValue.ofGb(2).getBytes(), 3, 1, Map.of("n_3", 2), 0, 0, 0), + new AssignmentPlan.Deployment("m_3", ByteSizeValue.ofGb(3).getBytes(), 3, 1, Map.of(), 0, 0, 0), + new Deployment("m_4", ByteSizeValue.ofGb(1).getBytes(), 4, 1, Map.of("n_3", 2), 0, 0, 0), + new Deployment("m_5", ByteSizeValue.ofGb(6).getBytes(), 2, 1, Map.of(), 0, 0, 0), + new Deployment("m_6", ByteSizeValue.ofGb(1).getBytes(), 12, 1, Map.of(), 0, 0, 0), + new AssignmentPlan.Deployment("m_7", ByteSizeValue.ofGb(1).getBytes() / 2, 12, 1, Map.of("n_2", 6), 0, 0, 0), + new Deployment("m_8", ByteSizeValue.ofGb(2).getBytes(), 4, 1, Map.of(), 0, 0, 0), + new Deployment("m_9", ByteSizeValue.ofGb(1).getBytes(), 4, 1, Map.of(), 0, 0, 0), + new AssignmentPlan.Deployment("m_10", ByteSizeValue.ofGb(7).getBytes(), 7, 1, Map.of(), 0, 0, 0), + new Deployment("m_11", ByteSizeValue.ofGb(2).getBytes(), 3, 1, Map.of(), 0, 0, 0), + new Deployment("m_12", ByteSizeValue.ofGb(1).getBytes(), 10, 1, Map.of(), 0, 0, 0) + ); + + AssignmentPlan assignmentPlan = new AssignmentPlanner(nodes, deployments).computePlan(); + + int usedCores = 0; + for (AssignmentPlan.Deployment m : deployments) { + Map assignments = assignmentPlan.assignments(m).orElse(Map.of()); + usedCores += assignments.values().stream().mapToInt(Integer::intValue).sum(); + } + assertThat(usedCores, equalTo(64)); + + assertPreviousAssignmentsAreSatisfied(deployments, assignmentPlan); + } + + public void testFullCoreUtilization_GivenModelsWithSingleThreadPerAllocation_NewMemoryFields() { + List nodes = List.of( + new Node("n_1", ByteSizeValue.ofGb(18).getBytes(), 8), + new Node("n_2", ByteSizeValue.ofGb(18).getBytes(), 8), + new Node("n_3", ByteSizeValue.ofGb(18).getBytes(), 8), + new Node("n_4", ByteSizeValue.ofGb(18).getBytes(), 8), + new Node("n_5", ByteSizeValue.ofGb(64).getBytes(), 16), + new Node("n_6", ByteSizeValue.ofGb(32).getBytes(), 16) + ); + // Use mix of old and new memory fields + List deployments = List.of( + new Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 10, + 1, + Map.of("n_1", 5), + 0, + ByteSizeValue.ofMb(400).getBytes(), + ByteSizeValue.ofMb(100).getBytes() + ), + new Deployment("m_2", ByteSizeValue.ofMb(100).getBytes(), 3, 1, Map.of("n_3", 2), 0, 0, 0), + new Deployment( + "m_3", + ByteSizeValue.ofMb(50).getBytes(), + 3, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + ), + new Deployment( + "m_4", + ByteSizeValue.ofMb(50).getBytes(), + 4, + 1, + Map.of("n_3", 2), + 0, + ByteSizeValue.ofMb(400).getBytes(), + ByteSizeValue.ofMb(100).getBytes() + ), + new Deployment( + "m_5", + 
ByteSizeValue.ofMb(500).getBytes(), + 2, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(800).getBytes(), + ByteSizeValue.ofMb(100).getBytes() + ), + new Deployment( + "m_6", + ByteSizeValue.ofMb(50).getBytes(), + 12, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(50).getBytes(), + ByteSizeValue.ofMb(20).getBytes() + ), + new Deployment( + "m_7", + ByteSizeValue.ofMb(50).getBytes(), + 12, + 1, + Map.of("n_2", 6), + 0, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + ), + new Deployment("m_8", ByteSizeValue.ofGb(2).getBytes(), 4, 1, Map.of(), 0, 0, 0), + new Deployment("m_9", ByteSizeValue.ofGb(1).getBytes(), 4, 1, Map.of(), 0, 0, 0), + new Deployment("m_10", ByteSizeValue.ofGb(7).getBytes(), 7, 1, Map.of(), 0, 0, 0), + new Deployment("m_11", ByteSizeValue.ofGb(2).getBytes(), 3, 1, Map.of(), 0, 0, 0), + new Deployment("m_12", ByteSizeValue.ofGb(1).getBytes(), 10, 1, Map.of(), 0, 0, 0) ); AssignmentPlan assignmentPlan = new AssignmentPlanner(nodes, deployments).computePlan(); @@ -297,6 +668,9 @@ public void testRandomBenchmark() { StopWatch stopWatch = new StopWatch(); stopWatch.start(); AssignmentPlan assignmentPlan = solver.computePlan(); + for (Node node : nodes) { + assertThat(assignmentPlan.getRemainingNodeMemory(node.id()), greaterThanOrEqualTo(0L)); + } stopWatch.stop(); Quality quality = computeQuality(nodes, deployments, assignmentPlan); @@ -336,7 +710,16 @@ public void testPreviousAssignmentsGetAtLeastAsManyAllocationsAfterAddingNewMode .stream() .collect(Collectors.toMap(e -> e.getKey().id(), Map.Entry::getValue)); previousModelsPlusNew.add( - new AssignmentPlan.Deployment(m.id(), m.memoryBytes(), m.allocations(), m.threadsPerAllocation(), previousAssignments, 0) + new AssignmentPlan.Deployment( + m.id(), + m.memoryBytes(), + m.allocations(), + m.threadsPerAllocation(), + previousAssignments, + 0, + 0, + 0 + ) ); } previousModelsPlusNew.add(randomModel("new")); @@ -347,18 +730,20 @@ public void testPreviousAssignmentsGetAtLeastAsManyAllocationsAfterAddingNewMode } public void testGivenLargerModelWithPreviousAssignmentsAndSmallerModelWithoutAssignments() { - Node node1 = new Node("n_1", ByteSizeValue.ofGb(2).getBytes(), 2); - Node node2 = new Node("n_2", ByteSizeValue.ofGb(2).getBytes(), 2); - Node node3 = new Node("n_3", ByteSizeValue.ofGb(2).getBytes(), 2); + Node node1 = new Node("n_1", scaleNodeSize(ByteSizeValue.ofGb(2).getMb()), 2); + Node node2 = new Node("n_2", scaleNodeSize(ByteSizeValue.ofGb(2).getMb()), 2); + Node node3 = new Node("n_3", scaleNodeSize(ByteSizeValue.ofGb(2).getMb()), 2); Deployment deployment1 = new AssignmentPlan.Deployment( "m_1", ByteSizeValue.ofMb(1200).getBytes(), 3, 1, Map.of("n_1", 2, "n_2", 1), + 0, + 0, 0 ); - Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(1100).getBytes(), 2, 1, Map.of(), 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(1100).getBytes(), 2, 1, Map.of(), 0, 0, 0); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2, node3), List.of(deployment1, deployment2)) .computePlan(); assertThat(assignmentPlan.getRemainingNodeMemory("n_1"), greaterThanOrEqualTo(0L)); @@ -381,15 +766,17 @@ public void testGivenLargerModelWithPreviousAssignmentsAndSmallerModelWithoutAss } public void testModelWithoutCurrentAllocationsGetsAssignedIfAllocatedPreviously() { - Node node1 = new Node("n_1", ByteSizeValue.ofGb(4).getBytes(), 2); - Node node2 = new Node("n_2", ByteSizeValue.ofGb(4).getBytes(), 2); + Node node1 = new Node("n_1", ByteSizeValue.ofGb(6).getBytes(), 2); + 
Node node2 = new Node("n_2", ByteSizeValue.ofGb(6).getBytes(), 2); AssignmentPlan.Deployment deployment1 = new Deployment( "m_1", ByteSizeValue.ofMb(1200).getBytes(), 3, 1, Map.of("n_1", 2, "n_2", 1), - 3 + 3, + 0, + 0 ); AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment( "m_2", @@ -397,35 +784,84 @@ public void testModelWithoutCurrentAllocationsGetsAssignedIfAllocatedPreviously( 1, 2, Map.of(), - 1 + 1, + 0, + 0 ); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment1, deployment2)).computePlan(); Map> indexedBasedPlan = convertToIdIndexed(assignmentPlan); assertThat(indexedBasedPlan.keySet(), hasItems("m_1", "m_2")); - assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_1", 2))); - assertThat(indexedBasedPlan.get("m_2"), equalTo(Map.of("n_2", 1))); + if (indexedBasedPlan.get("m_2").containsKey("n_1")) { + assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_2", 2))); + assertThat(indexedBasedPlan.get("m_2"), equalTo(Map.of("n_1", 1))); + } else { + assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_1", 2))); + assertThat(indexedBasedPlan.get("m_2"), equalTo(Map.of("n_2", 1))); + } assertThat(assignmentPlan.getRemainingNodeMemory("n_1"), greaterThanOrEqualTo(0L)); assertThat(assignmentPlan.getRemainingNodeMemory("n_2"), greaterThanOrEqualTo(0L)); } public void testGivenPreviouslyAssignedModels_CannotAllBeAllocated() { - Node node1 = new Node("n_1", ByteSizeValue.ofGb(2).getBytes(), 2); - AssignmentPlan.Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(1200).getBytes(), 1, 1, Map.of(), 1); - AssignmentPlan.Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(1100).getBytes(), 1, 1, Map.of(), 1); + Node node1 = new Node("n_1", scaleNodeSize(ByteSizeValue.ofGb(2).getMb()), 2); + AssignmentPlan.Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(1200).getBytes(), 1, 1, Map.of(), 1, 0, 0); + AssignmentPlan.Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(1100).getBytes(), 1, 1, Map.of(), 1, 0, 0); AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1), List.of(deployment1, deployment2)).computePlan(); assertThat(assignmentPlan.countPreviouslyAssignedModelsThatAreStillAssigned(), equalTo(1L)); } + public void testGivenClusterResize_AllocationShouldNotExceedMemoryConstraints() { + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1840).getBytes(), 2); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(2580).getBytes(), 2); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(800).getBytes(), 2, 1, Map.of(), 0, 0, 0); + Deployment deployment2 = new AssignmentPlan.Deployment("m_2", ByteSizeValue.ofMb(800).getBytes(), 1, 1, Map.of(), 0, 0, 0); + Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(250).getBytes(), 4, 1, Map.of(), 0, 0, 0); + + // First only start m_1 + AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment1)).computePlan(); + + Map> indexedBasedPlan = convertToIdIndexed(assignmentPlan); + assertThat(indexedBasedPlan.keySet(), hasItems("m_1")); + assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_1", 2))); + + // Then start m_2 + assignmentPlan = new AssignmentPlanner( + List.of(node1, node2), + Stream.concat(createModelsFromPlan(assignmentPlan).stream(), Stream.of(deployment2)).toList() + ).computePlan(); + + indexedBasedPlan = convertToIdIndexed(assignmentPlan); + assertThat(indexedBasedPlan.keySet(), hasItems("m_1", "m_2")); + 
assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_1", 2))); + assertThat(indexedBasedPlan.get("m_2"), equalTo(Map.of("n_2", 1))); + + // Then start m_3 + assignmentPlan = new AssignmentPlanner( + List.of(node1, node2), + Stream.concat(createModelsFromPlan(assignmentPlan).stream(), Stream.of(deployment3)).toList() + ).computePlan(); + + indexedBasedPlan = convertToIdIndexed(assignmentPlan); + assertThat(indexedBasedPlan.keySet(), hasItems("m_1", "m_2", "m_3")); + assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_1", 2))); + assertThat(indexedBasedPlan.get("m_2"), equalTo(Map.of("n_2", 1))); + assertThat(indexedBasedPlan.get("m_3"), equalTo(Map.of("n_2", 1))); + + // First, one node goes away. + assignmentPlan = new AssignmentPlanner(List.of(node1), createModelsFromPlan(assignmentPlan)).computePlan(); + assertThat(assignmentPlan.getRemainingNodeMemory("n_1"), greaterThanOrEqualTo(0L)); + } + public void testGivenClusterResize_ShouldAllocateEachModelAtLeastOnce() { - Node node1 = new Node("n_1", ByteSizeValue.ofMb(1200).getBytes(), 2); - Node node2 = new Node("n_2", ByteSizeValue.ofMb(1200).getBytes(), 2); - Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(800).getBytes(), 2, 1, Map.of(), 0); - Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(800).getBytes(), 1, 1, Map.of(), 0); - Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(250).getBytes(), 4, 1, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(2600).getBytes(), 2); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(2600).getBytes(), 2); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(800).getBytes(), 2, 1, Map.of(), 0, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(800).getBytes(), 1, 1, Map.of(), 0, 0, 0); + Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(250).getBytes(), 4, 1, Map.of(), 0, 0, 0); // First only start m_1 AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment1)).computePlan(); @@ -458,8 +894,8 @@ public void testGivenClusterResize_ShouldAllocateEachModelAtLeastOnce() { assertThat(indexedBasedPlan.get("m_3"), equalTo(Map.of("n_2", 1))); // Now the cluster starts getting resized. - Node node3 = new Node("n_3", ByteSizeValue.ofMb(2400).getBytes(), 2); - Node node4 = new Node("n_4", ByteSizeValue.ofMb(2400).getBytes(), 2); + Node node3 = new Node("n_3", ByteSizeValue.ofMb(2600).getBytes(), 2); + Node node4 = new Node("n_4", ByteSizeValue.ofMb(2600).getBytes(), 2); // First, one node goes away. 
assignmentPlan = new AssignmentPlanner(List.of(node1), createModelsFromPlan(assignmentPlan)).computePlan(); @@ -492,11 +928,65 @@ public void testGivenClusterResize_ShouldAllocateEachModelAtLeastOnce() { public void testGivenClusterResize_ShouldRemoveAllocatedModels() { // Ensure that plan is removing previously allocated models if not enough memory is available - Node node1 = new Node("n_1", ByteSizeValue.ofMb(1200).getBytes(), 2); - Node node2 = new Node("n_2", ByteSizeValue.ofMb(1200).getBytes(), 2); - Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(800).getBytes(), 2, 1, Map.of(), 0); - Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(800).getBytes(), 1, 1, Map.of(), 0); - Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(250).getBytes(), 1, 1, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1840).getBytes(), 2); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(2580).getBytes(), 2); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(800).getBytes(), 2, 1, Map.of(), 0, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(800).getBytes(), 1, 1, Map.of(), 0, 0, 0); + Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(250).getBytes(), 1, 1, Map.of(), 0, 0, 0); + + // Create a plan where all deployments are assigned at least once + AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment1, deployment2, deployment3)) + .computePlan(); + Map> indexedBasedPlan = convertToIdIndexed(assignmentPlan); + assertThat(indexedBasedPlan.keySet(), hasItems("m_1", "m_2", "m_3")); + assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_1", 2))); + assertThat(indexedBasedPlan.get("m_2"), equalTo(Map.of("n_2", 1))); + assertThat(indexedBasedPlan.get("m_3"), equalTo(Map.of("n_2", 1))); + assertThat(assignmentPlan.getRemainingNodeMemory(node1.id()), greaterThanOrEqualTo(0L)); + assertThat(assignmentPlan.getRemainingNodeMemory(node2.id()), greaterThanOrEqualTo(0L)); + + // Now the cluster starts getting resized. Ensure that resources are not over-allocated. 
+ assignmentPlan = new AssignmentPlanner(List.of(node1), createModelsFromPlan(assignmentPlan)).computePlan(); + assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_1", 2))); + assertThat(assignmentPlan.getRemainingNodeMemory(node1.id()), greaterThanOrEqualTo(0L)); + assertThat(assignmentPlan.getRemainingNodeCores(node1.id()), greaterThanOrEqualTo(0)); + + } + + public void testGivenClusterResize_ShouldRemoveAllocatedModels_NewMemoryFields() { + // Ensure that plan is removing previously allocated models if not enough memory is available + Node node1 = new Node("n_1", ByteSizeValue.ofMb(700).getBytes(), 2); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 2); + Deployment deployment1 = new Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 2, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(400).getBytes(), + ByteSizeValue.ofMb(100).getBytes() + ); + Deployment deployment2 = new Deployment( + "m_2", + ByteSizeValue.ofMb(100).getBytes(), + 1, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(400).getBytes(), + ByteSizeValue.ofMb(150).getBytes() + ); + Deployment deployment3 = new Deployment( + "m_3", + ByteSizeValue.ofMb(50).getBytes(), + 1, + 1, + Map.of(), + 0, + ByteSizeValue.ofMb(250).getBytes(), + ByteSizeValue.ofMb(50).getBytes() + ); // Create a plan where all deployments are assigned at least once AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node1, node2), List.of(deployment1, deployment2, deployment3)) @@ -536,7 +1026,9 @@ public static List createModelsFromPlan(AssignmentPlan plan) { m.allocations(), m.threadsPerAllocation(), currentAllocations, - Math.max(m.maxAssignedAllocations(), totalAllocations) + Math.max(m.maxAssignedAllocations(), totalAllocations), + 0, + 0 ) ); } @@ -579,7 +1071,7 @@ public static List randomNodes(int scale, String nodeIdPrefix) { for (int i = 0; i < 1 + 3 * scale; i++) { int cores = randomIntBetween(2, 32); long memBytesPerCore = randomFrom(memBytesPerCoreValues); - nodes.add(new Node(nodeIdPrefix + "n_" + i, cores * memBytesPerCore, cores)); + nodes.add(new Node(nodeIdPrefix + "n_" + i, scaleNodeSize(ByteSizeValue.ofBytes(cores * memBytesPerCore).getMb()), cores)); } return nodes; } @@ -594,14 +1086,30 @@ public static List randomModels(int scale, double load) { public static Deployment randomModel(String idSuffix) { int allocations = randomIntBetween(1, 32); - return new Deployment( - "m_" + idSuffix, - randomLongBetween(ByteSizeValue.ofMb(100).getBytes(), ByteSizeValue.ofGb(10).getBytes()), - randomIntBetween(1, 32), - randomIntBetween(1, 4), - Map.of(), - 0 - ); + // randomly choose between old and new memory fields format + if (randomBoolean()) { + return new Deployment( + "m_" + idSuffix, + randomLongBetween(ByteSizeValue.ofMb(100).getBytes(), ByteSizeValue.ofGb(10).getBytes()), + randomIntBetween(1, 32), + randomIntBetween(1, 4), + Map.of(), + 0, + 0, + 0 + ); + } else { + return new Deployment( + "m_" + idSuffix, + randomLongBetween(ByteSizeValue.ofMb(100).getBytes(), ByteSizeValue.ofGb(1).getBytes()), + randomIntBetween(1, 32), + randomIntBetween(1, 4), + Map.of(), + 0, + randomLongBetween(ByteSizeValue.ofMb(100).getBytes(), ByteSizeValue.ofGb(1).getBytes()), + randomLongBetween(ByteSizeValue.ofMb(100).getBytes(), ByteSizeValue.ofGb(1).getBytes()) + ); + } } public static void assertPreviousAssignmentsAreSatisfied(List deployments, AssignmentPlan assignmentPlan) { @@ -628,7 +1136,7 @@ private void runTooManyNodesAndModels(int nodesSize, int modelsSize) { } List deployments = new ArrayList<>(); for (int i = 
0; i < modelsSize; i++) { - deployments.add(new Deployment("m_" + i, ByteSizeValue.ofMb(200).getBytes(), 2, 1, Map.of(), 0)); + deployments.add(new Deployment("m_" + i, ByteSizeValue.ofMb(200).getBytes(), 2, 1, Map.of(), 0, 0, 0)); } // Check plan is computed without OOM exception diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveAllAllocationsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveAllAllocationsTests.java index 4a9b01e535d88..c45ce36394109 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveAllAllocationsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveAllAllocationsTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.inference.assignment.planning; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan.Deployment; import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan.Node; @@ -14,7 +15,6 @@ import java.util.List; import java.util.Map; -import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; @@ -22,77 +22,179 @@ public class PreserveAllAllocationsTests extends ESTestCase { public void testGivenNoPreviousAssignments() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - Deployment deployment1 = new Deployment("m_1", 30, 2, 1, Map.of(), 0); - Deployment deployment2 = new Deployment("m_2", 30, 2, 4, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(440).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(440).getBytes(), 4); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 2, 1, Map.of(), 0, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(30).getBytes(), 2, 4, Map.of(), 0, 0, 0); PreserveAllAllocations preserveAllAllocations = new PreserveAllAllocations( List.of(node1, node2), List.of(deployment1, deployment2) ); - - List nodesPreservingAllocations = preserveAllAllocations.nodesPreservingAllocations(); - assertThat(nodesPreservingAllocations, contains(node1, node2)); - - List modelsPreservingAllocations = preserveAllAllocations.modelsPreservingAllocations(); - assertThat(modelsPreservingAllocations, contains(deployment1, deployment2)); } public void testGivenPreviousAssignments() { - Node node1 = new Node("n_1", 100, 8); - Node node2 = new Node("n_2", 100, 8); - Deployment deployment1 = new AssignmentPlan.Deployment("m_1", 30, 2, 1, Map.of("n_1", 1), 1); - Deployment deployment2 = new Deployment("m_2", 50, 6, 4, Map.of("n_1", 1, "n_2", 2), 3); - PreserveAllAllocations preserveAllAllocations = new PreserveAllAllocations( - List.of(node1, node2), - List.of(deployment1, deployment2) - ); - - List nodesPreservingAllocations = preserveAllAllocations.nodesPreservingAllocations(); - assertThat(nodesPreservingAllocations, hasSize(2)); - - assertThat(nodesPreservingAllocations.get(0).id(), equalTo("n_1")); - assertThat(nodesPreservingAllocations.get(0).availableMemoryBytes(), equalTo(20L)); - assertThat(nodesPreservingAllocations.get(0).cores(), equalTo(3)); - - assertThat(nodesPreservingAllocations.get(1).id(), equalTo("n_2")); - 
assertThat(nodesPreservingAllocations.get(1).availableMemoryBytes(), equalTo(50L)); - assertThat(nodesPreservingAllocations.get(1).cores(), equalTo(0)); - - List modelsPreservingAllocations = preserveAllAllocations.modelsPreservingAllocations(); - assertThat(modelsPreservingAllocations, hasSize(2)); - - assertThat(modelsPreservingAllocations.get(0).id(), equalTo("m_1")); - assertThat(modelsPreservingAllocations.get(0).memoryBytes(), equalTo(30L)); - assertThat(modelsPreservingAllocations.get(0).allocations(), equalTo(1)); - assertThat(modelsPreservingAllocations.get(0).threadsPerAllocation(), equalTo(1)); - assertThat(modelsPreservingAllocations.get(0).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0))); - - assertThat(modelsPreservingAllocations.get(1).id(), equalTo("m_2")); - assertThat(modelsPreservingAllocations.get(1).memoryBytes(), equalTo(50L)); - assertThat(modelsPreservingAllocations.get(1).allocations(), equalTo(3)); - assertThat(modelsPreservingAllocations.get(1).threadsPerAllocation(), equalTo(4)); - assertThat(modelsPreservingAllocations.get(1).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0, "n_2", 0))); - - AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2)) - .assignModelToNode(deployment1, node1, 2) - .build(); - assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 2))); - assertThat(plan.assignments(deployment2).isEmpty(), is(true)); - - plan = preserveAllAllocations.mergePreservedAllocations(plan); - - assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 3))); - assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node1, 1, node2, 2))); - assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(20L)); - assertThat(plan.getRemainingNodeCores("n_1"), equalTo(1)); - assertThat(plan.getRemainingNodeMemory("n_2"), equalTo(50L)); - assertThat(plan.getRemainingNodeCores("n_2"), equalTo(0)); + { + // old memory format + Node node1 = new Node("n_1", ByteSizeValue.ofMb(640).getBytes(), 8); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(640).getBytes(), 8); + Deployment deployment1 = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 1, + Map.of("n_1", 1), + 1, + 0, + 0 + ); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(50).getBytes(), 6, 4, Map.of("n_1", 1, "n_2", 2), 3, 0, 0); + PreserveAllAllocations preserveAllAllocations = new PreserveAllAllocations( + List.of(node1, node2), + List.of(deployment1, deployment2) + ); + + List nodesPreservingAllocations = preserveAllAllocations.nodesPreservingAllocations(); + assertThat(nodesPreservingAllocations, hasSize(2)); + + assertThat(nodesPreservingAllocations.get(0).id(), equalTo("n_1")); + // 640 - [(2*30 + 240) + (2*50 + 240)] = 0: deployments use 640 MB on node 1 + assertThat(nodesPreservingAllocations.get(0).availableMemoryBytes(), equalTo(0L)); + // 8 - (1*1+1*4) = 3 : deployments use 5 cores on the node + assertThat(nodesPreservingAllocations.get(0).cores(), equalTo(3)); + + assertThat(nodesPreservingAllocations.get(1).id(), equalTo("n_2")); + // 640 - (50*2+240) = 300 : deployments use 340 MB on the node + assertThat(nodesPreservingAllocations.get(1).availableMemoryBytes(), equalTo(ByteSizeValue.ofMb(300).getBytes())); + // 8 - (2*4) = 0 : preserving all allocations of deployment 2 should use 8 cores on the node + assertThat(nodesPreservingAllocations.get(1).cores(), equalTo(0)); +
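+ // The models returned for planning have the preserved allocations subtracted (m_1: 2 -> 1, + // m_2: 6 -> 3) and their current allocations per node reset to 0, since the preserved + // allocations are already accounted for in the node capacities above. + List modelsPreservingAllocations = 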
preserveAllAllocations.modelsPreservingAllocations(); + assertThat(modelsPreservingAllocations, hasSize(2)); + + assertThat(modelsPreservingAllocations.get(0).id(), equalTo("m_1")); + assertThat(modelsPreservingAllocations.get(0).memoryBytes(), equalTo(ByteSizeValue.ofMb(30).getBytes())); + assertThat(modelsPreservingAllocations.get(0).allocations(), equalTo(1)); + assertThat(modelsPreservingAllocations.get(0).threadsPerAllocation(), equalTo(1)); + assertThat(modelsPreservingAllocations.get(0).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0))); + + assertThat(modelsPreservingAllocations.get(1).id(), equalTo("m_2")); + assertThat(modelsPreservingAllocations.get(1).memoryBytes(), equalTo(ByteSizeValue.ofMb(50).getBytes())); + assertThat(modelsPreservingAllocations.get(1).allocations(), equalTo(3)); + assertThat(modelsPreservingAllocations.get(1).threadsPerAllocation(), equalTo(4)); + assertThat(modelsPreservingAllocations.get(1).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0, "n_2", 0))); + + // Now we build a plan with the 2 deployments and the 2 nodes. + // Note that deployment 1 already has 1 allocation on node 1, and it gets 2 more. That is more than the + // 2 allocations deployment1 was initialized with, but we don't care at this point. + AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2)) + .assignModelToNode(deployment1, node1, 2) + .build(); + assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 2))); + assertThat(plan.assignments(deployment2).isEmpty(), is(true)); + + plan = preserveAllAllocations.mergePreservedAllocations(plan); + assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 3))); + assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node1, 1, node2, 2))); + + // Node 1 already had deployments 1 and 2 assigned to it, so adding more allocations doesn't change memory usage. 
+ assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(0L)); + // 8 - ((1*1+1*4) + 2*1) = 1 : deployments use 7 cores on the node + assertThat(plan.getRemainingNodeCores("n_1"), equalTo(1)); + // Nothing changed for Node 2 + assertThat(plan.getRemainingNodeMemory("n_2"), equalTo(ByteSizeValue.ofMb(300).getBytes())); + // Nothing changed for Node 2 + assertThat(plan.getRemainingNodeCores("n_2"), equalTo(0)); + } + { + // new memory format + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 8); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 8); + Deployment deployment1 = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 1, + Map.of("n_1", 1), + 1, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + Deployment deployment2 = new Deployment( + "m_2", + ByteSizeValue.ofMb(50).getBytes(), + 6, + 4, + Map.of("n_1", 1, "n_2", 2), + 3, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + PreserveAllAllocations preserveAllAllocations = new PreserveAllAllocations( + List.of(node1, node2), + List.of(deployment1, deployment2) + ); + + List<Node> nodesPreservingAllocations = preserveAllAllocations.nodesPreservingAllocations(); + assertThat(nodesPreservingAllocations, hasSize(2)); + + assertThat(nodesPreservingAllocations.get(0).id(), equalTo("n_1")); + // 1000 - [(30 + 300 + 10) + (50 + 300 + 10)] = 300 : deployments use 700 MB on node 1 + assertThat(nodesPreservingAllocations.get(0).availableMemoryBytes(), equalTo(ByteSizeValue.ofMb(300).getBytes())); + // 8 - (1*1+1*4) = 3 : deployments use 5 cores on the node + assertThat(nodesPreservingAllocations.get(0).cores(), equalTo(3)); + + assertThat(nodesPreservingAllocations.get(1).id(), equalTo("n_2")); + // 1000 - (50 + 300 + 2*10) = 630 : deployments use 370MB on the node + assertThat(nodesPreservingAllocations.get(1).availableMemoryBytes(), equalTo(ByteSizeValue.ofMb(630).getBytes())); + // 8 - (2*4) = 0 : preserving all allocations of deployment 2 should use 8 cores on the node + assertThat(nodesPreservingAllocations.get(1).cores(), equalTo(0)); + + List<Deployment> modelsPreservingAllocations = preserveAllAllocations.modelsPreservingAllocations(); + assertThat(modelsPreservingAllocations, hasSize(2)); + + assertThat(modelsPreservingAllocations.get(0).id(), equalTo("m_1")); + assertThat(modelsPreservingAllocations.get(0).memoryBytes(), equalTo(ByteSizeValue.ofMb(30).getBytes())); + assertThat(modelsPreservingAllocations.get(0).perDeploymentMemoryBytes(), equalTo(ByteSizeValue.ofMb(300).getBytes())); + assertThat(modelsPreservingAllocations.get(0).perAllocationMemoryBytes(), equalTo(ByteSizeValue.ofMb(10).getBytes())); + assertThat(modelsPreservingAllocations.get(0).allocations(), equalTo(1)); + assertThat(modelsPreservingAllocations.get(0).threadsPerAllocation(), equalTo(1)); + assertThat(modelsPreservingAllocations.get(0).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0))); + + assertThat(modelsPreservingAllocations.get(1).id(), equalTo("m_2")); + assertThat(modelsPreservingAllocations.get(1).memoryBytes(), equalTo(ByteSizeValue.ofMb(50).getBytes())); + assertThat(modelsPreservingAllocations.get(1).perDeploymentMemoryBytes(), equalTo(ByteSizeValue.ofMb(300).getBytes())); + assertThat(modelsPreservingAllocations.get(1).perAllocationMemoryBytes(), equalTo(ByteSizeValue.ofMb(10).getBytes())); + assertThat(modelsPreservingAllocations.get(1).allocations(), equalTo(3)); + assertThat(modelsPreservingAllocations.get(1).threadsPerAllocation(), 
equalTo(4)); + assertThat(modelsPreservingAllocations.get(1).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0, "n_2", 0))); + + // Now we have a plan with 2 deployments assigned to 2 nodes. + // Note that deployment 1 already has 1 allocation on node 1, and it gets 2 more. That is more than the 2 allocations defined during + // initialization of deployment1, but we don't care at this point. + AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2)) + .assignModelToNode(deployment1, node1, 2) + .build(); + assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 2))); + assertThat(plan.assignments(deployment2).isEmpty(), is(true)); + + plan = preserveAllAllocations.mergePreservedAllocations(plan); + assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 3))); + assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node1, 1, node2, 2))); + + // 1000 - ((30 + 300 + 3*10) + (50 + 300 + 10)) = 280 : deployments use 720 MB on node 1 + assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(ByteSizeValue.ofMb(280).getBytes())); + // 8 - ((1*1+1*4) + 2*1) = 1 : deployments use 7 cores on the node + assertThat(plan.getRemainingNodeCores("n_1"), equalTo(1)); + // Nothing changed for Node 2 + assertThat(plan.getRemainingNodeMemory("n_2"), equalTo(ByteSizeValue.ofMb(630).getBytes())); + // Nothing changed for Node 2 + assertThat(plan.getRemainingNodeCores("n_2"), equalTo(0)); + } } public void testGivenModelWithPreviousAssignments_AndPlanToMergeHasNoAssignments() { - Node node = new Node("n_1", 100, 4); - AssignmentPlan.Deployment deployment = new Deployment("m_1", 30, 2, 2, Map.of("n_1", 2), 2); + Node node = new Node("n_1", ByteSizeValue.ofMb(400).getBytes(), 4); + Deployment deployment = new Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 2, 2, Map.of("n_1", 2), 2, 0, 0); PreserveAllAllocations preserveAllAllocations = new PreserveAllAllocations(List.of(node), List.of(deployment)); AssignmentPlan plan = AssignmentPlan.builder(List.of(node), List.of(deployment)).build(); @@ -101,7 +203,7 @@ public void testGivenModelWithPreviousAssignments_AndPlanToMergeHasNoAssignments plan = preserveAllAllocations.mergePreservedAllocations(plan); assertThat(plan.assignments(deployment).isPresent(), is(true)); assertThat(plan.assignments(deployment).get(), equalTo(Map.of(node, 2))); - assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(70L)); + // 400 - (30*2 + 240) = 100 : the two preserved allocations use 300MB on the node + assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(ByteSizeValue.ofMb(100).getBytes())); assertThat(plan.getRemainingNodeCores("n_1"), equalTo(0)); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveOneAllocationTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveOneAllocationTests.java index d8c3b09422e92..f646bf5cb2e9d 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveOneAllocationTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/PreserveOneAllocationTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.inference.assignment.planning; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan.Deployment; import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan.Node; @@ -22,10 +23,10 @@ public class 
PreserveOneAllocationTests extends ESTestCase { public void testGivenNoPreviousAssignments() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - Deployment deployment1 = new AssignmentPlan.Deployment("m_1", 30, 2, 1, Map.of(), 0); - AssignmentPlan.Deployment deployment2 = new Deployment("m_2", 30, 2, 4, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(440).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(440).getBytes(), 4); + Deployment deployment1 = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 2, 1, Map.of(), 0, 0, 0); + AssignmentPlan.Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(30).getBytes(), 2, 4, Map.of(), 0, 0, 0); PreserveOneAllocation preserveOneAllocation = new PreserveOneAllocation(List.of(node1, node2), List.of(deployment1, deployment2)); List<Node> nodesPreservingAllocations = preserveOneAllocation.nodesPreservingAllocations(); @@ -36,67 +37,204 @@ public void testGivenNoPreviousAssignments() { } public void testGivenPreviousAssignments() { - Node node1 = new Node("n_1", 100, 8); - Node node2 = new Node("n_2", 100, 8); - AssignmentPlan.Deployment deployment1 = new AssignmentPlan.Deployment("m_1", 30, 2, 1, Map.of("n_1", 1), 1); - AssignmentPlan.Deployment deployment2 = new Deployment("m_2", 50, 6, 4, Map.of("n_1", 1, "n_2", 2), 3); - PreserveOneAllocation preserveOneAllocation = new PreserveOneAllocation(List.of(node1, node2), List.of(deployment1, deployment2)); - - List<Node> nodesPreservingAllocations = preserveOneAllocation.nodesPreservingAllocations(); - assertThat(nodesPreservingAllocations, hasSize(2)); - - assertThat(nodesPreservingAllocations.get(0).id(), equalTo("n_1")); - assertThat(nodesPreservingAllocations.get(0).availableMemoryBytes(), equalTo(20L)); - assertThat(nodesPreservingAllocations.get(0).cores(), equalTo(3)); - - assertThat(nodesPreservingAllocations.get(1).id(), equalTo("n_2")); - assertThat(nodesPreservingAllocations.get(1).availableMemoryBytes(), equalTo(50L)); - assertThat(nodesPreservingAllocations.get(1).cores(), equalTo(4)); - - List<Deployment> modelsPreservingAllocations = preserveOneAllocation.modelsPreservingAllocations(); - assertThat(modelsPreservingAllocations, hasSize(2)); - - assertThat(modelsPreservingAllocations.get(0).id(), equalTo("m_1")); - assertThat(modelsPreservingAllocations.get(0).memoryBytes(), equalTo(30L)); - assertThat(modelsPreservingAllocations.get(0).allocations(), equalTo(1)); - assertThat(modelsPreservingAllocations.get(0).threadsPerAllocation(), equalTo(1)); - assertThat(modelsPreservingAllocations.get(0).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0))); - - assertThat(modelsPreservingAllocations.get(1).id(), equalTo("m_2")); - assertThat(modelsPreservingAllocations.get(1).memoryBytes(), equalTo(50L)); - assertThat(modelsPreservingAllocations.get(1).allocations(), equalTo(4)); - assertThat(modelsPreservingAllocations.get(1).threadsPerAllocation(), equalTo(4)); - assertThat(modelsPreservingAllocations.get(1).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0, "n_2", 1))); - - AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2)) - .assignModelToNode(deployment1, node1, 2) - .assignModelToNode(deployment2, node2, 1) - .build(); - assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 2))); - assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node2, 1))); - - plan = preserveOneAllocation.mergePreservedAllocations(plan); - - 
assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 3))); - assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node1, 1, node2, 2))); - assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(20L)); - assertThat(plan.getRemainingNodeCores("n_1"), equalTo(1)); - assertThat(plan.getRemainingNodeMemory("n_2"), equalTo(50L)); - assertThat(plan.getRemainingNodeCores("n_2"), equalTo(0)); + { + // old memory format + Node node1 = new Node("n_1", ByteSizeValue.ofMb(640).getBytes(), 8); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(640).getBytes(), 8); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 2, 1, Map.of("n_1", 1), 1, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(50).getBytes(), 6, 4, Map.of("n_1", 1, "n_2", 2), 3, 0, 0); + PreserveOneAllocation preserveOneAllocation = new PreserveOneAllocation( + List.of(node1, node2), + List.of(deployment1, deployment2) + ); + + List<Node> nodesPreservingAllocations = preserveOneAllocation.nodesPreservingAllocations(); + assertThat(nodesPreservingAllocations, hasSize(2)); + + assertThat(nodesPreservingAllocations.get(0).id(), equalTo("n_1")); + // 640 - [(30*2+240)+(50*2+240)] = 0 : deployments use all memory on the node + assertThat(nodesPreservingAllocations.get(0).availableMemoryBytes(), equalTo(0L)); + // 8 - (1*1+1*4) = 3 : deployments use 5 cores on the node + assertThat(nodesPreservingAllocations.get(0).cores(), equalTo(3)); + + assertThat(nodesPreservingAllocations.get(1).id(), equalTo("n_2")); + // 640 - (50*2+240) = 300 : deployments use 340MB on the node + assertThat(nodesPreservingAllocations.get(1).availableMemoryBytes(), equalTo(ByteSizeValue.ofMb(300).getBytes())); + // 8 - (1*4) = 4 : preserving 1 allocation of deployment 2 should use 4 cores on the node + assertThat(nodesPreservingAllocations.get(1).cores(), equalTo(4)); + + List<Deployment> modelsPreservingAllocations = preserveOneAllocation.modelsPreservingAllocations(); + assertThat(modelsPreservingAllocations, hasSize(2)); + + assertThat(modelsPreservingAllocations.get(0).id(), equalTo("m_1")); + assertThat(modelsPreservingAllocations.get(0).memoryBytes(), equalTo(ByteSizeValue.ofMb(30).getBytes())); + assertThat(modelsPreservingAllocations.get(0).perDeploymentMemoryBytes(), equalTo(ByteSizeValue.ofMb(0).getBytes())); + assertThat(modelsPreservingAllocations.get(0).perAllocationMemoryBytes(), equalTo(ByteSizeValue.ofMb(0).getBytes())); + assertThat(modelsPreservingAllocations.get(0).allocations(), equalTo(1)); + assertThat(modelsPreservingAllocations.get(0).threadsPerAllocation(), equalTo(1)); + assertThat(modelsPreservingAllocations.get(0).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0))); + + assertThat(modelsPreservingAllocations.get(1).id(), equalTo("m_2")); + assertThat(modelsPreservingAllocations.get(1).memoryBytes(), equalTo(ByteSizeValue.ofMb(50).getBytes())); + assertThat(modelsPreservingAllocations.get(1).perDeploymentMemoryBytes(), equalTo(ByteSizeValue.ofMb(0).getBytes())); + assertThat(modelsPreservingAllocations.get(1).perAllocationMemoryBytes(), equalTo(ByteSizeValue.ofMb(0).getBytes())); + assertThat(modelsPreservingAllocations.get(1).allocations(), equalTo(4)); + assertThat(modelsPreservingAllocations.get(1).threadsPerAllocation(), equalTo(4)); + assertThat(modelsPreservingAllocations.get(1).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0, "n_2", 1))); + + // Now we have a plan with 2 deployments assigned to 2 nodes. 
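+ // Deployment 1 is given 2 new allocations on node 1 and deployment 2 is given 1 new allocation on node 2.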
+ // Note that deployment 1 already has 1 allocation on node 1, and it gets 2 more. That is more than the 2 allocations defined during + // initialization of deployment1, but we don't care at this point. + AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2)) + .assignModelToNode(deployment1, node1, 2) + .assignModelToNode(deployment2, node2, 1) + .build(); + assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 2))); + assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node2, 1))); + + plan = preserveOneAllocation.mergePreservedAllocations(plan); + + assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 3))); + assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node1, 1, node2, 2))); + // Node 1 already had deployments 1 and 2 assigned to it so adding more allocations doesn't change memory usage. + assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(0L)); + // 8 - ((1*1+1*4) + 2*1) = 1 : deployments use 7 cores on the node + assertThat(plan.getRemainingNodeCores("n_1"), equalTo(1)); + // Node 2 already had deployment 2 assigned to it so adding more allocations doesn't change memory usage. + assertThat(plan.getRemainingNodeMemory("n_2"), equalTo(ByteSizeValue.ofMb(300).getBytes())); + // 8 - [(1*4) + (1*4)] = 0 : deployment 2 should use all cores on the node + assertThat(plan.getRemainingNodeCores("n_2"), equalTo(0)); + } + { + // new memory format + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 8); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 8); + Deployment deployment1 = new Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 1, + Map.of("n_1", 1), + 1, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + Deployment deployment2 = new Deployment( + "m_2", + ByteSizeValue.ofMb(50).getBytes(), + 6, + 4, + Map.of("n_1", 1, "n_2", 2), + 3, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + PreserveOneAllocation preserveOneAllocation = new PreserveOneAllocation( + List.of(node1, node2), + List.of(deployment1, deployment2) + ); + + List<Node> nodesPreservingAllocations = preserveOneAllocation.nodesPreservingAllocations(); + assertThat(nodesPreservingAllocations, hasSize(2)); + + assertThat(nodesPreservingAllocations.get(0).id(), equalTo("n_1")); + // 1000 - [(30 + 300 + 10) + (50 + 300 + 10)] = 300 : deployments use 700 MB on the node + assertThat(nodesPreservingAllocations.get(0).availableMemoryBytes(), equalTo(ByteSizeValue.ofMb(300).getBytes())); + // 8 - (1*1+1*4) = 3 : deployments use 5 cores on the node + assertThat(nodesPreservingAllocations.get(0).cores(), equalTo(3)); + + assertThat(nodesPreservingAllocations.get(1).id(), equalTo("n_2")); + // 1000 - (50 + 300 + 2*10) = 630 : deployments use 370MB on the node + assertThat(nodesPreservingAllocations.get(1).availableMemoryBytes(), equalTo(ByteSizeValue.ofMb(630).getBytes())); + // 8 - (1*4) = 4 : preserving 1 allocation of deployment 2 should use 4 cores on the node + assertThat(nodesPreservingAllocations.get(1).cores(), equalTo(4)); + + List<Deployment> modelsPreservingAllocations = preserveOneAllocation.modelsPreservingAllocations(); + assertThat(modelsPreservingAllocations, hasSize(2)); +
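+ // After preserving 1 allocation per assigned node, m_1 is left with 1 of its 2 allocations and m_2 with 4 of its 6.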
+ assertThat(modelsPreservingAllocations.get(0).id(), equalTo("m_1")); + assertThat(modelsPreservingAllocations.get(0).memoryBytes(), equalTo(ByteSizeValue.ofMb(30).getBytes())); + assertThat(modelsPreservingAllocations.get(0).perDeploymentMemoryBytes(), equalTo(ByteSizeValue.ofMb(300).getBytes())); + assertThat(modelsPreservingAllocations.get(0).perAllocationMemoryBytes(), equalTo(ByteSizeValue.ofMb(10).getBytes())); + assertThat(modelsPreservingAllocations.get(0).allocations(), equalTo(1)); + assertThat(modelsPreservingAllocations.get(0).threadsPerAllocation(), equalTo(1)); + assertThat(modelsPreservingAllocations.get(0).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0))); + + assertThat(modelsPreservingAllocations.get(1).id(), equalTo("m_2")); + assertThat(modelsPreservingAllocations.get(1).memoryBytes(), equalTo(ByteSizeValue.ofMb(50).getBytes())); + assertThat(modelsPreservingAllocations.get(1).perDeploymentMemoryBytes(), equalTo(ByteSizeValue.ofMb(300).getBytes())); + assertThat(modelsPreservingAllocations.get(1).perAllocationMemoryBytes(), equalTo(ByteSizeValue.ofMb(10).getBytes())); + assertThat(modelsPreservingAllocations.get(1).allocations(), equalTo(4)); + assertThat(modelsPreservingAllocations.get(1).threadsPerAllocation(), equalTo(4)); + assertThat(modelsPreservingAllocations.get(1).currentAllocationsByNodeId(), equalTo(Map.of("n_1", 0, "n_2", 1))); + + // Now we have a plan with 2 deployments assigned to 2 nodes. + // Note that deployment 1 already has 1 allocation on node 1, and it gets 2 more. That is more than the 2 allocations defined during + // initialization of deployment1, but we don't care at this point. + AssignmentPlan plan = AssignmentPlan.builder(List.of(node1, node2), List.of(deployment1, deployment2)) + .assignModelToNode(deployment1, node1, 2) + .assignModelToNode(deployment2, node2, 1) + .build(); + assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 2))); + assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node2, 1))); + + plan = preserveOneAllocation.mergePreservedAllocations(plan); + + assertThat(plan.assignments(deployment1).get(), equalTo(Map.of(node1, 3))); + assertThat(plan.assignments(deployment2).get(), equalTo(Map.of(node1, 1, node2, 2))); + // 1000 - [(30 + 300 + 3*10) + (50 + 300 + 10)] = 280 : deployments use 720MB on the node + assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(ByteSizeValue.ofMb(280).getBytes())); + // 8 - ((1*1+1*4) + 2*1) = 1 : deployments use 7 cores on the node + assertThat(plan.getRemainingNodeCores("n_1"), equalTo(1)); + // 1000 - (50 + 300 + 2*10) = 630 : deployments use 370MB on the node + assertThat(plan.getRemainingNodeMemory("n_2"), equalTo(ByteSizeValue.ofMb(630).getBytes())); + // 8 - [(1*4) + (1*4)] = 0 : deployment 2 should use all cores on the node + assertThat(plan.getRemainingNodeCores("n_2"), equalTo(0)); + + } } public void testGivenModelWithPreviousAssignments_AndPlanToMergeHasNoAssignments() { - Node node = new Node("n_1", 100, 4); - AssignmentPlan.Deployment deployment = new Deployment("m_1", 30, 2, 2, Map.of("n_1", 2), 2); - PreserveOneAllocation preserveOneAllocation = new PreserveOneAllocation(List.of(node), List.of(deployment)); - - AssignmentPlan plan = AssignmentPlan.builder(List.of(node), List.of(deployment)).build(); - assertThat(plan.assignments(deployment).isEmpty(), is(true)); - - plan = preserveOneAllocation.mergePreservedAllocations(plan); - assertThat(plan.assignments(deployment).isPresent(), is(true)); - assertThat(plan.assignments(deployment).get(), equalTo(Map.of(node, 1))); - assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(70L)); - assertThat(plan.getRemainingNodeCores("n_1"), equalTo(2)); + { + // old memory format + Node node = new Node("n_1", ByteSizeValue.ofMb(400).getBytes(), 4); + Deployment 
deployment = new Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 2, 2, Map.of("n_1", 2), 2, 0, 0); + PreserveOneAllocation preserveOneAllocation = new PreserveOneAllocation(List.of(node), List.of(deployment)); + + AssignmentPlan plan = AssignmentPlan.builder(List.of(node), List.of(deployment)).build(); + assertThat(plan.assignments(deployment).isEmpty(), is(true)); + + plan = preserveOneAllocation.mergePreservedAllocations(plan); + assertThat(plan.assignments(deployment).isPresent(), is(true)); + assertThat(plan.assignments(deployment).get(), equalTo(Map.of(node, 1))); + // 400 - (30*2 + 240) = 100 : deployments use 300MB on the node + assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(ByteSizeValue.ofMb(100).getBytes())); + assertThat(plan.getRemainingNodeCores("n_1"), equalTo(2)); + } + { + // new memory format + Node node = new Node("n_1", ByteSizeValue.ofMb(400).getBytes(), 4); + Deployment deployment = new Deployment( + "m_1", + ByteSizeValue.ofMb(30).getBytes(), + 2, + 2, + Map.of("n_1", 2), + 2, + ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(10).getBytes() + ); + PreserveOneAllocation preserveOneAllocation = new PreserveOneAllocation(List.of(node), List.of(deployment)); + + AssignmentPlan plan = AssignmentPlan.builder(List.of(node), List.of(deployment)).build(); + assertThat(plan.assignments(deployment).isEmpty(), is(true)); + + plan = preserveOneAllocation.mergePreservedAllocations(plan); + assertThat(plan.assignments(deployment).isPresent(), is(true)); + assertThat(plan.assignments(deployment).get(), equalTo(Map.of(node, 1))); + // 400 - (30 + 300 + 10) = 60 : deployments use 340MB on the node + assertThat(plan.getRemainingNodeMemory("n_1"), equalTo(ByteSizeValue.ofMb(60).getBytes())); + assertThat(plan.getRemainingNodeCores("n_1"), equalTo(2)); + } } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlannerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlannerTests.java index 7ceb8bbb86869..651e4764cb894 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlannerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlannerTests.java @@ -36,7 +36,7 @@ public class ZoneAwareAssignmentPlannerTests extends ESTestCase { public void testGivenOneModel_OneNode_OneZone_DoesNotFit() { Node node = new Node("n_1", 100, 1); - AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 100, 1, 2, Map.of(), 0); + AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 100, 1, 2, Map.of(), 0, 0, 0); AssignmentPlan plan = new ZoneAwareAssignmentPlanner(Map.of(List.of(), List.of(node)), List.of(deployment)).computePlan(); @@ -44,8 +44,17 @@ public void testGivenOneModel_OneNode_OneZone_DoesNotFit() { } public void testGivenOneModel_OneNode_OneZone_FullyFits() { - Node node = new Node("n_1", 100, 4); - AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 100, 2, 2, Map.of(), 0); + Node node = new Node("n_1", ByteSizeValue.ofMb(440).getBytes(), 4); + AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 2, + 2, + Map.of(), + 0, + 0, + 0 + ); AssignmentPlan plan = new ZoneAwareAssignmentPlanner(Map.of(List.of(), List.of(node)), List.of(deployment)).computePlan(); @@ -53,8 +62,17 
@@ public void testGivenOneModel_OneNode_OneZone_FullyFits() { } public void testGivenOneModel_OneNode_OneZone_PartiallyFits() { - Node node = new Node("n_1", 100, 5); - AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 100, 3, 2, Map.of(), 0); + Node node = new Node("n_1", ByteSizeValue.ofMb(440).getBytes(), 5); + AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 3, + 2, + Map.of(), + 0, + 0, + 0 + ); AssignmentPlan plan = new ZoneAwareAssignmentPlanner(Map.of(List.of(), List.of(node)), List.of(deployment)).computePlan(); @@ -64,9 +82,18 @@ public void testGivenOneModel_OneNode_OneZone_PartiallyFits() { } public void testGivenOneModelWithSingleAllocation_OneNode_TwoZones() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 100, 1, 2, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(440).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(440).getBytes(), 4); + AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 1, + 2, + Map.of(), + 0, + 0, + 0 + ); AssignmentPlan plan = new ZoneAwareAssignmentPlanner( Map.of(List.of("z1"), List.of(node1), List.of("z2"), List.of(node2)), @@ -82,9 +109,18 @@ public void testGivenOneModelWithSingleAllocation_OneNode_TwoZones() { } public void testGivenOneModel_OneNodePerZone_TwoZones_FullyFits() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 100, 2, 2, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(440).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(440).getBytes(), 4); + AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 2, + 2, + Map.of(), + 0, + 0, + 0 + ); AssignmentPlan plan = new ZoneAwareAssignmentPlanner( Map.of(List.of("z_1"), List.of(node1), List.of("z_2"), List.of(node2)), @@ -99,9 +135,18 @@ public void testGivenOneModel_OneNodePerZone_TwoZones_FullyFits() { } public void testGivenOneModel_OneNodePerZone_TwoZones_PartiallyFits() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment("m_1", 100, 3, 3, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(440).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(440).getBytes(), 4); + AssignmentPlan.Deployment deployment = new AssignmentPlan.Deployment( + "m_1", + ByteSizeValue.ofMb(100).getBytes(), + 3, + 3, + Map.of(), + 0, + 0, + 0 + ); AssignmentPlan plan = new ZoneAwareAssignmentPlanner( Map.of(List.of("z_1"), List.of(node1), List.of("z_2"), List.of(node2)), @@ -117,15 +162,15 @@ public void testGivenOneModel_OneNodePerZone_TwoZones_PartiallyFits() { } public void testGivenThreeModels_TwoNodesPerZone_ThreeZones_FullyFit() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - Node node3 = new Node("n_3", 100, 4); - Node node4 = new Node("n_4", 100, 4); - Node node5 = new Node("n_5", 100, 4); - Node node6 = new Node("n_6", 100, 4); - AssignmentPlan.Deployment deployment1 = new AssignmentPlan.Deployment("m_1", 25, 4, 1, Map.of(), 0); - Deployment deployment2 = new AssignmentPlan.Deployment("m_2", 25, 6, 2, Map.of(), 0); - 
AssignmentPlan.Deployment deployment3 = new AssignmentPlan.Deployment("m_3", 25, 2, 3, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node3 = new Node("n_3", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node4 = new Node("n_4", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node5 = new Node("n_5", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node6 = new Node("n_6", ByteSizeValue.ofMb(1000).getBytes(), 4); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 4, 1, Map.of(), 0, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(30).getBytes(), 6, 2, Map.of(), 0, 0, 0); + Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(30).getBytes(), 2, 3, Map.of(), 0, 0, 0); Map<List<String>, List<Node>> nodesByZone = Map.of( List.of("z_1"), @@ -168,11 +213,11 @@ public void testGivenThreeModels_TwoNodesPerZone_ThreeZones_FullyFit() { } public void testGivenTwoModelsWithSingleAllocation_OneNode_ThreeZones() { - Node node1 = new Node("n_1", 100, 4); - Node node2 = new Node("n_2", 100, 4); - Node node3 = new Node("n_3", 100, 4); - AssignmentPlan.Deployment deployment1 = new Deployment("m_1", 25, 1, 1, Map.of(), 0); - AssignmentPlan.Deployment deployment2 = new Deployment("m_2", 25, 1, 1, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(1000).getBytes(), 4); + Node node3 = new Node("n_3", ByteSizeValue.ofMb(1000).getBytes(), 4); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(30).getBytes(), 1, 1, Map.of(), 0, 0, 0); + Deployment deployment2 = new Deployment("m_2", ByteSizeValue.ofMb(30).getBytes(), 1, 1, Map.of(), 0, 0, 0); AssignmentPlan plan = new ZoneAwareAssignmentPlanner( Map.of(List.of("z1"), List.of(node1), List.of("z2"), List.of(node2), List.of("z3"), List.of(node3)), @@ -203,7 +248,16 @@ public void testPreviousAssignmentsGetAtLeastAsManyAllocationsAfterAddingNewMode .stream() .collect(Collectors.toMap(e -> e.getKey().id(), Map.Entry::getValue)); previousModelsPlusNew.add( - new AssignmentPlan.Deployment(m.id(), m.memoryBytes(), m.allocations(), m.threadsPerAllocation(), previousAssignments, 0) + new AssignmentPlan.Deployment( + m.id(), + m.memoryBytes(), + m.allocations(), + m.threadsPerAllocation(), + previousAssignments, + 0, + 0, + 0 + ) ); } previousModelsPlusNew.add(randomModel("new")); @@ -214,11 +268,11 @@ public void testPreviousAssignmentsGetAtLeastAsManyAllocationsAfterAddingNewMode } public void testGivenClusterResize_GivenOneZone_ShouldAllocateEachModelAtLeastOnce() { - Node node1 = new Node("n_1", ByteSizeValue.ofMb(1200).getBytes(), 2); - Node node2 = new Node("n_2", ByteSizeValue.ofMb(1200).getBytes(), 2); - AssignmentPlan.Deployment deployment1 = new AssignmentPlan.Deployment("m_1", ByteSizeValue.ofMb(800).getBytes(), 2, 1, Map.of(), 0); - AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment("m_2", ByteSizeValue.ofMb(800).getBytes(), 1, 1, Map.of(), 0); - AssignmentPlan.Deployment deployment3 = new AssignmentPlan.Deployment("m_3", ByteSizeValue.ofMb(250).getBytes(), 4, 1, Map.of(), 0); + Node node1 = new Node("n_1", ByteSizeValue.ofMb(2580).getBytes(), 2); + Node node2 = new Node("n_2", ByteSizeValue.ofMb(2580).getBytes(), 2); + Deployment deployment1 = new Deployment("m_1", ByteSizeValue.ofMb(800).getBytes(), 2, 1, Map.of(), 0, 0, 0); + Deployment deployment2 = new Deployment("m_2", 
ByteSizeValue.ofMb(800).getBytes(), 1, 1, Map.of(), 0, 0, 0); + Deployment deployment3 = new Deployment("m_3", ByteSizeValue.ofMb(250).getBytes(), 4, 1, Map.of(), 0, 0, 0); // First only start m_1 AssignmentPlan assignmentPlan = new ZoneAwareAssignmentPlanner(Map.of(List.of(), List.of(node1, node2)), List.of(deployment1)) @@ -252,8 +306,8 @@ public void testGivenClusterResize_GivenOneZone_ShouldAllocateEachModelAtLeastOn assertThat(indexedBasedPlan.get("m_3"), equalTo(Map.of("n_2", 1))); // Now the cluster starts getting resized. - Node node3 = new Node("n_3", ByteSizeValue.ofMb(2400).getBytes(), 2); - Node node4 = new Node("n_4", ByteSizeValue.ofMb(2400).getBytes(), 2); + Node node3 = new Node("n_3", ByteSizeValue.ofMb(5160).getBytes(), 2); + Node node4 = new Node("n_4", ByteSizeValue.ofMb(5160).getBytes(), 2); // First, one node goes away. assignmentPlan = new ZoneAwareAssignmentPlanner(Map.of(List.of(), List.of(node1)), createModelsFromPlan(assignmentPlan)) diff --git a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/index/IndexResolver.java b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/index/IndexResolver.java index 0f887ef4fc105..291722f42ca94 100644 --- a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/index/IndexResolver.java +++ b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/index/IndexResolver.java @@ -651,8 +651,7 @@ public void resolveAsSeparateMappings( } private static GetAliasesRequest createGetAliasesRequest(FieldCapabilitiesResponse response, boolean includeFrozen) { - return new GetAliasesRequest().local(true) - .aliases("*") + return new GetAliasesRequest().aliases("*") .indices(response.getIndices()) .indicesOptions(includeFrozen ? FIELD_CAPS_FROZEN_INDICES_OPTIONS : FIELD_CAPS_INDICES_OPTIONS); } diff --git a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MlAssignmentPlannerUpgradeIT.java b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MlAssignmentPlannerUpgradeIT.java new file mode 100644 index 0000000000000..549ac23e16845 --- /dev/null +++ b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MlAssignmentPlannerUpgradeIT.java @@ -0,0 +1,287 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.upgrades; + +import org.elasticsearch.Version; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.core.Strings; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import static org.elasticsearch.client.WarningsHandler.PERMISSIVE; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +public class MlAssignmentPlannerUpgradeIT extends AbstractUpgradeTestCase { + + private Logger logger = LogManager.getLogger(MlAssignmentPlannerUpgradeIT.class); + + // See PyTorchModelIT for how this model was created + static final String BASE_64_ENCODED_MODEL = + "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwp" + + "TdXBlclNpbXBsZQpxACmBfShYCAAAAHRyYWluaW5ncQGIdWJxAi5QSwcIXOpBBDQAAAA0AAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAA" + + "AAAAAdAEEAc2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQj0AWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaW" + + "lpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWnWOMWvDMBCF9/yKI5MMrnHTQsHgjt2aJdlCEIp9SgWSTpykFvfXV1htaYds0nfv473Jqhjh" + + "kAPywbhgUbzSnC02wwZAyqBYOUzIUUoY4XRe6SVr/Q8lVsYbf4UBLkS2kBk1aOIPxbOIaPVQtEQ8vUnZ/WlrSxTA+JCTNHMc4Ig+Ele" + + "s+Jod+iR3N/jDDf74wxu4e/5+DmtE9mUyhdgFNq7bZ3ekehbruC6aTxS/c1rom6Z698WrEfIYxcn4JGTftLA7tzCnJeD41IJVC+U07k" + + "umUHw3E47Vqh+xnULeFisYLx064mV8UTZibWFMmX0p23wBUEsHCE0EGH3yAAAAlwEAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJ" + + "wA5AHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCNQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpa" + + "WlpaWlpaWlpaWlpaWlpaWlpaWlpaWrWST0+DMBiHW6bOod/BGS94kKpo2Mwyox5x3pbgiXSAFtdR/nQu3IwHiZ9oX88CaeGu9tL0efq" + + "+v8P7fmiGA1wgTgoIcECZQqe6vmYD6G4hAJOcB1E8NazTm+ELyzY4C3Q0z8MsRwF+j4JlQUPEEo5wjH0WB9hCNFqgpOCExZY5QnnEw7" + + "ME+0v8GuaIs8wnKI7RigVrKkBzm0lh2OdjkeHllG28f066vK6SfEypF60S+vuYt4gjj2fYr/uPrSvRv356TepfJ9iWJRN0OaELQSZN3" + + "FRPNbcP1PTSntMr0x0HzLZQjPYIEo3UaFeiISRKH0Mil+BE/dyT1m7tCBLwVO1MX4DK3bbuTlXuy8r71j5Aoho66udAoseOnrdVzx28" + + "UFW6ROuO/lT6QKKyo79VU54emj9QSwcInsUTEDMBAAAFAwAAUEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAZAAYAc2ltcGxlbW9kZWw" + + "vY29uc3RhbnRzLnBrbEZCAgBaWoACKS5QSwcIbS8JVwQAAAAEAAAAUEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAATADsAc2ltcGxlbW" + + "9kZWwvdmVyc2lvbkZCNwBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaMwpQSwcI0" + + "Z5nVQIAAAACAAAAUEsBAgAAAAAICAAAAAAAAFzqQQQ0AAAANAAAABQAAAAAAAAAAAAAAAAAAAAAAHNpbXBsZW1vZGVsL2RhdGEucGts" + + "UEsBAgAAFAAICAgAAAAAAE0EGH3yAAAAlwEAAB0AAAAAAAAAAAAAAAAAhAAAAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5UEs" + + "BAgAAFAAICAgAAAAAAJ7FExAzAQAABQMAACcAAAAAAAAAAAAAAAAAAgIAAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYn" + + "VnX3BrbFBLAQIAAAAACAgAAAAAAABtLwlXBAAAAAQAAAAZAAAAAAAAAAAAAAAAAMMDAABzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsU" + + "EsBAgAAAAAICAAAAAAAANGeZ1UCAAAAAgAAABMAAAAAAAAAAAAAAAAAFAQAAHNpbXBsZW1vZGVsL3ZlcnNpb25QSwYGLAAAAAAAAAAe" + + "Ay0AAAAAAAAAAAAFAAAAAAAAAAUAAAAAAAAAagEAAAAAAACSBAAAAAAAAFBLBgcAAAAA/AUAAAAAAAABAAAAUEsFBgAAAAAFAAUAagE" + + "AAJIEAAAAAA=="; + static final long RAW_MODEL_SIZE; // size of the model before base64 encoding + static { + RAW_MODEL_SIZE = Base64.getDecoder().decode(BASE_64_ENCODED_MODEL).length; + 
} + + public void testMlAssignmentPlannerUpgrade() throws Exception { + assumeTrue("NLP model deployments added in 8.0", isOriginalClusterVersionAtLeast(Version.V_8_0_0)); + + logger.info("Starting testMlAssignmentPlannerUpgrade, model size {}", RAW_MODEL_SIZE); + + switch (CLUSTER_TYPE) { + case OLD -> { + // setup deployments using old and new memory format + setupDeployments(); + + waitForDeploymentStarted("old_memory_format"); + waitForDeploymentStarted("new_memory_format"); + + // assert correct memory format is used + assertOldMemoryFormat("old_memory_format"); + if (isOriginalClusterVersionAtLeast(Version.V_8_11_0)) { + assertNewMemoryFormat("new_memory_format"); + } else { + assertOldMemoryFormat("new_memory_format"); + } + } + case MIXED -> { + ensureHealth(".ml-inference-*,.ml-config*", (request -> { + request.addParameter("wait_for_status", "yellow"); + request.addParameter("timeout", "70s"); + })); + waitForDeploymentStarted("old_memory_format"); + waitForDeploymentStarted("new_memory_format"); + + // assert correct memory format is used + assertOldMemoryFormat("old_memory_format"); + if (isOriginalClusterVersionAtLeast(Version.V_8_11_0)) { + assertNewMemoryFormat("new_memory_format"); + } else { + assertOldMemoryFormat("new_memory_format"); + } + + } + case UPGRADED -> { + ensureHealth(".ml-inference-*,.ml-config*", (request -> { + request.addParameter("wait_for_status", "yellow"); + request.addParameter("timeout", "70s"); + })); + waitForDeploymentStarted("old_memory_format"); + waitForDeploymentStarted("new_memory_format"); + + // assert correct memory format is used + assertOldMemoryFormat("old_memory_format"); + assertNewMemoryFormat("new_memory_format"); + + cleanupDeployments(); + } + } + } + + @SuppressWarnings("unchecked") + private void waitForDeploymentStarted(String modelId) throws Exception { + assertBusy(() -> { + var response = getTrainedModelStats(modelId); + Map<String, Object> map = entityAsMap(response); + List<Map<String, Object>> stats = (List<Map<String, Object>>) map.get("trained_model_stats"); + assertThat(stats, hasSize(1)); + var stat = stats.get(0); + assertThat(stat.toString(), XContentMapValues.extractValue("deployment_stats.state", stat), equalTo("started")); + }, 30, TimeUnit.SECONDS); + } + + @SuppressWarnings("unchecked") + private void assertOldMemoryFormat(String modelId) throws Exception { + var response = getTrainedModelStats(modelId); + Map<String, Object> map = entityAsMap(response); + List<Map<String, Object>> stats = (List<Map<String, Object>>) map.get("trained_model_stats"); + assertThat(stats, hasSize(1)); + var stat = stats.get(0); + // old memory format: fixed 240MB overhead plus 2 * model size + Long expectedMemoryUsage = ByteSizeValue.ofMb(240).getBytes() + RAW_MODEL_SIZE * 2; + Integer actualMemoryUsage = (Integer) XContentMapValues.extractValue("model_size_stats.required_native_memory_bytes", stat); + assertThat( + Strings.format("Memory usage mismatch for the model %s in cluster state %s", modelId, CLUSTER_TYPE.toString()), + actualMemoryUsage, + equalTo(expectedMemoryUsage.intValue()) + ); + } + + @SuppressWarnings("unchecked") + private void assertNewMemoryFormat(String modelId) throws Exception { + var response = getTrainedModelStats(modelId); + Map<String, Object> map = entityAsMap(response); + List<Map<String, Object>> stats = (List<Map<String, Object>>) map.get("trained_model_stats"); + assertThat(stats, hasSize(1)); + var stat = stats.get(0);
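+ // new memory format: per-deployment memory (300MB) + model size + 1 allocation * per-allocation memory (10MB)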
+ Long expectedMemoryUsage = ByteSizeValue.ofMb(300).getBytes() + RAW_MODEL_SIZE + ByteSizeValue.ofMb(10).getBytes(); + Integer actualMemoryUsage = (Integer) XContentMapValues.extractValue("model_size_stats.required_native_memory_bytes", stat); + assertThat(stat.toString(), actualMemoryUsage.toString(), equalTo(expectedMemoryUsage.toString())); + } + + private Response getTrainedModelStats(String modelId) throws IOException { + Request request = new Request("GET", "/_ml/trained_models/" + modelId + "/_stats"); + request.setOptions(request.getOptions().toBuilder().setWarningsHandler(PERMISSIVE).build()); + var response = client().performRequest(request); + assertOK(response); + return response; + } + + private Response infer(String input, String modelId) throws IOException { + Request request = new Request("POST", "/_ml/trained_models/" + modelId + "/deployment/_infer"); + request.setJsonEntity(Strings.format(""" + { "docs": [{"input":"%s"}] } + """, input)); + request.setOptions(request.getOptions().toBuilder().setWarningsHandler(PERMISSIVE).build()); + var response = client().performRequest(request); + assertOK(response); + return response; + } + + private void putModelDefinition(String modelId) throws IOException { + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/definition/0"); + request.setJsonEntity(Strings.format(""" + {"total_definition_length":%s,"definition": "%s","total_parts": 1}""", RAW_MODEL_SIZE, BASE_64_ENCODED_MODEL)); + client().performRequest(request); + } + + private void putVocabulary(List<String> vocabulary, String modelId) throws IOException { + List<String> vocabularyWithPad = new ArrayList<>(); + vocabularyWithPad.add("[PAD]"); + vocabularyWithPad.add("[UNK]"); + vocabularyWithPad.addAll(vocabulary); + String quotedWords = vocabularyWithPad.stream().map(s -> "\"" + s + "\"").collect(Collectors.joining(",")); + + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/vocabulary"); + request.setJsonEntity(Strings.format(""" + { "vocabulary": [%s] } + """, quotedWords)); + client().performRequest(request); + } + + private void setupDeployments() throws Exception { + createTrainedModel("old_memory_format", 0, 0); + putModelDefinition("old_memory_format"); + putVocabulary(List.of("these", "are", "my", "words"), "old_memory_format"); + startDeployment("old_memory_format"); + + createTrainedModel("new_memory_format", ByteSizeValue.ofMb(300).getBytes(), ByteSizeValue.ofMb(10).getBytes()); + putModelDefinition("new_memory_format"); + putVocabulary(List.of("these", "are", "my", "words"), "new_memory_format"); + startDeployment("new_memory_format"); + } + + private void cleanupDeployments() throws IOException { + stopDeployment("old_memory_format"); + deleteTrainedModel("old_memory_format"); + stopDeployment("new_memory_format"); + deleteTrainedModel("new_memory_format"); + } + + private void createTrainedModel(String modelId, long perDeploymentMemoryBytes, long perAllocationMemoryBytes) throws IOException { + Request request = new Request("PUT", "/_ml/trained_models/" + modelId); + if (perAllocationMemoryBytes > 0 && perDeploymentMemoryBytes > 0) { + request.setJsonEntity(Strings.format(""" + { + "description": "simple model for testing", + "model_type": "pytorch", + "inference_config": { + "pass_through": { + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + }, + "metadata": { + "per_deployment_memory_bytes": %s, + "per_allocation_memory_bytes": %s + } + }""", perDeploymentMemoryBytes, perAllocationMemoryBytes)); + } else { + request.setJsonEntity(""" + { + "description": "simple model for testing", + "model_type": "pytorch", + "inference_config": { + "pass_through": { + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + } + }"""); + } + client().performRequest(request); + } + + private void 
deleteTrainedModel(String modelId) throws IOException { + Request request = new Request("DELETE", "_ml/trained_models/" + modelId); + client().performRequest(request); + } + + private Response startDeployment(String modelId) throws IOException { + return startDeployment(modelId, "started"); + } + + private Response startDeployment(String modelId, String waitForState) throws IOException { + Request request = new Request( + "POST", + "/_ml/trained_models/" + + modelId + + "/deployment/_start?timeout=40s&wait_for=" + + waitForState + + "&inference_threads=1&model_threads=1" + ); + request.setOptions(request.getOptions().toBuilder().setWarningsHandler(PERMISSIVE).build()); + var response = client().performRequest(request); + assertOK(response); + return response; + } + + private void stopDeployment(String modelId) throws IOException { + String endpoint = "/_ml/trained_models/" + modelId + "/deployment/_stop"; + Request request = new Request("POST", endpoint); + client().performRequest(request); + } +}