Skip to content

Commit

Permalink
perf(transformer): arrow function transform: reduce stack memory usage (
Browse files Browse the repository at this point in the history
#5940)

Arrow function transform maintains a stack for blocks which may (or may not) need a `var _this = this;` statement added to them.

This stack was `Vec<Option<BoundIdentifier>>` (24 bytes per block). Most blocks won't need a statement added, so most entries are `None`.

Introduce an abstraction `SparseStack`, which stores the stack split into 2 arrays. The first array is a `Vec<bool>` indicating, for each block, whether a statement needs to be added or not. Only when a statement *does* need to be added are its details pushed to a separate array, `Vec<BoundIdentifier>`.

This means the memory taken up by the stack will be roughly 1 byte per block, instead of 24 bytes per block (assuming very few blocks need statements added).
  • Loading branch information
overlookmotel committed Sep 23, 2024
1 parent 97a2c41 commit 618e89e
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 20 deletions.
38 changes: 18 additions & 20 deletions crates/oxc_transformer/src/es2015/arrow_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ use oxc_syntax::{
use oxc_traverse::{Ancestor, Traverse, TraverseCtx};
use serde::Deserialize;

use crate::{context::Ctx, helpers::bindings::BoundIdentifier};
use crate::{
context::Ctx,
helpers::{bindings::BoundIdentifier, stack::SparseStack},
};

#[derive(Debug, Default, Clone, Deserialize)]
pub struct ArrowFunctionsOptions {
Expand All @@ -97,17 +100,16 @@ pub struct ArrowFunctionsOptions {
pub struct ArrowFunctions<'a> {
ctx: Ctx<'a>,
_options: ArrowFunctionsOptions,
this_var_stack: std::vec::Vec<Option<BoundIdentifier<'a>>>,
this_var_stack: SparseStack<BoundIdentifier<'a>>,
}

impl<'a> ArrowFunctions<'a> {
pub fn new(options: ArrowFunctionsOptions, ctx: Ctx<'a>) -> Self {
Self {
ctx,
_options: options,
// Initial entry for `Program` scope
this_var_stack: vec![None],
}
// Init stack with empty entry for `Program` (instead of pushing entry in `enter_program`)
let mut this_var_stack = SparseStack::new();
this_var_stack.push(None);

Self { ctx, _options: options, this_var_stack }
}
}

Expand All @@ -118,10 +120,10 @@ impl<'a> Traverse<'a> for ArrowFunctions<'a> {
/// Insert `var _this = this;` for the global scope.
fn exit_program(&mut self, program: &mut Program<'a>, _ctx: &mut TraverseCtx<'a>) {
assert!(self.this_var_stack.len() == 1);
let this_var = self.this_var_stack.pop().unwrap();
if let Some(this_var) = this_var {
if let Some(this_var) = self.this_var_stack.take() {
self.insert_this_var_statement_at_the_top_of_statements(&mut program.body, &this_var);
}
debug_assert!(self.this_var_stack.len() == 1);
}

fn enter_function(&mut self, _func: &mut Function<'a>, _ctx: &mut TraverseCtx<'a>) {
Expand All @@ -140,8 +142,7 @@ impl<'a> Traverse<'a> for ArrowFunctions<'a> {
/// ```
/// Insert the var _this = this; statement outside the arrow function
fn exit_function(&mut self, func: &mut Function<'a>, _ctx: &mut TraverseCtx<'a>) {
let this_var = self.this_var_stack.pop().unwrap();
if let Some(this_var) = this_var {
if let Some(this_var) = self.this_var_stack.pop() {
let Some(body) = &mut func.body else { unreachable!() };

self.insert_this_var_statement_at_the_top_of_statements(
Expand All @@ -156,8 +157,7 @@ impl<'a> Traverse<'a> for ArrowFunctions<'a> {
}

fn exit_static_block(&mut self, block: &mut StaticBlock<'a>, _ctx: &mut TraverseCtx<'a>) {
let this_var = self.this_var_stack.pop().unwrap();
if let Some(this_var) = this_var {
if let Some(this_var) = self.this_var_stack.pop() {
self.insert_this_var_statement_at_the_top_of_statements(&mut block.body, &this_var);
}
}
Expand Down Expand Up @@ -221,8 +221,7 @@ impl<'a> ArrowFunctions<'a> {
// `this` can be in scope at a time. We could create a single `_this` UID and reuse it in each
// scope. But this does not match output for some of Babel's test cases.
// <https://github.com/oxc-project/oxc/pull/5840>
let this_var = self.this_var_stack.last_mut().unwrap();
if this_var.is_none() {
let this_var = self.this_var_stack.get_or_init(|| {
let target_scope_id = ctx
.scopes()
.ancestors(arrow_scope_id)
Expand All @@ -236,14 +235,13 @@ impl<'a> ArrowFunctions<'a> {
})
.unwrap();

this_var.replace(BoundIdentifier::new_uid(
BoundIdentifier::new_uid(
"this",
target_scope_id,
SymbolFlags::FunctionScopedVariable,
ctx,
));
}
let this_var = this_var.as_ref().unwrap();
)
});
Some(this_var.create_spanned_read_reference(span, ctx))
}

Expand Down
115 changes: 115 additions & 0 deletions crates/oxc_transformer/src/helpers/stack.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/// Stack which is sparsely filled.
///
/// Functionally equivalent to a stack implemented as `Vec<Option<T>>`, but more memory-efficient
/// in cases where majority of entries in the stack will be empty (`None`).
///
/// The stack is stored as 2 arrays:
/// 1. `has_values` - Records whether an entry on the stack has a value or not (`Some` or `None`).
/// 2. `values` - Where the stack entry *does* have a value, it's stored in this array.
///
/// Memory is only consumed for values where values exist.
///
/// Where value (`T`) is large, and most entries have no value, this will be a significant memory saving.
///
/// e.g. if `T` is 24 bytes, and 90% of stack entries have no values:
/// * `Vec<Option<T>>` is 24 bytes per entry (or 32 bytes if `T` has no niche).
/// * `SparseStack<T>` averages roughly 3.4 bytes per entry
///   (1 byte for the flag, plus 24 bytes for the 10% of entries which hold a value).
///
/// When the stack grows and reallocates, `SparseStack` has less memory to copy, which is a performance
/// win too.
///
/// # Invariant
/// The number of `true` flags in `has_values` always equals `values.len()`, and the flags and values
/// are in the same stack order. The `unsafe` code in the methods below relies on this.
pub struct SparseStack<T> {
    /// One flag per stack entry. `true` if that entry currently holds a value.
    has_values: Vec<bool>,
    /// Values for the entries whose flag is `true`, in stack order.
    values: Vec<T>,
}

impl<T> Default for SparseStack<T> {
    /// Create an empty `SparseStack`. Same as [`SparseStack::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl<T> SparseStack<T> {
    /// Create new `SparseStack`.
    ///
    /// The stack starts with no entries.
    pub fn new() -> Self {
        Self { has_values: vec![], values: vec![] }
    }

    /// Push an entry to the stack.
    ///
    /// `Some(value)` stores `value` and records the entry as filled.
    /// `None` only records an empty entry, and does not touch `values`.
    #[inline]
    pub fn push(&mut self, value: Option<T>) {
        let has_value = if let Some(value) = value {
            self.values.push(value);
            true
        } else {
            false
        };
        self.has_values.push(has_value);
    }

    /// Pop last entry from the stack.
    ///
    /// Returns `Some(value)` if the entry held a value, or `None` if it was empty.
    ///
    /// # Panics
    /// Panics if the stack is empty.
    pub fn pop(&mut self) -> Option<T> {
        let has_value = self.has_values.pop().unwrap();
        if has_value {
            debug_assert!(!self.values.is_empty());
            // SAFETY: Last `self.has_values` is only `true` if there's a corresponding value in `self.values`.
            // This invariant is maintained in `push`, `take`, and `get_or_init`.
            // We maintain it here too because we just popped from `self.has_values`, so that `true`
            // has been consumed at the same time we consume its corresponding value from `self.values`.
            let value = unsafe { self.values.pop().unwrap_unchecked() };
            Some(value)
        } else {
            None
        }
    }

    /// Take value from last entry on the stack.
    ///
    /// The entry itself stays on the stack (so `len()` is unchanged), but is left empty.
    /// Returns `None` if the entry was already empty.
    ///
    /// # Panics
    /// Panics if the stack is empty.
    pub fn take(&mut self) -> Option<T> {
        let has_value = self.has_values.last_mut().unwrap();
        if *has_value {
            *has_value = false;

            debug_assert!(!self.values.is_empty());
            // SAFETY: Last `self.has_values` is only `true` if there's a corresponding value in `self.values`.
            // This invariant is maintained in `push`, `pop`, and `get_or_init`.
            // We maintain it here too because we just set last `self.has_values` to `false`
            // at the same time as we consume the corresponding value from `self.values`.
            let value = unsafe { self.values.pop().unwrap_unchecked() };
            Some(value)
        } else {
            None
        }
    }

    /// Initialize the value for top entry on the stack, if it has no value already.
    /// Return reference to value.
    ///
    /// `init` is only called if the top entry is empty.
    /// If `init` panics, the top entry is left empty and the stack remains in a valid state.
    ///
    /// # Panics
    /// Panics if the stack is empty.
    pub fn get_or_init<I: FnOnce() -> T>(&mut self, init: I) -> &T {
        let has_value = self.has_values.last_mut().unwrap();
        if !*has_value {
            // Store the value *before* flipping the flag. If `init()` (or the push) panics,
            // the flag is still `false`, so the flag/value invariant relied on by the `unsafe`
            // code in `pop`, `take`, and below is not broken if the panic is caught.
            self.values.push(init());
            *has_value = true;
        }

        debug_assert!(!self.values.is_empty());
        // SAFETY: Last `self.has_values` is only `true` if there's a corresponding value in `self.values`.
        // This invariant is maintained in `push`, `pop`, and `take`.
        // Here either last `self.has_values` was already `true`, or a value has just been pushed
        // to `self.values` above and the flag then set to `true`.
        unsafe { self.values.last().unwrap_unchecked() }
    }

    /// Get number of entries on the stack.
    ///
    /// Counts both filled and empty entries.
    #[inline]
    pub fn len(&self) -> usize {
        self.has_values.len()
    }

    /// Returns `true` if stack is empty.
    ///
    /// "Empty" means the stack has no entries at all, not that its entries hold no values.
    #[inline]
    #[expect(dead_code)]
    pub fn is_empty(&self) -> bool {
        self.has_values.is_empty()
    }
}
1 change: 1 addition & 0 deletions crates/oxc_transformer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ mod typescript;
mod helpers {
pub mod bindings;
pub mod module_imports;
pub mod stack;
}

use std::{path::Path, rc::Rc};
Expand Down

0 comments on commit 618e89e

Please sign in to comment.