- Notifications
You must be signed in to change notification settings - Fork 63
perf: Automatically condense internal expression representation #516
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits Select commit Hold shift + click to select a range
0beb855 perf: Automatically squash internal projection nodes and use internal…
TrevorBergeron 3681b45 Merge remote-tracking branch 'github/main' into auto_merge_project
TrevorBergeron 38e36d0 Merge remote-tracking branch 'github/main' into auto_merge_project
TrevorBergeron 374b9d1 squash less aggressively
TrevorBergeron 542ae49 improve order baking
TrevorBergeron 8c899fa move rewrite to ArrayValue init
TrevorBergeron b49a816 fix condition in bake_ordering
TrevorBergeron 5cdc94a Merge remote-tracking branch 'github/main' into auto_merge_project
TrevorBergeron 63f0465 fix squash logic
TrevorBergeron 3e384c6 Merge branch 'main' into auto_merge_project
TrevorBergeron File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -35,16 +35,21 @@ class SquashedSelect: | |
| columns: Tuple[Tuple[scalar_exprs.Expression, str], ...] | ||
| predicate: Optional[scalar_exprs.Expression] | ||
| ordering: Tuple[order.OrderingExpression, ...] | ||
| reverse_root: bool = False | ||
| | ||
| @classmethod | ||
| def from_node(cls, node: nodes.BigFrameNode) -> SquashedSelect: | ||
| def from_node( | ||
| cls, node: nodes.BigFrameNode, projections_only: bool = False | ||
| ) -> SquashedSelect: | ||
| if isinstance(node, nodes.ProjectionNode): | ||
| return cls.from_node(node.child).project(node.assignments) | ||
| elif isinstance(node, nodes.FilterNode): | ||
| return cls.from_node(node.child, projections_only=projections_only).project( | ||
| node.assignments | ||
| ) | ||
| elif not projections_only and isinstance(node, nodes.FilterNode): | ||
| return cls.from_node(node.child).filter(node.predicate) | ||
| elif isinstance(node, nodes.ReversedNode): | ||
| elif not projections_only and isinstance(node, nodes.ReversedNode): | ||
| return cls.from_node(node.child).reverse() | ||
| elif isinstance(node, nodes.OrderByNode): | ||
| elif not projections_only and isinstance(node, nodes.OrderByNode): | ||
| return cls.from_node(node.child).order_with(node.by) | ||
| else: | ||
| selection = tuple( | ||
| | @@ -63,7 +68,9 @@ def project( | |
| new_columns = tuple( | ||
| (expr.bind_all_variables(self.column_lookup), id) for expr, id in projection | ||
| ) | ||
| return SquashedSelect(self.root, new_columns, self.predicate, self.ordering) | ||
| return SquashedSelect( | ||
| self.root, new_columns, self.predicate, self.ordering, self.reverse_root | ||
| ) | ||
| | ||
| def filter(self, predicate: scalar_exprs.Expression) -> SquashedSelect: | ||
| if self.predicate is None: | ||
| | @@ -72,18 +79,24 @@ def filter(self, predicate: scalar_exprs.Expression) -> SquashedSelect: | |
| new_predicate = ops.and_op.as_expr( | ||
| self.predicate, predicate.bind_all_variables(self.column_lookup) | ||
| ) | ||
| return SquashedSelect(self.root, self.columns, new_predicate, self.ordering) | ||
| return SquashedSelect( | ||
| self.root, self.columns, new_predicate, self.ordering, self.reverse_root | ||
| ) | ||
| | ||
| def reverse(self) -> SquashedSelect: | ||
| new_ordering = tuple(expr.with_reverse() for expr in self.ordering) | ||
| return SquashedSelect(self.root, self.columns, self.predicate, new_ordering) | ||
| return SquashedSelect( | ||
| self.root, self.columns, self.predicate, new_ordering, not self.reverse_root | ||
| ) | ||
| | ||
| def order_with(self, by: Tuple[order.OrderingExpression, ...]): | ||
| adjusted_orderings = [ | ||
| order_part.bind_variables(self.column_lookup) for order_part in by | ||
| ] | ||
| new_ordering = (*adjusted_orderings, *self.ordering) | ||
| return SquashedSelect(self.root, self.columns, self.predicate, new_ordering) | ||
| return SquashedSelect( | ||
| self.root, self.columns, self.predicate, new_ordering, self.reverse_root | ||
| ) | ||
| | ||
| def maybe_join( | ||
| self, right: SquashedSelect, join_def: join_defs.JoinDefinition | ||
| | @@ -126,8 +139,10 @@ def maybe_join( | |
| new_columns = remap_names(join_def, lselection, rselection) | ||
| | ||
| # Reconstruct ordering | ||
| reverse_root = self.reverse_root | ||
| if join_type == "right": | ||
| new_ordering = right.ordering | ||
| reverse_root = right.reverse_root | ||
| elif join_type == "outer": | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need to redefine the Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. outer join we treat as left order taking precedence, so it inherits reverse_root from the self. | ||
| if lmask is not None: | ||
| prefix = order.OrderingExpression(lmask, order.OrderingDirection.DESC) | ||
| | @@ -158,18 +173,31 @@ def maybe_join( | |
| new_ordering = self.ordering | ||
| else: | ||
| raise ValueError(f"Unexpected join type {join_type}") | ||
| return SquashedSelect(self.root, new_columns, new_predicate, new_ordering) | ||
| return SquashedSelect( | ||
| self.root, new_columns, new_predicate, new_ordering, reverse_root | ||
| ) | ||
| | ||
| def expand(self) -> nodes.BigFrameNode: | ||
| # Safest to apply predicates first, as it may filter out inputs that cannot be handled by other expressions | ||
| root = self.root | ||
| if self.reverse_root: | ||
| root = nodes.ReversedNode(child=root) | ||
| if self.predicate: | ||
| root = nodes.FilterNode(child=root, predicate=self.predicate) | ||
| if self.ordering: | ||
| root = nodes.OrderByNode(child=root, by=self.ordering) | ||
| return nodes.ProjectionNode(child=root, assignments=self.columns) | ||
| | ||
| | ||
| def maybe_squash_projection(node: nodes.BigFrameNode) -> nodes.BigFrameNode: | ||
| if isinstance(node, nodes.ProjectionNode) and isinstance( | ||
| node.child, nodes.ProjectionNode | ||
| ): | ||
| # Conservative approach, only squash consecutive projections, even though could also squash filters, reorderings | ||
| return SquashedSelect.from_node(node, projections_only=True).expand() | ||
| return node | ||
| | ||
| | ||
| def maybe_rewrite_join(join_node: nodes.JoinNode) -> nodes.BigFrameNode: | ||
| left_side = SquashedSelect.from_node(join_node.left_child) | ||
| right_side = SquashedSelect.from_node(join_node.right_child) | ||
| | ||
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it the intention to omit the existing
_hidden_ordering_columnshere?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, we are rewriting the hidden columns here, so we are recreating the set of hidden columns and discarding the old set.