Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
sungwy committed Aug 14, 2024
1 parent 821ab9c commit 76b4822
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1464,8 +1464,10 @@ def project_batches(self, tasks: Iterable[FileScanTask]) -> Iterator[pa.RecordBa
def _project_batches_from_scan_tasks_and_deletes(
self, tasks: Iterable[FileScanTask], deletes_per_file: Dict[str, List[ChunkedArray]]
) -> Iterator[pa.RecordBatch]:
limit = self._limit
total_row_count = 0
for task in tasks:
if self._limit is not None and total_row_count >= self._limit:
break
batches = _task_to_record_batches(
self._fs,
task,
Expand All @@ -1478,12 +1480,13 @@ def _project_batches_from_scan_tasks_and_deletes(
self._use_large_types,
)
for batch in batches:
if limit is not None:
if len(batch) >= limit:
yield batch.slice(0, limit)
if self._limit is not None:
if total_row_count >= self._limit:
break
limit -= len(batch)
elif total_row_count + len(batch) >= self._limit:
batch = batch.slice(0, self._limit - total_row_count)
yield batch
total_row_count += len(batch)


@deprecated(
Expand Down

0 comments on commit 76b4822

Please sign in to comment.