What language are you using?
Rust
Which feature gates did you use?
"polars-io", "parquet", "lazy", "dtype-struct"
Have you tried latest version of polars?
What version of polars are you using?
Latest, master branch.
What operating system are you using polars on?
macOS Monterey 12.3.1
What language version are you using?
$ rustc --version
rustc 1.64.0-nightly (495b21669 2022-07-03)
$ cargo --version
cargo 1.64.0-nightly (dbff32b27 2022-06-24)
Describe your bug.
Reading a nested struct panics with an `OutOfSpec` error.
What are the steps to reproduce the behavior?
Given the attached parquet file with only 2 rows: nested_struct_OutOfSpec.snappy.parquet.zip
Running the following code:
let file_location = "nested_struct_OutOfSpec.snappy.parquet".to_string();
let df = LazyFrame::scan_parquet(
file_location,
ScanArgsParquet::default())
.unwrap()
.select([all()])
.collect()
.unwrap();
dbg!(df);
Results in this panic error:
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: OutOfSpec("The children
DataTypes of a StructArray must equal the children data types.\n However, the
values 1 has a length of 11, which is different from values 0, 2.")',
/.../.cargo/git/checkouts/arrow2-945af624853845da/eeddfac/src/array/struct_/mod.rs:118:52
What is the actual behavior?
The result is a panic error with this output:
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: OutOfSpec("The children
DataTypes of a StructArray must equal the children data types.\n However, the
values 1 has a length of 11, which is different from values 0, 2.")',
/.../.cargo/git/checkouts/arrow2-945af624853845da/eeddfac/src/array/struct_/mod.rs:118:52
stack backtrace:
0: rust_begin_unwind
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/std/src/panicking.rs:584:5
1: core::panicking::panic_fmt
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/panicking.rs:142:14
2: core::result::unwrap_failed
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/result.rs:1805:5
3: core::result::Result<T,E>::unwrap
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/result.rs:1098:23
4: arrow2::array::struct_::StructArray::new
at /.../.cargo/git/checkouts/arrow2-945af624853845da/eeddfac/src/array/struct_/mod.rs:118:9
5: arrow2::array::struct_::StructArray::from_data
at /.../.cargo/git/checkouts/arrow2-945af624853845da/eeddfac/src/array/struct_/mod.rs:127:9
6: <arrow2::io::parquet::read::deserialize::struct_::StructIterator as core::iter::traits::iterator::Iterator>::next
at /.../.cargo/git/checkouts/arrow2-945af624853845da/eeddfac/src/io/parquet/read/deserialize/struct_.rs:50:22
7: <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/boxed.rs:1868:9
8: <arrow2::io::parquet::read::deserialize::struct_::StructIterator as core::iter::traits::iterator::Iterator>::next::{{closure}}
at /.../.cargo/git/checkouts/arrow2-945af624853845da/eeddfac/src/io/parquet/read/deserialize/struct_.rs:26:25
9: core::iter::adapters::map::map_fold::{{closure}}
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/map.rs:84:28
10: core::iter::traits::iterator::Iterator::fold
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/traits/iterator.rs:2414:21
11: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/map.rs:124:9
12: core::iter::traits::iterator::Iterator::for_each
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/traits/iterator.rs:831:9
13: <alloc::vec::Vec<T,A> as alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/spec_extend.rs:40:17
14: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/spec_from_iter_nested.rs:62:9
15: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/spec_from_iter.rs:33:9
16: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/mod.rs:2648:9
17: core::iter::traits::iterator::Iterator::collect
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/traits/iterator.rs:1836:9
18: <arrow2::io::parquet::read::deserialize::struct_::StructIterator as core::iter::traits::iterator::Iterator>::next
at /.../.cargo/git/checkouts/arrow2-945af624853845da/eeddfac/src/io/parquet/read/deserialize/struct_.rs:23:22
19: <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/boxed.rs:1868:9
20: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::next
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/map.rs:103:9
21: <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/boxed.rs:1868:9
22: core::iter::traits::iterator::Iterator::try_fold
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/traits/iterator.rs:2237:29
23: <core::iter::adapters::GenericShunt<I,R> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/mod.rs:191:9
24: core::iter::traits::iterator::Iterator::try_for_each
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/mod.rs:174:9
25: <core::iter::adapters::GenericShunt<I,R> as core::iter::traits::iterator::Iterator>::next
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/mod.rs:174:9
26: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/spec_from_iter_nested.rs:26:32
27: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/spec_from_iter.rs:33:9
28: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/mod.rs:2648:9
29: core::iter::traits::iterator::Iterator::collect
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/result.rs:2092:49
30: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter::{{closure}}
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/result.rs:2092:49
31: core::iter::adapters::try_process
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/mod.rs:160:17
32: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/result.rs:2092:9
33: core::iter::traits::iterator::Iterator::collect
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/traits/iterator.rs:1836:9
34: polars_io::parquet::read_impl::array_iter_to_series
at /.../github/polars/polars/polars-io/src/parquet/read_impl.rs:47:17
35: polars_io::parquet::read_impl::column_idx_to_series
at /.../github/polars/polars/polars-io/src/parquet/read_impl.rs:36:9
36: polars_io::parquet::read_impl::rg_to_dfs::{{closure}}
at /.../github/polars/polars/polars-io/src/parquet/read_impl.rs:126:21
37: core::iter::adapters::map::map_try_fold::{{closure}}
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/map.rs:91:28
38: core::iter::traits::iterator::Iterator::try_fold
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/traits/iterator.rs:2238:21
39: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/map.rs:117:9
40: <core::iter::adapters::GenericShunt<I,R> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/mod.rs:191:9
41: core::iter::traits::iterator::Iterator::try_for_each
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/mod.rs:174:9
42: <core::iter::adapters::GenericShunt<I,R> as core::iter::traits::iterator::Iterator>::next
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/mod.rs:174:9
43: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/spec_from_iter_nested.rs:26:32
44: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/spec_from_iter.rs:33:9
45: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/alloc/src/vec/mod.rs:2648:9
46: core::iter::traits::iterator::Iterator::collect
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/result.rs:2092:49
47: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter::{{closure}}
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/result.rs:2092:49
48: core::iter::adapters::try_process
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/adapters/mod.rs:160:17
49: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/result.rs:2092:9
50: core::iter::traits::iterator::Iterator::collect
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/iter/traits/iterator.rs:1836:9
51: polars_io::parquet::read_impl::rg_to_dfs
at /.../github/polars/polars/polars-io/src/parquet/read_impl.rs:123:13
52: polars_io::parquet::read_impl::read_parquet
at /.../github/polars/polars/polars-io/src/parquet/read_impl.rs:249:63
53: polars_io::parquet::read::ParquetReader<R>::_finish_with_scan_ops
at /.../github/polars/polars/polars-io/src/parquet/read.rs:60:9
54: polars_lazy::physical_plan::executors::scan::parquet::ParquetExec::read
at /.../github/polars/polars/polars-lazy/src/physical_plan/executors/scan/parquet.rs:39:9
55: <polars_lazy::physical_plan::executors::scan::parquet::ParquetExec as polars_lazy::physical_plan::Executor>::execute::{{closure}}
at /.../github/polars/polars/polars-lazy/src/physical_plan/executors/scan/parquet.rs:61:68
56: polars_lazy::physical_plan::file_cache::FileCache::read
at /.../github/polars/polars/polars-lazy/src/physical_plan/file_cache.rs:40:13
57: <polars_lazy::physical_plan::executors::scan::parquet::ParquetExec as polars_lazy::physical_plan::Executor>::execute
at /.../github/polars/polars/polars-lazy/src/physical_plan/executors/scan/parquet.rs:59:9
58: <polars_lazy::physical_plan::executors::udf::UdfExec as polars_lazy::physical_plan::Executor>::execute
at /.../github/polars/polars/polars-lazy/src/physical_plan/executors/udf.rs:12:18
59: polars_lazy::frame::LazyFrame::collect
at /.../github/polars/polars/polars-lazy/src/frame/mod.rs:718:19
60: gyrfalcon::main
at ./src/main.rs:21:14
61: core::ops::function::FnOnce::call_once
at /rustc/7b46aa594c4bdc507fbd904b6777ca30c37a9209/library/core/src/ops/function.rs:248:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
What is the expected behavior?
The parquet file should have been correctly loaded.
The parquet-tools util shows it properly. Also, Apache Spark reads and processes it properly.
bug