o
    _~#g'                     @   s  d Z ddlZddlmZ ddlmZ ddlZddlZddlZ	ddl
Z
ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlZddlmZ dd	lmZ dd
lmZmZmZm Z m!Z! zddl"Z"dZ#W n e$yo   dZ#Y nw zddl%Z%dZ&W n e$y   dZ&Y nw e
j'(de
j'(dgZ)e
j*e
j+de
j'j,e& peddddkddde
j+de
j'j,e# dddgddd Z-e
j*dd Z.e
j*dd Z/e
j*dd  Z0e
j*d!d" Z1e
j*d#d$ Z2e
j*ej3ej4j5ej3ej4j6ej3ej4j7ej8d%d&ej8d'd&ej8d(d&ej8d)d&gdd*d+ Z9									,dKd-d.Z:d/d0 Z;d1d2 Z<d3d4 Z=d5d6 Z>d7d8 Z?d9d: Z@d;d< ZAd=d> ZBd?d@ ZCdAdB ZDG dCdD dDZEG dEdF dFeEZFG dGdH dHeEZGG dIdJ dJeEZHdS )Lz test parquet compat     N)Decimal)BytesIO)using_copy_on_write)_get_option)is_platform_windows)pa_version_under11p0pa_version_under13p0pa_version_under15p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFz2ignore:DataFrame._data is deprecated:FutureWarningz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningfastparquetmode.data_managersilentarrayz4fastparquet is not installed or ArrayManager is usedreason)markspyarrowpyarrow is not installed)paramsc                 C      | j S Nparamrequest r!   /var/www/static.ux5.de/https/Moving-Object-Detection-with-OpenCV/env/lib/python3.10/site-packages/pandas/tests/io/test_parquet.pyengine;   s   r#   c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr!   r!   r!   r"   paQ   s   
r'   c                   C   s0   t s	td dS tddddkrtd dS )Nzfastparquet is not installedr   Tr   r   z.ArrayManager is not supported with fastparquetr   )_HAVE_FASTPARQUETr%   r&   r   r!   r!   r!   r"   fpX   s   

r)   c                   C   s   t g dddS )N         fooAB)pd	DataFramer!   r!   r!   r"   	df_compata   s   r4   c               
   C   sB   t tdttddtjddddg dt jd	d
dd} | S )Nabcr+            @      @float64dtypeTFT20130101r-   periods)abdef)r2   r3   listrangenparange
date_range)dfr!   r!   r"   df_cross_compatf   s   rK   c                   C   s   t tddtjdgg dg dg dttddtd	d
dtjdddddtjdgg dt jdd	dt 	dt j
t 	dgdS )Nr5   r@   cr@   NrL   )   foo   bars   bazr.   barbazr+   r6   r-      u1r7   r8   r9   r:          @      @r<   r=   r>   20130103)stringstring_with_nanstring_with_nonebytesunicodeintuintfloatfloat_with_nanbooldatetimedatetime_with_nat)r2   r3   rE   rG   nanrF   rH   astyperI   	TimestampNaTr!   r!   r!   r"   df_fullx   s$   

rh   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   r   r   r   r   r!   r!   r"   timezone_aware_date_list   s   ri   r,   c
                    s   pddip	i du r|r|d< |d<  fdd}
du rFt  |
|	 W d   dS 1 s?w   Y  dS |
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr#   c                    sZ   t | D ]&}jfi  tfi }dv r d jd< tj| d qd S )NrY   r+   rY   )check_names
check_likecheck_dtype)rF   r   r   loctmassert_frame_equal)repeat_actualrn   rm   rl   rJ   expectedpathread_kwargswrite_kwargsr!   r"   compare   s   
z!check_round_trip.<locals>.compare)rp   ensure_clean)rJ   r#   rw   ry   rx   rv   rl   rm   rn   rr   rz   r!   ru   r"   check_round_trip   s   "

"r|   c                 C   s0   ddl m} |j| dd}|jjj|ksJ dS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   Nhive)partitioning)pyarrow.datasetdatasetr~   schemanames)rw   rv   dsr   r!   r!   r"   check_partition_names   s   
r   c                 C   sD   d}t jt|d t| dd W d    d S 1 sw   Y  d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr.   rQ   )r%   raises
ValueErrorr|   )r4   msgr!   r!   r"   test_invalid_engine   s   "r   c                 C   :   t dd t|  W d    d S 1 sw   Y  d S )Nio.parquet.enginer   r2   option_contextr|   )r4   r'   r!   r!   r"   test_options_py      
"r   c                 C   r   )Nr   r   r   )r4   r)   r!   r!   r"   test_options_fp   r   r   c                 C   r   )Nr   autor   )r4   r)   r'   r!   r!   r"   test_options_auto  r   r   c                 C   sP  t tdts	J t tdtsJ tdd# t tdts"J t tdts+J t tdts4J W d    n1 s>w   Y  tdd# t tdtsSJ t tdts\J t tdtseJ W d    n1 sow   Y  tdd$ t tdtsJ t tdtsJ t tdtsJ W d    d S 1 sw   Y  d S )Nr   r   r   r   )
isinstancer   r   r   r2   r   )r)   r'   r!   r!   r"   test_options_get_engine  s"   "r   c                  C   s  ddl m}  | d}| d}tsdnttjt|k }ts!dnttjt|k }to.| }to3| }|s|s|r\d| d}t	j
t|d td	 W d    n1 sVw   Y  nd
}t	j
t|d td	 W d    n1 stw   Y  |rd| d}t	j
t|d td	 W d    d S 1 sw   Y  d S d}t	j
t|d td	 W d    d S 1 sw   Y  d S d S d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.r   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)pandas.compat._optionalr   getr$   r
   r   __version__r(   r   r%   r   ImportErrorr   )r   
pa_min_ver
fp_min_verhave_pa_bad_versionhave_fp_bad_versionhave_usable_pahave_usable_fpr   r!   r!   r"   "test_get_engine_auto_error_message   sD   






"
"r   c                 C   s   | }t  0}|j||d d t||d}t || t||ddgd}t ||ddg  W d    d S 1 s9w   Y  d S N)r#   rj   r#   r@   rB   )r#   columnsrp   r{   r   r   rq   rK   r'   r)   rJ   rw   resultr!   r!   r"   test_cross_engine_pa_fpK  s   
"r   c                 C   s   | }t  0}|j||d d t||d}t || t||ddgd}t ||ddg  W d    d S 1 s9w   Y  d S r   r   r   r!   r!   r"   test_cross_engine_fp_paY  s   
"r   c              	   C   s   t dg di}d}t 0}tjt|ddd |||  W d    n1 s*w   Y  W d    d S W d    d S 1 sBw   Y  d S )Nr@   r*   zqStarting with pandas version 3.0 all arguments of to_parquet except for the argument 'path' will be keyword-only.F)r   check_stacklevelraise_on_extra_warnings)r2   r3   rp   r{   assert_produces_warningFutureWarningr   )r#   rJ   r   rw   r!   r!   r"   !test_parquet_pos_args_deprecationf  s   
"r   c                   @   s4   e Zd Zdd Zdd Zejjejjdd Z	dS )Basec              	   C   s|   t  0}tj||d t|||d d W d    n1 sw   Y  W d    d S W d    d S 1 s7w   Y  d S )Nr   rj   )rp   r{   r%   r   r   )selfrJ   r#   excerr_msgrw   r!   r!   r"   check_error_on_writex  s   
"zBase.check_error_on_writec              	   C   sx   t  .}t | t|||d d W d    n1 sw   Y  W d    d S W d    d S 1 s5w   Y  d S )Nr   )rp   r{   external_error_raisedr   )r   rJ   r#   r   rw   r!   r!   r"   check_external_error_on_write~  s   
"z"Base.check_external_error_on_writec                 C   sr   |dkr	t | t|dddddd}|j| d t|j}W d    n1 s,w   Y  t|| d S )	Nr   iodataparquetzsimple.parquetrb)mode)content)	r%   importorskipopenserve_contentreadr   urlrp   rq   )r   
httpserverdatapathr4   r#   rD   rJ   r!   r!   r"   test_parquet_read_from_url  s   
zBase.test_parquet_read_from_urlN)
__name__
__module____qualname__r   r   r%   marknetwork
single_cpur   r!   r!   r!   r"   r   w  s    r   c                   @   s   e Zd Zdd Zdd Zejdg ddd Zd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zejd!g d"d#d$ Zd%S )&	TestBasicc                 C   sF   t g dddt dtg dfD ]}d}| ||t| qd S )Nr*   r+   r.   r=   z+to_parquet only supports IO with DataFrames)r2   Seriesrf   rG   r   r   r   )r   r#   objr   r!   r!   r"   
test_error  s   zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr5   r+   r6   rX   r]   r.   rQ   )r2   r3   rE   rF   r   r|   )r   r#   rJ   r!   r!   r"   test_columns_dtypes  s   
zTestBasic.test_columns_dtypesrj   )Ngzipsnappybrotlic                 C   s(   t dg di}t||d|id d S )Nr0   r*   rj   ry   r2   r3   r|   )r   r#   rj   rJ   r!   r!   r"   test_compression  s   zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr5   r+   r6   r   rX   r   rv   rx   r2   r3   rE   rF   r|   )r   r#   rJ   rv   r!   r!   r"   test_read_columns  s
   
zTestBasic.test_read_columnsc              	   C   sX   t ttdtdd}t dddgi}t||||ddgid	gdgd
dd d S )Nr6   aabb)r]   partr]   r   r+   partition_colsr   )r   ==r@   )filtersr   )rw   rv   ry   rx   rr   r   )r   r#   tmp_pathrJ   rv   r!   r!   r"   test_read_filters  s   

zTestBasic.test_read_filtersc                 C   s   |dk}t dg di}t|| g dt jdddtdg d	g}|D ]}||_t|t jr7|jd |_t|||d
 q%g d|_d|j_	t|| d S )Nr   r0   r*   )r,   r-   r6   r=   r-   r>   r5   )r+   r-   r6   )rl   )r   r+   r,   r.   )
r2   r3   r|   rI   rE   indexr   DatetimeIndex
_with_freqname)r   r#   rl   rJ   indexesr   r!   r!   r"   test_write_index  s    

zTestBasic.test_write_indexc                 C   s:   |}t dg di}t jg d}||_t|| d S )Nr0   r*   )r@   r+   )r@   r,   )rA   r+   )r2   r3   
MultiIndexfrom_tuplesr   r|   )r   r'   r#   rJ   r   r!   r!   r"   test_write_multiindex  s
   zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjddt| dftdd}t j	j
d	d
g|gddgd}|jd d}||fD ]}||_t|| t||dddgi|ddg d q8d S )Nz01-Jan-2018z01-Dec-2018MS)freqr,   r-   ABCr   Level1Level2leveldate)r   r   r0   r1   rx   rv   )r2   rI   r3   rG   randomdefault_rngstandard_normallenrE   r   from_productcopyr   r|   )r   r'   r#   datesrJ   index1index2r   r!   r!   r"   test_multiindex_with_columns  s"   
z&TestBasic.test_multiindex_with_columnsc                 C   s   t g dg dd}d dd}|jdd}t||||d t jg dg ddg d	d
}t||||d g dg dg}t jttddd tdD d|d
}|jdd}t||||d d S )Nr*   )qrs)r@   rA   F)rj   r   T)dropry   rv   )zyxwvutsrr   rQ   rQ   rR   rR   r.   r.   quxr  onetwor
  r  r
  r  r
  r     c                 S   s   g | ]}| qS r!   r!   ).0ir!   r!   r"   
<listcomp>  s    z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r
  r  )r2   r3   reset_indexr|   rE   rF   )r   r#   rJ   ry   rv   arraysr!   r!   r"   test_write_ignoring_index  s    
 z#TestBasic.test_write_ignoring_indexc                 C   sb   t jg d}t jtjdd|d}|dkr$| ||t	d d S |dkr/t
|| d S d S )Nr   r,   )r6   r-   r   r   Column name must be a stringr   )r2   r   r   r3   rG   r   r   r   r   	TypeErrorr|   )r   r#   
mi_columnsrJ   r!   r!   r"   test_write_column_multiindex  s   z&TestBasic.test_write_column_multiindexc                 C   sn   g dg dg}t jtjdd|d}ddg|j_|dkr*| ||t	d	 d S |d
kr5t
|| d S d S )Nr  )r+   r,   r+   r,   r+   r,   r+   r,   r,   r  r  r   r   r   r   zColumn namer   )r2   r3   rG   r   r   r   r   r   r   r   r|   r   r#   r  rJ   r!   r!   r"   &test_write_column_multiindex_nonstring%  s   z0TestBasic.test_write_column_multiindex_nonstringc                 C   sJ   |}g dg dg}t jtjdd|d}ddg|j_t|| d S )Nr  r	  r,   r  r   	ColLevel1	ColLevel2)	r2   r3   rG   r   r   r   r   r   r|   r   r'   r#   r  rJ   r!   r!   r"   #test_write_column_multiindex_string6  s   z-TestBasic.test_write_column_multiindex_stringc                 C   s>   |}g d}t jtjdd|d}d|j_t|| d S )N)rQ   rR   r.   r  r,   r  r6   r   	StringCol)	r2   r3   rG   r   r   r   r   r   r|   r  r!   r!   r"   test_write_column_index_stringG  s   z(TestBasic.test_write_column_index_stringc                 C   sV   g d}t jtjdd|d}d|j_|dkr$| ||t	d d S t
|| d S )Nr+   r,   r-   r6   r,   r  r   NonStringColr   r  )r2   r3   rG   r   r   r   r   r   r   r  r|   r  r!   r!   r"   !test_write_column_index_nonstringU  s   z+TestBasic.test_write_column_index_nonstringc           
      C   s  t d}|dkrt jjdd}|| ttg ddtg ddtg dtg d	tg d
dtg ddtg ddd}t	 }|
|| t||d}t||dd}W d    n1 slw   Y  |d jtdks}J ttjg dddtjg dddtjg dddtjg d	ddtjg d
ddtjg dddtjg dddd}	|dkr|jddd}|	jddd}	t||	 d S )Nzpyarrow.parquetr   z.Fastparquet nullable dtype support is disabledr   r+   r,   r-   Nint64uint8)r@   rA   rL   N)TFTNr!  )      ?rU   rV   Nfloat32r9   )r@   rA   rL   rB   rC   rD   gr   numpy_nullabler#   dtype_backendr@   Int64r:   UInt8rX   booleanFloat32Float64rL   r+   )axis)r%   r   r   xfailapplymarkerr   tabler   rp   r{   write_tabler   r;   rG   r2   r3   r  rq   )
r   r#   r    pqr   r5  rw   result1result2rv   r!   r!   r"   test_dtype_backende  sH   


zTestBasic.test_dtype_backendr;   )	r-  r.  r/  objectzdatetime64[ns, UTC]r_   z	period[D]r1  rX   c                 C   sT   t dt jg |di}d }|dkrt dt jg ddi}t||ddi|d d S )Nvaluer:   r_   r1  r,  r*  r   )r2   r3   r   r|   )r   r'   r;   rJ   rv   r!   r!   r"   test_read_empty_array  s   
zTestBasic.test_read_empty_arrayN)r   r   r   r   r   r%   r   parametrizer   r   r   r   r   r   r  r  r  r  r   r#  r:  r=  r!   r!   r!   r"   r     s,    
		 1r   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jje dde
jje dde
jdeejgdd Zdd Ze
jjdd Ze
jjdd Ze
jje
jddgg gdd Zdd  Zd!d" Zd#d$ Zd%d& Ze
jjded'd( gd)d*gd+d,d- Zd.d/ Zd0d1 Z d2d3 Z!d4d5 Z"d6d7 Z#d8d9 Z$d:d; Z%d<d= Z&d>d? Z'd@dA Z(dBdC Z)dDdE Z*dFdG Z+dHdI Z,dJdK Z-e
jje.dLddMdN Z/dOdP Z0dQS )RTestParquetPyArrowc                 C   s@   |}t jdddd}|d }||d< g d|d< t|| d S )Nr=   r-   Europe/Brusselsr?   tzdatetime_tzTNTbool_with_none)r2   rI   r   r|   )r   r'   rh   rJ   dtir!   r!   r"   
test_basic  s   
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr=   r-   r@  rA  rC  rX   r]   r   r   )r2   rI   r|   )r   r'   rh   rJ   r!   r!   r"   test_basic_subset_columns  s   


z,TestParquetPyArrow.test_basic_subset_columnsc                 C   sL   |j |d}t|tsJ t|}t|}| }d |jd< t|| d S )Nr   rk   )	r   r   r[   r   r   r   ro   rp   rq   )r   r'   rh   	buf_bytes
buf_streamresrv   r!   r!   r"   *test_to_bytes_without_path_or_buf_provided  s   
z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   r6   r-   aaar   zDuplicate column names found	r2   r3   rG   rH   reshaperE   r   r   r   r   r'   rJ   r!   r!   r"   test_duplicate_columns  s   $z)TestParquetPyArrow.test_duplicate_columnsc                 C   s&   t dt jdddi}t|| d S )Nr@   1 dayr-   r>   )r2   r3   timedelta_ranger|   rQ  r!   r!   r"   test_timedelta     z!TestParquetPyArrow.test_timedeltac                 C   s&   t dg di}| ||tj d S )Nr@   r@   r+   rU   )r2   r3   r   r   ArrowExceptionrQ  r!   r!   r"   test_unsupported  s   z#TestParquetPyArrow.test_unsupportedc                 C   sH   t jddt jd}tj|dgd}tr| ||tj d S t	|| d S )Nr,   
   r:   fp16r   r   )
rG   rH   float16r2   r3   r	   r   r   rX  r|   )r   r'   r   rJ   r!   r!   r"   test_unsupported_float16  s
   z+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsr   zfloat16 works on 15	path_typec              	   C   s   t jddt jd}tj|dgd}t 2}||}ttj	 |j
||d W d    n1 s2w   Y  tj|r?J W d    d S 1 sJw   Y  d S )Nr,   rZ  r:   r[  r\  )rw   r#   )rG   rH   r]  r2   r3   rp   r{   r   r   rX  r   osrw   isfile)r   r'   r_  r   rJ   path_strrw   r!   r!   r"    test_unsupported_float16_cleanup  s   
"z3TestParquetPyArrow.test_unsupported_float16_cleanupc                 C   sd   t  }t td|d< t jg dt g dd|d< t jg dg dd	d
|d< t|| d S )Nabcdefr@   )rQ   r.   r.   rQ   NrQ   rP   r:   rA   )r@   rA   rL   r@   rL   rA   )rA   rL   rB   T)
categoriesorderedrL   )r2   r3   CategoricalrE   CategoricalDtyper|   rQ  r!   r!   r"   test_categorical  s   

z#TestParquetPyArrow.test_categoricalc                 C   s@   t d}|jdi |}d|i}t|||j d||d d S )Ns3fs
filesystem/pyarrow.parquetrw   rx   ry   r!   )r%   r   S3FileSystemr|   r   )r   r4   s3_public_bucketr'   s3sorj  s3kwr!   r!   r"   test_s3_roundtrip_explicit_fs   s   


z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s(   d|i}t ||d|j d||d d S )Nstorage_optionss3://rl  rm  r|   r   )r   r4   ro  r'   rp  r!   r!   r"   test_s3_roundtrip-  s   
z$TestParquetPyArrow.test_s3_roundtrippartition_colr0   c              
   C   sr   t d | }|r |t|tj}d}|| |||< t|||d|j	 dd|i|d |dddd	 d S )
Nrj  categoryru  z/parquet_dirrt  )r   rj   rt  Tr+   )rv   rw   rx   ry   rm   rr   )
r%   r   r   re   dictfromkeysrG   int32r|   r   )r   r4   ro  r'   rx  rp  expected_dfpartition_col_typer!   r!   r"   test_s3_roundtrip_for_dir9  s*   

z,TestParquetPyArrow.test_s3_roundtrip_for_dirc                 C   s2   t d t }|| t|}t|| d S )Nr   )r%   r   r   r   r   rp   rq   )r   r4   bufferdf_from_bufr!   r!   r"   test_read_file_like_obj_support`  s
   

z2TestParquetPyArrow.test_read_file_like_obj_supportc                 C   s   t d |dd |dd t jtdd td W d    n1 s'w   Y  t jtdd |d W d    d S 1 sDw   Y  d S )Nr   HOMETestingUserUSERPROFILEz.*TestingUser.*r   z~/file.parquet)r%   r   setenvr   OSErrorr   r   )r   r4   monkeypatchr!   r!   r"   test_expand_userg  s   

"z#TestParquetPyArrow.test_expand_userc                 C   s>   ddg}|}|j ||d d t|| t|j|jksJ d S )Nra   r]   r   rj   r   r   r   shape)r   r   r'   rh   r   rJ   r!   r!   r"   test_partition_cols_supportedp  s
   
z0TestParquetPyArrow.test_partition_cols_supportedc                 C   s@   d}|g}|}|j ||d d t|| t|j|jksJ d S )Nra   r  r  )r   r   r'   rh   r   partition_cols_listrJ   r!   r!   r"   test_partition_cols_stringx  s   
z-TestParquetPyArrow.test_partition_cols_stringc                 C   s   | S r   r!   )xr!   r!   r"   <lambda>  s    zTestParquetPyArrow.<lambda>rX   zpathlib.Path)idsc           	      C   s<   d}|g}|}||}|j ||d t|j|jksJ d S )Nr1   )r   )r   r   r  )	r   r   r'   r4   r_  r   r  rJ   rw   r!   r!   r"   test_partition_cols_pathlib  s   z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t jg g d}t|| d S )N)r   r   r   rQ  r!   r!   r"   test_empty_dataframe  s   z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   r  r+   )typer   r  )	r   r2   r3   r   fieldbool_re   ra   r|   )r   r'   r   rJ   r   out_dfr!   r!   r"   test_write_with_schema  s
   
z)TestParquetPyArrow.test_write_with_schemac                 C   sz   t d ttjg dddtjg dddtjg dddd}t|| td	tjg d
ddi}t|| d S )Nr   r*   r-  r:   UInt32rM   rX   r@   rA   rL   r@   r$  )r%   r   r2   r3   r   r|   rQ  r!   r!   r"    test_additional_extension_arrays  s   

z3TestParquetPyArrow.test_additional_extension_arraysc              	   C   st   t d tdtjg dddi}td| t|||d| dd	 W d    d S 1 s3w   Y  d S )
Nr   r@   rM   string[pyarrow]r:   string_storagezstring[]rv   )r%   r   r2   r3   r   r   r|   re   )r   r'   r  rJ   r!   r!   r"    test_pyarrow_backed_string_array  s
   
"z3TestParquetPyArrow.test_pyarrow_backed_string_arrayc                 C   sV   t d ttjg dtjddddtjtjddddd}t	|| d S )	Nr   ))r   r+   )r+   r,   )r-   r6   z
2012-01-01r-   D)r?   r   r6   )rL   rB   rC   )
r%   r   r2   r3   IntervalIndexr   period_rangefrom_breaksrI   r|   rQ  r!   r!   r"   test_additional_extension_types  s   

z2TestParquetPyArrow.test_additional_extension_typesc                 C   s4   d}t dt jddddi}t||d|id d S )	Nz2.6r@   z
2017-01-011nsrZ  r   r?   versionr   )r2   r3   rI   r|   )r   r'   verrJ   r!   r!   r"   test_timestamp_nanoseconds  s   z-TestParquetPyArrow.test_timestamp_nanosecondsc                 C   sP   |j tjjkr|tjjdd d|g }tj	|d|id}t
||dd d S )Nzitemporary skip this test until it is properly resolved: https://github.com/pandas-dev/pandas/issues/37286r      index_as_colr   r   F)rn   )tzinforb   timezoneutcr4  r%   r   r3  r2   r3   r|   )r   r    r'   ri   idxrJ   r!   r!   r"   test_timezone_aware_index  s   
z,TestParquetPyArrow.test_timezone_aware_indexc                 C   sz   t d tdttdi}t }|j||d t	||dgd}W d    n1 s.w   Y  t
|dks;J d S )Nr   r@   r-   r   r@   r   r   r   r+   )r%   r   r2   r3   rE   rF   rp   r{   r   r   r   )r   r'   rJ   rw   r   r!   r!   r"   test_filter_row_groups  s   

z)TestParquetPyArrow.test_filter_row_groupsc                 C   s   t jtjddg dd}t }|j||d t	||}W d    n1 s+w   Y  |r?t
|jt jjjs=J d S t
|jt jjjsJJ d S )Nr,   )rZ  r-   )r0   r1   Cr   r   )r2   r3   rG   r   r   r   rp   r{   r   r   r   _mgrcore	internalsArrayManagerBlockManager)r   r'   using_array_managerrJ   rw   r   r!   r!   r"   test_read_parquet_manager  s   
z,TestParquetPyArrow.test_read_parquet_managerc                 C   s   dd l }|}tjdddd}|d }||d< g d|d< |j|}|jtjd	}trO|d
 	d|d
< |d 	d|d< |d 	t|j
ddd|d< t||ddi|d d S )Nr   r=   r-   r@  rA  rC  rD  rE  )types_mapperrb   ztimestamp[us][pyarrow]rc   us)unitrB  r,  r   r#   rx   rv   )r   r2   rI   r   Tablefrom_pandas	to_pandas
ArrowDtyper   re   	timestampr|   )r   r'   rh   r   rJ   rF  pa_tablerv   r!   r!   r"   &test_read_dtype_backend_pyarrow_config  s,   

z9TestParquetPyArrow.test_read_dtype_backend_pyarrow_configc                 C   sn   t jdddgit jddgdddd	}| }d
d l}t|jtdkr+|jd|_t	||ddi|d d S )Nr@   r+   r,   r-   r6   testr   zint64[pyarrow])r   r;   r   z11.0.0r,  r   r  )
r2   r3   Indexr   r   r
   r   r   re   r|   )r   r'   rJ   rv   r   r!   r!   r"   ,test_read_dtype_backend_pyarrow_config_index  s   
z?TestParquetPyArrow.test_read_dtype_backend_pyarrow_config_indexc                 C   s   t tdttddd}ddg|_t|| ddg|_tjtdd	 t|| W d    n1 s5w   Y  t		d
ddddt		d
ddddg|_t|| d S )Nr5   r+   r6   r   r   rN   rO   z|S3r     )
r2   r3   rE   rF   r   r|   r%   r   NotImplementedErrorrb   rQ  r!   r!   r"   test_columns_dtypes_not_invalid&  s   


z2TestParquetPyArrow.test_columns_dtypes_not_invalidc                 C   s(   t jt jg dddd}t|| d S )Nr  custom namer  r  r2   r3   r  r|   rQ  r!   r!   r"   test_empty_columns:  s   z%TestParquetPyArrow.test_empty_columnsc                 C   sR   |d }t jddgid}ddi|_|j||d t||d}|j|jks'J d S )Nztest_df_metadata.pr+   r   test_attributer   )r2   r3   attrsr   r   )r   r   r'   rw   rJ   new_dfr!   r!   r"   test_df_attrs_persistence?  s   
z,TestParquetPyArrow.test_df_attrs_persistencec                 C   s   |d }t jdddgiddgd}|j|dd t d	d
 t|dd}W d    n1 s/w   Y  t jdddgidt jddgddd}t|| d S )Nztest_string_inference.pr@   r  yrA   )r   r   r   r   future.infer_stringTstring[pyarrow_numpy]r:   )r   r;   r   )r2   r3   r   r   r   r  rp   rq   r   r   r'   rw   rJ   r   rv   r!   r!   r"   test_string_inferenceG  s   
z(TestParquetPyArrow.test_string_inferenceznot supported before 11.0c                 C   st   dd l }|d }tjdtdgidd}|j||d|dfgd t|}tjdd	gid
d}t	|| d S )Nr   z	decimal.pr@   z123.00r  r:   r  )r   123zstring[python])
r   r2   r3   r   r   r   
decimal128r   rp   rq   r  r!   r!   r"   test_roundtrip_decimalU  s    z)TestParquetPyArrow.test_roundtrip_decimalc                 C   s   dd l }dd lm} |d }|d|g d| i}||| tdd t	|}W d    n1 s8w   Y  tj
dg didtjdgddd	}t|| d S )
Nr   zlarge_string.pr@   )NrA   rL   r  Tr  r:   )r   r;   r   )r   pyarrow.parquetr   r5  r   large_stringr6  r2   r   r   r3   r  rp   rq   )r   r   r'   r7  rw   r5  r   rv   r!   r!   r"   #test_infer_string_large_string_typea  s   

z6TestParquetPyArrow.test_infer_string_large_string_typeN)1r   r   r   rG  rH  rL  rR  rU  rY  r^  r%   r   r3  r   skipifr	   r>  strpathlibPathrc  ri  r   rs  rw  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r!   r!   r!   r"   r?    sl    


		
	
r?  c                   @   s(  e Zd Zdd Zdd Zdd Zejje	e
je	dkdd	d
d Zdd Zdd Zdd Zejjdd Zdd Zdd Zdd Zdd Zejje dd	dd Zejjeoae	eje	dkd d	d!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Z d-d. Z!d/d0 Z"ejje dd	d1d2 Z#d3S )4TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	Nr=   r-   z
US/EasternrA  rC  rS  r>   	timedelta)r2   rI   r   rT  r|   )r   r)   rh   rJ   rF  r!   r!   r"   rG    s   
z!TestParquetFastParquet.test_basicc                 C   s   t tdttddd}t}d}ddg|_| |||| ddg|_| |||| td	ddddtd	ddddg|_| |||| d S )
Nr5   r+   r6   r   r  r   rN   rO   r  )r2   r3   rE   rF   r  r   r   rb   )r   r)   rJ   errr   r!   r!   r"   test_columns_dtypes_invalid  s   

z2TestParquetFastParquet.test_columns_dtypes_invalidc                 C   s<   t jtdddtdd }d}| ||t| d S )NrM  r6   r-   rN  r   z9Cannot create parquet dataset with duplicate column namesrO  r   r)   rJ   r   r!   r!   r"   rR    s   $z-TestParquetFastParquet.test_duplicate_columnsz2.0.0z$fastparquet uses np.float_ in numpy2r   c                 C   s@   t dg di}t jddtjdgidd}t|||dd d S )	Nr@   )TNFr'  g        r]  r:   F)rv   rn   )r2   r3   rG   rd   r|   r   r)   rJ   rv   r!   r!   r"   test_bool_with_none  s   z*TestParquetFastParquet.test_bool_with_nonec                 C   sT   t dt jddddi}| ||td  t dg di}d}| ||t| d S )Nr@   2013Mr-   r  rW  z"Can't infer object conversion type)r2   r3   r  r   r   r  r!   r!   r"   rY    s
   z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )Nr@   r5   )r2   r3   rg  rE   r|   )r   r)   rJ   r!   r!   r"   ri    rV  z'TestParquetFastParquet.test_categoricalc                 C   sx   dt tdi}t|}t }|j||d dd t||dgd}W d    n1 s-w   Y  t|dks:J d S )Nr@   r-   r+   )r#   rj   row_group_offsetsr  r  )	rE   rF   r2   r3   rp   r{   r   r   r   )r   r)   rB   rJ   rw   r   r!   r!   r"   r    s   

z-TestParquetFastParquet.test_filter_row_groupsc                 C   s*   t ||d|j dd|id |dd d S )Nru  z/fastparquet.parquetrt  )rj   rt  rm  rv  )r   r4   ro  r)   rp  r!   r!   r"   rw    s   
z(TestParquetFastParquet.test_s3_roundtripc                 C   s\   ddg}|}|j |d|d d tj|sJ dd l}|t|dj}t|dks,J d S )Nra   r]   r   r#   r   rj   r   Fr,   	r   r`  rw   existsr   ParquetFiler  catsr   r   r   r)   rh   r   rJ   r   actual_partition_colsr!   r!   r"   r       z4TestParquetFastParquet.test_partition_cols_supportedc                 C   sX   d}|}|j |d|d d tj|sJ dd l}|t|dj}t|dks*J d S )Nra   r   r  r   Fr+   r  r  r!   r!   r"   r    s   z1TestParquetFastParquet.test_partition_cols_stringc                 C   s\   ddg}|}|j |dd |d tj|sJ dd l}|t|dj}t|dks,J d S )Nra   r]   r   )r#   rj   partition_onr   Fr,   r  r  r!   r!   r"   test_partition_on_supported  r  z2TestParquetFastParquet.test_partition_on_supportedc                 C   sX   ddg}|}d}t jt|d |j|dd ||d W d    d S 1 s%w   Y  d S )Nra   r]   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datar   r   )r#   rj   r  r   )r%   r   r   r   )r   r   r)   rh   r   rJ   r   r!   r!   r"   3test_error_on_using_partition_cols_and_partition_on  s   "zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onzfastparquet writes into Indexc                 C   s"   t  }| }t|||d d S )Nr  )r2   r3   r   r|   r  r!   r!   r"   r    s   z+TestParquetFastParquet.test_empty_dataframez2022.12zCfastparquet bug, see https://github.com/dask/fastparquet/issues/929c                 C   s>   d|g }t j|d|id}| }d|j_t|||d d S )Nr  r  r  r   r  )r2   r3   r   r   r   r|   )r   r)   ri   r  rJ   rv   r!   r!   r"   r    s
   
z0TestParquetFastParquet.test_timezone_aware_indexc              
   C   s   t dddgi}t g}|| tjtdd$ tt	 t
|ddd W d    n1 s2w   Y  W d    n1 sAw   Y  tjtdd t
|dd	d
 W d    n1 s_w   Y  W d    d S W d    d S 1 sww   Y  d S )Nr@   r+   r,   z!not supported for the fastparquetr   r   T)r#   use_nullable_dtypesr   r+  )r2   r3   rp   r{   r   r%   r   r   r   r   r   )r   r)   rJ   rw   r!   r!   r"   &test_use_nullable_dtypes_not_supported*  s   

"z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc              	   C   s   t d7}t|d tjtdd t|dd W d    n1 s&w   Y  t|j	dd W d    d S 1 s?w   Y  d S )	Ntest.parquets   breakit r   r   r   F)
missing_ok)
rp   r{   r  r  write_bytesr%   r   	Exceptionr   unlink)r   rw   r!   r!   r"   $test_close_file_handle_on_read_error5  s   "z;TestParquetFastParquet.test_close_file_handle_on_read_errorc              	   C   s   t jddgddgdd}td*}t| d}|| W d    n1 s*w   Y  t||d}W d    n1 s?w   Y  t|| d S )Nr   r+   r/   r  r  wbr   )	r2   r3   rp   r{   r   encoder   r   rq   )r   r#   rJ   rw   rD   r   r!   r!   r"   test_bytes_file_name=  s   z+TestParquetFastParquet.test_bytes_file_namec              	   C     t d tjddgddgdd}t '}t jtdd |j|ddd	 W d    n1 s1w   Y  W d    n1 s@w   Y  t 7}t	
|d
 t jtdd t|ddd	 W d    n1 skw   Y  W d    d S W d    d S 1 sw   Y  d S )Nr   r   r+   r/   r  zfilesystem is not implementedr   r.   r#   rk  rN   )r%   r   r2   r3   rp   r{   r   r  r   r  r  r  r   r   rJ   rw   r!   r!   r"   test_filesystem_notimplementedG  (   


"z5TestParquetFastParquet.test_filesystem_notimplementedc              	   C   r	  )Nr   r   r+   r/   r  z1filesystem must be a pyarrow or fsspec FileSystemr   r.   r
  rN   )r%   r   r2   r3   rp   r{   r   r   r   r  r  r  r   r  r!   r!   r"   test_invalid_filesystemW  r  z.TestParquetFastParquet.test_invalid_filesystemc              	   C   s(  t d}tjddgddgdd}t ,}t jtdd |j|d|	 d	d
id W d    n1 s6w   Y  W d    n1 sEw   Y  t <}t
|d t jtdd t|d|	 d	d
id W d    n1 suw   Y  W d    d S W d    d S 1 sw   Y  d S )Nz
pyarrow.fsr   r+   r/   r  z8storage_options not supported with a pyarrow FileSystem.r   r   r.   rQ   )r#   rk  rt  rN   )r%   r   r2   r3   rp   r{   r   r  r   LocalFileSystemr  r  r  r   )r   pa_fsrJ   rw   r!   r!   r"   .test_unsupported_pa_filesystem_storage_optionsg  s@   


"zETestParquetFastParquet.test_unsupported_pa_filesystem_storage_optionsc              	   C   s   d}t dttddi}td3}|| tjt	|d t
|dd W d    n1 s1w   Y  W d    d S W d    d S 1 sIw   Y  d S )	NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.r]   r+   r6   ztmp.parquetr   numpy)r,  )r2   r3   rE   rF   rp   r{   r   r%   r   r   r   )r   r#   r   rJ   rw   r!   r!   r"   test_invalid_dtype_backend  s   
"z1TestParquetFastParquet.test_invalid_dtype_backendc                 C   sF   t jt jg dddd}t jt jg dddd}t|||d d S )Nr  r  r  r  r  r  r  r!   r!   r"   r    s   z)TestParquetFastParquet.test_empty_columnsN)$r   r   r   rG  r  rR  r%   r   r3  r
   rG   r   r  rY  ri  r  r   rw  r  r  r  r  r  r   r  r(   r   r  r  r  r  r  r  r  r  r  r!   r!   r!   r"   r    sD    	




	
r  )	NNNNNTFTr,   )I__doc__rb   decimalr   r   r   r`  r  r  rG   r%   pandas._configr   pandas._config.configr   pandas.compatr   pandas.compat.pyarrowr   r   r	   pandasr2   pandas._testing_testingrp   pandas.util.versionr
   pandas.io.parquetr   r   r   r   r   r   r$   r   r   r(   r   filterwarnings
pytestmarkfixturer   r  r#   r'   r)   r4   rK   rh   nowr  r  minmaxstrptimeri   r|   r   r   r   r   r   r   r   r   r   r   r   r   r?  r  r!   r!   r!   r"   <module>   s    
	







B+  +   L