
    h+                    H   d dl mZ d dlmZ d dlZd dlmZ d dlmZm	Z	m
Z
mZ d dlZd dlZd dlmZmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z. d dl/mZm0Z0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z; d dl<m=Z=m>Z> d dl?m@Z@ d dlAmBZB d dlCmDZD d dlEmFZF d dlGmHZH er(d dlImJZJ d dlKZKd dlLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZV d dlWmXZX e' G d de&             ZY G d  d!e8      ZZ G d" d#eZe@      Z[ G d$ d%e[      Z\y)&    )annotations)partialN)Path)TYPE_CHECKINGAnyLiteralcast)
get_optionusing_string_dtype)libmissing)NDArrayBacked)ensure_string_array)HAS_PYARROWpa_version_under10p1)function)doc)find_stack_level)ExtensionDtypeStorageExtensionDtyperegister_extension_dtype)is_array_likeis_bool_dtypeis_integer_dtypeis_object_dtypeis_string_dtypepandas_dtype)r   nanopsops	roperator)isin)masked_reductions)ExtensionArray)FloatingArrayFloatingDtype)IntegerArrayIntegerDtype)NumpyExtensionArray)extract_array)check_array_indexer)isna)printing)MutableMapping)
	ArrayLikeAxisIntDtypeDtypeObjNumpySorterNumpyValueArrayLikeScalarSelfnpttype_t)Seriesc                       e Zd ZdZedd       Zedd       ZdZdej                  f	 	 	 	 	 ddZ
ddZddZdd	Zd fd
Zd Zedd       Zedd       Z	 	 ddZddZ	 	 	 	 ddZ xZS )StringDtypea  
    Extension dtype for string data.

    .. warning::

       StringDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    storage : {"python", "pyarrow"}, optional
        If not given, the value of ``pd.options.mode.string_storage``.
    na_value : {np.nan, pd.NA}, default pd.NA
        Whether the dtype follows NaN or NA missing value semantics.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.StringDtype()
    string[python]

    >>> pd.StringDtype(storage="pyarrow")
    string[pyarrow]
    c                >    | j                   t        j                  u ryy)Nstringstr)	_na_value
libmissingNAselfs    ?D:\jyotish\venv\Lib\site-packages\pandas/core/arrays/string_.pynamezStringDtype.name   s    >>Z]]*    c                    | j                   S N)r>   rA   s    rC   na_valuezStringDtype.na_value   s    ~~rE   )storager>   Nc                4   |@|t         j                  urt        d      }|dk(  rt        rd}nd}nt        d      }|dk(  rd}|dk(  r6t	        j
                  dt        t                      d}t        j                  }|dvrt        d	| d
      |dk(  rt        rt        d      t        |t              r&t        j                  |      rt        j                  }n |t         j                  urt        d|       t!        t"        |      | _        || _        y )Nzmode.string_storageautopyarrowpythonpyarrow_numpya]  The 'pyarrow_numpy' storage option name is deprecated and will be removed in pandas 3.0. Use 'pd.StringDtype(storage="pyarrow", na_value-np.nan)' to construct the same dtype.
Or enable the 'pd.options.future.infer_string = True' option globally and use the "str" alias as a shorthand notation to specify a dtype (instead of "string[pyarrow_numpy]").
stacklevel>   rM   rL   z+Storage must be 'python' or 'pyarrow'. Got z	 instead.z;pyarrow>=10.0.1 is required for PyArrow backed StringArray.z('na_value' must be np.nan or pd.NA, got )r?   r@   r
   r   warningswarnFutureWarningr   npnan
ValueErrorr   ImportError
isinstancefloatisnanr	   r=   rI   r>   )rB   rI   rH   s      rC   __init__zStringDtype.__init__   s    ?z}},$%:;f$""+"*$%:;f$&Go%MM8 +-	  GvvH //=gYiP  i$8M  h&288H+= vvHZ]]*GzRSSC)!rE   c                    | j                   t        j                  u r| j                   d| j                   dS | j                  dk(  rdnd}d| d| j                    dS )	N[]rL    zstorage='python', z<StringDtype(z	na_value=z)>)r>   r?   r@   rD   rI   )rB   rI   s     rC   __repr__zStringDtype.__repr__   sZ    >>Z]]*ii[$,,q11 LLI5b;OG"7)9T^^4DBGGrE   c                ,   t        |t              r'|dk(  s|| j                  k(  ry	 | j                  |      }t        |t        |             r3| j                  |j                  k(  xr | j                  |j                  u S y# t        t
        f$ r Y yw xY w)Nr<   TF)	rX   r=   rD   construct_from_string	TypeErrorrW   typerI   rH   )rB   others     rC   __eq__zStringDtype.__eq__   s     eS! ETYY$62259
 eT$Z(<<5==0TT]]enn5TT {+  s   B BBc                |    |j                  dd      | _        |j                  dt        j                        | _        y )NrI   rM   r>   )poprI   r?   r@   r>   )rB   states     rC   __setstate__zStringDtype.__setstate__   s*    yyH5;
>rE   c                     t         |          S rG   )super__hash__)rB   	__class__s    rC   rm   zStringDtype.__hash__   s    w!!rE   c                >    t         | j                  | j                  ffS rG   )r:   rI   rH   rA   s    rC   
__reduce__zStringDtype.__reduce__   s    T\\4==999rE   c                    t         S rG   )r=   rA   s    rC   rd   zStringDtype.type   s    
rE   c                @   t        |t              st        dt        |             |dk(  r |        S |dk(  r!t	               r | t
        j                        S |dk(  r	 | d      S |dk(  r	 | d	      S |d
k(  r	 | d      S t        d| j                   d| d      )a|  
        Construct a StringDtype from a string.

        Parameters
        ----------
        string : str
            The type of the name. The storage type will be taking from `string`.
            Valid options and their storage types are

            ========================== ==============================================
            string                     result storage
            ========================== ==============================================
            ``'string'``               pd.options.mode.string_storage, default python
            ``'string[python]'``       python
            ``'string[pyarrow]'``      pyarrow
            ========================== ==============================================

        Returns
        -------
        StringDtype

        Raise
        -----
        TypeError
            If the string is not a valid option.
        z.'construct_from_string' expects a string, got r<   r=   rH   zstring[python]rM   rI   zstring[pyarrow]rL   zstring[pyarrow_numpy]rN   zCannot construct a 'z' from '')rX   r=   rc   rd   r   rT   rU   __name__)clsr<   s     rC   rb   z!StringDtype.construct_from_string   s    8 &#&@fO  X5Lu_!3!5''''x((((y))..//23<<.PQRSSrE   c                    ddl m}m} | j                  dk(  r"| j                  t
        j                  u rt        S | j                  dk(  r| j                  t
        j                  u r|S | j                  dk(  rt        S |S )zq
        Return the array type associated with this dtype.

        Returns
        -------
        type
        r   )ArrowStringArrayArrowStringArrayNumpySemanticsrM   rL   )	pandas.core.arrays.string_arrowry   rz   rI   r>   r?   r@   StringArrayStringArrayNumpySemantics)rB   ry   rz   s      rC   construct_array_typez StringDtype.construct_array_type  sc    	

 <<8#*--(G\\Y&4>>Z]]+J##\\X%,,11rE   c                   t               }t               }|D ]s  }t        |t              r7|j                  |j                         |j                  |j
                         Jt        |t        j                        r|j                  dv rs y  t        |      dk(  rd}nt        t        |            }t        |      dk(  rt        j                  }nt        t        |            }t        ||      S )N)UT   rL   rI   rH   )setrX   r:   addrI   rH   rT   dtypekindlennextiterr?   r@   )rB   dtypesstorages	na_valuesr   rI   rH   s          rC   _get_common_dtypezStringDtype._get_common_dtype5  s    5E	E%-U]]+enn-E288,z1I  x=AG4>*G y>Q!}}HDO,H7X>>rE   c                L   | j                   dk(  r8| j                  t        j                  u rddlm}  ||      S ddlm}  ||      S ddl}t        ||j                        r|g}n|j                  }g }|D ]<  }|j                  d      }t        || j                        }|j                  |       > t        |      dk(  rt!        j"                  g t$        	      }nt!        j&                  |      }t(        j+                  t(              }t-        j.                  |||        |S )
zH
        Construct StringArray from pyarrow Array/ChunkedArray.
        rL   r   )ry   )rz   NF)zero_copy_onlyrs   r   )rI   r>   r?   r@   r{   ry   rz   rL   rX   Arraychunksto_numpyr   rH   appendr   rT   arrayobjectconcatenater|   __new__r   r[   )	rB   r   ry   rz   rL   r   resultsarrnew_string_arrays	            rC   __from_arrow__zStringDtype.__from_arrow__Q  s     <<9$~~.L'.. 6e<< %/ G ll%l8)#Fs#  v;!((2V,C..)C '..{;/d;rE   )returnr=   )r   libmissing.NAType | float)rI   z
str | NonerH   r   r   None)re   r   r   bool)ri   zMutableMapping[str, Any]r   r   )r   int)r   z	type[str])r   r5   )r   ztype_t[BaseStringArray])r   zlist[DtypeObj]r   zDtypeObj | None)r   z$pyarrow.Array | pyarrow.ChunkedArrayr   BaseStringArray)rv   
__module____qualname____doc__propertyrD   rH   	_metadatar?   r@   r[   r`   rf   rj   rm   rp   rd   classmethodrb   r~   r   r   __classcell__rn   s   @rC   r:   r:   ^   s    @     )I #.8mm3"3" ,3" 
	3"jH"?
":   +T +T`2	 20?8+ 9+ 	+ rE   r:   c                       e Zd ZU dZded<   d Z eej                        d        Ze	dd       Z
dddZej                  dd	f	 	 	 dd
Z	 	 	 	 ddZej                  dd	f	 	 	 ddZdd fdZ xZS )r   z8
    Mixin class for StringArray, ArrowStringArray.
    r:   r   c                   |t         j                  t         j                  t         j                  fv rt	        |t
        j                        r|j                  t        k(  ro|j                  dd  j                  d      }t        j                  d| d| j                   dt        t                       ||| j                  t                    S t         S )N   _ru   z'' operations between boolean dtype and z| are deprecated and will raise in a future version. Explicitly cast the strings to a boolean dtype before operating instead.rO   )r    ror_rand_rxorrX   rT   ndarrayr   r   rv   striprQ   rR   DeprecationWarningr   astypeNotImplemented)rB   re   opop_names       rC   _logical_methodzBaseStringArray._logical_method  s    9>>9??INNCC5"**-t# kk!"o++C0GMMG9CDJJ< PP P #+- eT[[.//rE   c                    | j                   dkD  r| D cg c]  }|j                          c}S t        | j                               S c c}w )Nr   )ndimtolistlistr   )rB   xs     rC   r   zBaseStringArray.tolist  s>    99q=(,-1AHHJ--DMMO$$ .s   Ac                f    t        j                  |d      dvrt        | j                  ||      S )NTskipna)r<   emptyr   )r   infer_dtyperV   _from_sequence)rw   scalarsr   s      rC   _from_scalarszBaseStringArray._from_scalars  s2    ??7408KK!!'!77rE   c                @    t        t        j                  d|       }|S )N)	
)escape_charsquote_strings)r   r,   pprint_thing)rB   boxed	formatters      rC   
_formatterzBaseStringArray._formatter  s%    !!+#)
	
 rE   NTc                   | j                   j                  t        j                  u r| j	                  ||||      S ddlm} || j                   }|t        j                  u r| j                   j                  }t        |       }t        j                  |       }t        |      st        |      rt        |      rt        }n|}t        |      }	|	rd}n#|t        j                   d      k(  rt        |      }t        j                  |||j!                  d      d|t        j                   t#        t$        |                  }
|	sd|d d   ||
|      S | j'                  |||||      S )	N)rH   r   convertr   )BooleanArrayr   r   uint8Fr   rH   r   )r   rH   rT   rU   _str_map_nan_semanticspandas.arraysr   r   
no_defaultr+   asarrayr   r   r&   r   map_infer_maskviewr	   rd   _str_map_str_or_object)rB   frH   r   r   r   maskr   constructorna_value_is_naresults              rC   _str_mapzBaseStringArray._str_map  s?    ::"&&(..HE7 /   	/=JJEs~~%zz**HDzjjE"mE&:&**!(^N"((6**>''		'"! hhtD%01
F "Qvt,, ..uhQMMrE   c                n   t        |      rt        |      szt        j                  |||j	                  d      d|      }| j
                  j                  dk(  r'dd l}|j                  |||j                         d      } t        |       |      S t        j                  |||j	                  d            S )Nr   F)r   rH   rL   r   T)r   rd   from_pandas)r   r   r   r   r   r   rI   rL   r   large_stringrd   )rB   r   rH   r   r   r   r   pas           rC   r   z&BaseStringArray._str_map_str_or_object  s     5!/%*@''Q		'*EHF zz!!Y.$BOO,=4 "  4:f%% %%c1dii.@AArE   c                v   || j                   }|t        j                  u r$t        |      rd}n| j                   j                  }t        |       }t        j                  |       }t        |      st        |      rt        |      }|rt        |      rd}nd}t        j                  |||j                  d      d|t        j                   t        t        |                  }|r?t        |      r4|j                         r$|j                  d      }t        j                  ||<   |S | j!                  |||||      S )NFr   r   r   float64)r   r   r   r   rH   r+   rT   r   r   r   r   r	   rd   anyr   rU   r   )	rB   r   rH   r   r   r   r   r   r   s	            rC   r   z&BaseStringArray._str_map_nan_semantics  s    =JJEs~~%U# ::..DzjjE"mE&:!(^N#E* H  %H''		'"!hhtD%01F "25"9dhhj  y1!vvtM ..uhQMMrE   c                >    |t        d      t        | 	  |      S )Nz)Cannot change data-type for string array.r   )rc   rl   r   )rB   r   rn   s     rC   r   zBaseStringArray.view4  s&    GHHw|%|((rE   )r   r1   r   r5   F)r   r   )r   Dtype | Noner   r   )r   z
np.ndarrayr   npt.NDArray[np.bool_]rG   )r   r   r   r.   )rv   r   r   r   __annotations__r   r   r#   r   r   r   r   r   r   r   r   r   r   r   r   s   @rC   r   r     s     $ 			%  %
 8 8 "5N 	5N
 5NnB 	B $BB ".N 	.N
 .N`) )rE   r   c                      e Zd ZU dZdZdZej                  Zde	d<   d&d' fdZ
d Zd	 Zed
ddd(d       Zed
dd	 	 	 d(d       Zed)d       Zd*dZd+dZd Zd, fdZd-dZd.dZd/dZd0d1 fdZdddd	 	 	 	 	 	 	 d2dZddd3dZd4 fdZd5d6dZd5d6dZd
ddd 	 	 	 	 	 	 	 d7d!Zd0d8d"Zd&d9d#Z  e!e"jF                        	 	 d:	 	 	 	 	 	 	 d; fd$       Z#d% Z$e$Z% xZ&S )<r|   aY  
    Extension array for string data.

    .. warning::

       StringArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : array-like
        The array of data.

        .. warning::

           Currently, this expects an object-dtype ndarray
           where the elements are Python strings
           or nan-likes (``None``, ``np.nan``, ``NA``).
           This may change without warning in the future. Use
           :meth:`pandas.array` with ``dtype="string"`` for a stable way of
           creating a `StringArray` from any sequence.

        .. versionchanged:: 1.5.0

           StringArray now accepts array-likes containing
           nan-likes(``None``, ``np.nan``) for the ``values`` parameter
           in addition to strings and :attr:`pandas.NA`

    copy : bool, default False
        Whether to copy the array of data.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    :func:`pandas.array`
        The recommended function for creating a StringArray.
    Series.str
        The string methods are available on Series backed by
        a StringArray.

    Notes
    -----
    StringArray returns a BooleanArray for comparison methods.

    Examples
    --------
    >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
    <StringArray>
    ['This is', 'some text', <NA>, 'data.']
    Length: 4, dtype: string

    Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
    will convert the values to strings.

    >>> pd.array(['1', 1], dtype="object")
    <NumpyExtensionArray>
    ['1', 1]
    Length: 2, dtype: object
    >>> pd.array(['1', 1], dtype="string")
    <StringArray>
    ['1', '1']
    Length: 2, dtype: string

    However, instantiating StringArrays directly with non-strings will raise an error.

    For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:

    >>> pd.array(["a", None, "c"], dtype="string") == "a"
    <BooleanArray>
    [True, <NA>, False]
    Length: 3, dtype: boolean
    	extensionrM   r   r>   Fc                   t        |      }t        | 	  ||       t        |t	        |             s| j                          t        j                  | | j                  t        | j                  | j                               y )Ncopyr   )r)   rl   r[   rX   rd   	_validater   _ndarrayr:   _storager>   )rB   valuesr   rn   s      rC   r[   zStringArray.__init__  sa    v&d+&$t*-NNMMG	
rE   c                   t        | j                        r,t        j                  | j                  d      st	        d      | j                  j
                  dk7  r#t	        d| j                  j
                   d      | j                  j                  dkD  r/t        j                  | j                  j                  d             y	t        j                  | j                         y	)
z*Validate that we only store NA or strings.Tr   z7StringArray requires a sequence of strings or pandas.NAr   z>StringArray requires a sequence of strings or pandas.NA. Got '' dtype instead.r   KN)	r   r   r   is_string_arrayrV   r   r   convert_nans_to_NAravelrA   s    rC   r   zStringArray._validate  s    t}}c&9&9$--PT&UVWW==(*MM''((8: 
 ==!""4==#6#6s#;<""4==1rE   c           
         t        |      r| j                  j                  S t        |t              s2t        d| d| j                   dt        |      j                   d      |S )NInvalid value '' for dtype '3'. Value should be a string or missing value, got '
' instead.)r+   r   rH   rX   r=   rc   rd   rv   rB   values     rC   _validate_scalarzStringArray._validate_scalar  sf    ;::&&&E3'!%djj\ B115e1E1E0FjR  rE   Nr   r   c               l   |rAt        |t              r|dk(  s,t        |      }t        |t              r|j                  dk(  s4J t               rt        dt        j                        }nt        d      }ddlm	} |j                  }t        ||      r6|j                  }|j                  }t        j                  ||d      }|||<   nBt        j                  |      rt        j                   |      }t        j                  |||	      }| j#                  |       }t%        j&                  |||       |S )
Nr<   rM   r   rt   r   )BaseMaskedArrayF)r   convert_na_value)rH   r   )rX   r=   r   r:   rI   r   rT   rU   pandas.core.arrays.maskedr	  rH   _mask_datar   r   is_pyarrow_arrayr   r   r   r[   )	rw   r   r   r   r	  rH   r   r   r   s	            rC   r   zStringArray._from_sequence  s    *UC0Uh5F 'Ee[1emmx6OOO!##HrvvF#H5=>>g/I]]F,,V$QVWF (F9 ##G, ((7+,,WxdSF ;;s+/?rE   c               *    | j                  |||      S )Nr  )r   )rw   stringsr   r   s       rC   _from_sequence_of_stringsz%StringArray._from_sequence_of_strings  s     !!'T!BBrE   c                    t        j                  |t              }t        j                  |d d   | |      j                  |d      S )Nr   Fr   )rT   r   r   r?   r@   r   )rw   shaper   r   s       rC   _emptyzStringArray._empty  s9    %v.MMq	6{!!%e!44rE   c                    ddl }||j                         }| j                  j                         }d|| j	                         <   |j                  ||d      S )z6
        Convert myself into a pyarrow Array.
        r   NT)rd   r   )rL   r<   r   r   r+   r   )rB   rd   r   r   s       rC   __arrow_array__zStringArray.__arrow_array__  sN     	<99;D##%"tyy{xxTtx<<rE   c                f    | j                   j                         }|| j                  j                  fS rG   )r   r   r   rH   )rB   r   s     rC   _values_for_factorizez!StringArray._values_for_factorize  s)    mm  "DJJ''''rE   c           
     @   t        j                  |      rgt        |      r| j                  j                  }|S t        |t              s2t        d| d| j                   dt        |      j                   d      |S t        |d      }t        |      st        j                  |t              }n@t        |j                  t        | j                              r|S t        j                  |      }t        |      r"t        j                   |d      st        d	      |S )
z-Maybe convert value to be pyarrow compatible.r   r  r  r  T)extract_numpyr   r   z]Invalid value for dtype 'str'. Value should be a string or missing value (or array of those).)r   	is_scalarr+   r   rH   rX   r=   rc   rd   rv   r)   r   rT   r   r   r   r   r  s     rC   _maybe_convert_setitem_valuez(StringArray._maybe_convert_setitem_value  s    ==E{

++, +  s+%eWM$** F::>u+:N:N9O P (  "%t<E '

57EKKdjj)9: 

5)5z#"5"5eD"IC  rE   c                   | j                  |      }t        | |      }t        j                  |      }t        j                  |      }|r|st	        d      |s|j
                  | j
                  k(  r|j                  }nbt        j                  |      }t        |      }|j                         r2|j                         }| j
                  j                  |t        |      <   t        | 9  ||       y )Nz)setting an array element with a sequence.)r  r*   r   r  rV   r   r   rT   r   r+   r   r   rH   rl   __setitem__)rB   keyr  
scalar_keyscalar_valuer   rn   s         rC   r  zStringArray.__setitem__  s    11%8!$,]]3'
}}U+lHII{{djj(

5)E{88:!JJLE)-)<)<E$u+&C'rE   c                2    t        j                  | ||       y rG   )r#   _putmaskrB   r   r  s      rC   r#  zStringArray._putmask-  s     	dE2rE   c                0    t        j                  | ||      S rG   )r#   _wherer$  s      rC   r&  zStringArray._where3  s     $$T477rE   c           	        t        |t              s%t        |t              r3t        |j                        r|j                  | j                  d      }nt        j                  t        j                  |      d      sut        j                  |D cg c]   }t        |t              st        |      s|" c}t              }t        |      s%t        j                  | j                   t"              S | j%                  || j                        }t'        t        j                  |       t        j                  |            S c c}w )NFr   Tr   r   )rX   r   r#   r   r   r   r   r   rT   r   r   r=   r+   r   r   zerosr  r   r   r!   )rB   r   vals      rC   r!   zStringArray.isin9  s    fo.v~.?6<<3P]]4::E]:F&&rzz&'9$G$*PFSjc.Bd3iSFP  6{88DJJd;;((tzz(BFBJJt$bjj&899 Qs    E2ETc                L   t        |      }|| j                  k(  r|r| j                         S | S t        |t              rX| j
                  j                         }| j                         }d||<   |j                  |j                        }t        ||d      S t        |t              rN| j                         }| j                         }d||<   |j                  |j                        }t        ||d      S t        |t              rt        j                  | ||      S t        j                  |t        j                         rU| j
                  j                         }| j                         }d||<   |j                  |      }t        j"                  ||<   |S t$        |   ||      S )Nr   Fr   0)r   r   r   rX   r'   r   r+   r   numpy_dtyper&   r%   r$   r   r#   rT   
issubdtypefloatingrU   rl   )rB   r   r   r   r   r   rn   s         rC   r   zStringArray.astypeK  sT   U#DJJyy{"K|,--$$&C99;DCIZZ 1 12F599}-))+C99;DCIZZ 1 12F E::~.!((ud;;]]5"++.--$$&C99;DCIZZ&F66F4LMw~eT**rE   r   )r   keepdimsaxisc               x   | j                   j                  t        j                  u rK|dv rG|dk(  r!t	        j
                  | j                  |      S t	        j                  | j                  |      S |dv r7 t        | |      d	||d|}|r| j                  |g| j                         S |S t        d| d      )
N)r   allr   r   )minmaxargminargmaxsum)r   r0  r   zCannot perform reduction 'z' with string dtype )r   rH   rT   rU   r   nananyr   nanallgetattrr   rc   )rB   rD   r   r/  r0  kwargsr   s          rC   _reducezStringArray._reducel  s     ::"&&(T^-Cu}}}T]]6BB}}T]]6BB<<(WT4(LTLVLF**F84::*FFM4TF:MNOOrE   r   c                  |dk(  rd| d| j                    d}t        |      d}d}| j                  }t        j                  t        j
                  j                  t        j                  j                  d|   }| j                  rt        dt        |            }t        j                  |      r t        |       |      S |r^|dk(  rt        j                  |d	|      }n|j                         }t        j                   |d
d       t        j                   |dd       nVt        j"                  |      }	t        j$                  t'        |      |	z
  d      }| j                   j(                  |dd |d|	 } ||      }
|t        j*                  |
|f      }
n-|+t        j                  || j                   j(                  |
      }
 t        |       |
      }|S )a  
        Return an ExtensionArray performing an accumulation operation.

        The underlying data type might change.

        Parameters
        ----------
        name : str
            Name of the function, supported values are:
            - cummin
            - cummax
            - cumsum
            - cumprod
        skipna : bool, default True
            If True, skip NA values.
        **kwargs
            Additional keyword arguments passed to the accumulation function.
            Currently, there is no supported kwarg.

        Returns
        -------
        array

        Raises
        ------
        NotImplementedError : subclass does not define accumulations
        cumprodzoperation 'z' not supported for dtype 'ru   N)cumsumcummincummaxr   r@  r_   padr   )methodr0  backfillr   r   )r   rc   r   rT   r@  minimum
accumulatemaximum_hasnar	   r+   r2  rd   wherer   r   pad_or_backfill_inplacer6  r   r   rH   hstack)rB   rD   r   r<  msgtailna_maskr   np_funcidx	np_resultr   s               rC   _accumulatezStringArray._accumulate  s   8 9v%@ANCC.  #'%)--iijj++jj++
 	 ;;2DMBGvvg!tDz'**8# hhwG<G &llnG33$
 33) ii(xxGs 2(C**--Q!$3- G$			9d"34I $***=*=yIIdI&rE   c                    | j                   j                  t        j                  u r"|t        j
                  u rt        j                  S t        |   ||      S rG   )r   rH   rT   rU   r?   r@   rl   _wrap_reduction_result)rB   r0  r   rn   s      rC   rU  z"StringArray._wrap_reduction_result  s?    ::"&&(Vz}}-D66Mw-dF;;rE   c                    t        j                  d|       t        j                  | j	                         | j                         |      }| j                  ||      S Nr8  )r   r   r   )nvvalidate_minr"   r3  r   r+   rU  rB   r0  r   r<  r   s        rC   r3  zStringArray.min  H    
F#"&&==?V
 **488rE   c                    t        j                  d|       t        j                  | j	                         | j                         |      }| j                  ||      S rW  )rX  validate_maxr"   r4  r   r+   rU  rZ  s        rC   r4  zStringArray.max  r[  rE   )r0  r   	min_countc                   t        j                  d|       t        j                  | j                  | j                         |      }| j                  ||      S rW  )rX  validate_sumr"   r7  r   r+   rU  )rB   r0  r   r^  r<  r   s         rC   r7  zStringArray.sum  sH     	F#"&&==tyy{6
 **488rE   c                D   ddl m}  || j                  |      j                  d      } || j                  d|      }|j                  j                  | j
                        |_        | j
                  j                  t        j                  u r|j                  d      }|S )Nr   )value_counts_internal)dropnaInt64F)sortrc  )	pandas.core.algorithmsrb  r   r   indexr   rH   r?   r@   )rB   rc  value_countsr   s       rC   rh  zStringArray.value_counts  su    PdmmF;BB7Kdmm%G||**4::6::*--/]]7+FrE   c                z    | j                   j                  }|r"|t        j                  | j                         z   S |S rG   )r   nbytesr   memory_usage_of_objects)rB   deepr   s      rC   memory_usagezStringArray.memory_usage  s3    %%C77FFFrE   c                V    | j                   rt        d      t        |   |||      S )NzOsearchsorted requires array to be sorted, which is impossible with NAs present.)r  sidesorter)rI  rV   rl   searchsorted)rB   r  ro  rp  rn   s       rC   rq  zStringArray.searchsorted  s7     ;;$  w#%d6#JJrE   c                   ddl m}m} t        |t              rR| j
                  j                  t        j                  ur,|j
                  j                  t        j                  u rt        S t        ||      rht        |t              rR| j
                  j                  t        j                  u r&|j
                  j                  t        j                  ust        S t        S t        |t              r|j                  }t        |       t        |      z  }| }t        j                  |      r_t        |      t        |       k7  r#t!        dt        |        dt        |             t#        |      st%        j&                  |      }||   }|j(                  t*        j,                  v rvt%        j.                  | j                  d      }| j
                  j                  ||<    || j                  |   |      ||<   t        |t0              r|S | j3                  |      S t%        j4                  t        | j                        d      } || j                  |   |      ||<    |||      }| j
                  j                  t$        j6                  u rU|t8        j:                  k(  r!|j=                  t$        j>                  d	      S |j=                  t$        j>                  d
	      S |S )Nr   )ArrowExtensionArrayr   z"Lengths of operands do not match: z != r   r   r   Trs   F) r   rs  r   rX   r   r   rH   r?   r@   r   r|   r   r+   r   is_list_liker   rV   r   rT   r   rv   r   ARITHMETIC_BINOPS
empty_liker   _from_backing_datar(  rU   operatorner   bool_)	rB   re   r   rs  r   r   validr   res_arrs	            rC   _cmp_methodzStringArray._cmp_method  s3   	
 uo.

##:==8$$
5 "!e01%1 JJ'':==8,,JMMA))%%e[)NNEDzDK'E"5zSY& 8T4E
|T 
 !'

5)%LE;;#///]]4==AF::..F4Lt}}U3U;F5M%&**622 XXc$--0?Ft}}U3U;F5M"640Gzz""bff,$"++BHHt+DD"++BHHu+EENrE   r   )r   r   r   r   )r   r   r   r   )r   r|   rG   )r   z,tuple[np.ndarray, libmissing.NAType | float]r   r   )r   r   r   r   )r   r   r   r5   )r   r.   r   r   )T)r   r   )rD   r=   r   r   r/  r   r0  AxisInt | None)rD   r=   r   r   r   r|   )r0  r  r   r   )NT)r   r   r   r4   )r0  r  r   r   r^  r   r   r4   )rc  r   r   r8   )rl  r   r   r   )leftN)r  z$NumpyValueArrayLike | ExtensionArrayro  zLiteral['left', 'right']rp  zNumpySorter | Noner   znpt.NDArray[np.intp] | np.intp)'rv   r   r   r   _typr   r?   r@   r>   r   r[   r   r  r   r   r  r  r  r  r  r  r#  r&  r!   r   r=  rS  rU  r3  r4  r7  rh  rm  r   r#   rq  r}  _arith_methodr   r   s   @rC   r|   r|   <  s   Nb DH+5==I(8

2 	 >BQV "  " H /3%C ,C;?C C
 5 5
=(
8(*38:$+J  PP 	P
 P P, 8< Pd<99  $9 9 	9
 9 
9	 		$	$% *0%)	K3K 'K #	K
 
(K &K>@  MrE   r|   c                  `     e Zd ZdZej
                  ZddZeddd	 	 	 	 	 d fd       Z	 xZ
S )	r}   rM   c                    t        | j                        r,t        j                  | j                  d      st	        d      | j                  j
                  dk7  r#t	        d| j                  j
                   d      y)z+Validate that we only store NaN or strings.Tr   z?StringArrayNumpySemantics requires a sequence of strings or NaNr   zFStringArrayNumpySemantics requires a sequence of strings or NaN. Got 'r   N)r   r   r   r   rV   r   rA   s    rC   r   z#StringArrayNumpySemantics._validate\  su    t}}c&9&9$--PT&UQ  ==(*MM''((8:  +rE   NFr  c               b    |t        dt        j                        }t        |   |||      S )NrM   r   r  )r:   rT   rU   rl   r   )rw   r   r   r   rn   s       rC   r   z(StringArrayNumpySemantics._from_sequencei  s1     =266BEw%gU%FFrE   r~  )r   r   r   r   r   r5   )rv   r   r   r   rT   rU   r>   r   r   r   r   r   s   @rC   r}   r}   X  sL    HI
 /3%G ,G;?G	G GrE   r}   )]
__future__r   	functoolsr   rx  pathlibr   typingr   r   r   r	   rQ   numpyrT   pandas._configr
   r   pandas._libsr   r   r?   pandas._libs.arraysr   pandas._libs.libr   pandas.compatr   r   pandas.compat.numpyr   rX  pandas.util._decoratorsr   pandas.util._exceptionsr   pandas.core.dtypes.baser   r   r   pandas.core.dtypes.commonr   r   r   r   r   r   pandas.corer   r   r    rf  r!   pandas.core.array_algosr"   pandas.core.arrays.baser#   pandas.core.arrays.floatingr$   r%   pandas.core.arrays.integerr&   r'   pandas.core.arrays.numpy_r(   pandas.core.constructionr)   pandas.core.indexersr*   pandas.core.missingr+   pandas.io.formatsr,   collections.abcr-   rL   pandas._typingr.   r/   r0   r1   r2   r3   r4   r5   r6   r7   pandasr8   r:   r   r|   r}   r8  rE   rC   <module>r     s    "      
 . 0 / ' 4 
   ( 5 2 : 2 4 $ &.    ] ' ]  ] @	x)n x)zY /#6 Y xG GrE   