import numpy as np
import pandas as pd


np.__version__

'1.23.5'


np.array([1,1.0,'2']) # 强制转换成同一种类型

array(['1', '1.0', '2'], dtype='<U32')


np.array([1,1.0,'2'],dtype=int)

array([1, 1, 2])


np.ones((3,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])


np.full((3,5),1.231)

array([[1.231, 1.231, 1.231, 1.231, 1.231],
       [1.231, 1.231, 1.231, 1.231, 1.231],
       [1.231, 1.231, 1.231, 1.231, 1.231]])


np.arange(0, 20)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])


np.random.normal(0, 1, (3,3))

array([[ 1.27274173, -2.34781348, -0.31959194],
       [ 0.87566819,  0.09926286, -1.11582647],
       [ 1.5545384 ,  0.17286124,  1.50142145]])


np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])


np.tile?

Signature: np.tile(A, reps)
Docstring:
Construct an array by repeating A the number of times given by reps.

If `reps` has length ``d``, the result will have dimension of
``max(d, A.ndim)``.

If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new
axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication,
or shape (1, 1, 3) for 3-D replication. If this is not the desired
behavior, promote `A` to d-dimensions manually before calling this
function.

If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it.
Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as
(1, 1, 2, 2).

Note : Although tile may be used for broadcasting, it is strongly
recommended to use numpy's broadcasting operations and functions.

Parameters
----------
A : array_like
    The input array.
reps : array_like
    The number of repetitions of `A` along each axis.

Returns
-------
c : ndarray
    The tiled output array.

See Also
--------
repeat : Repeat elements of an array.
broadcast_to : Broadcast an array to a new shape

Examples
--------
>>> a = np.array([0, 1, 2])
>>> np.tile(a, 2)
array([0, 1, 2, 0, 1, 2])
>>> np.tile(a, (2, 2))
array([[0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2]])
>>> np.tile(a, (2, 1, 2))
array([[[0, 1, 2, 0, 1, 2]],
       [[0, 1, 2, 0, 1, 2]]])

>>> b = np.array([[1, 2], [3, 4]])
>>> np.tile(b, 2)
array([[1, 2, 1, 2],
       [3, 4, 3, 4]])
>>> np.tile(b, (2, 1))
array([[1, 2],
       [3, 4],
       [1, 2],
       [3, 4]])

>>> c = np.array([1,2,3,4])
>>> np.tile(c,(4,1))
array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4]])
File:      ~/opt/anaconda3/lib/python3.9/site-packages/numpy/lib/shape_base.py
Type:      function


np.tile(10,3)

array([10, 10, 10])


np.tile([1,2,3],3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])


np.repeat?

Signature: np.repeat(a, repeats, axis=None)
Docstring:
Repeat elements of an array.

Parameters
----------
a : array_like
    Input array.
repeats : int or array of ints
    The number of repetitions for each element.  `repeats` is broadcasted
    to fit the shape of the given axis.
axis : int, optional
    The axis along which to repeat values.  By default, use the
    flattened input array, and return a flat output array.

Returns
-------
repeated_array : ndarray
    Output array which has the same shape as `a`, except along
    the given axis.

See Also
--------
tile : Tile an array.
unique : Find the unique elements of an array.

Examples
--------
>>> np.repeat(3, 4)
array([3, 3, 3, 3])
>>> x = np.array([[1,2],[3,4]])
>>> np.repeat(x, 2)
array([1, 1, 2, 2, 3, 3, 4, 4])
>>> np.repeat(x, 3, axis=1)
array([[1, 1, 1, 2, 2, 2],
       [3, 3, 3, 4, 4, 4]])
>>> np.repeat(x, [1, 2], axis=0)
array([[1, 2],
       [3, 4],
       [3, 4]])
File:      ~/opt/anaconda3/lib/python3.9/site-packages/numpy/core/fromnumeric.py
Type:      function


np.asarray([1,2,3]).repeat(3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])


a = np.random.normal(0, 1, (3,3))
a

array([[-0.49915155,  0.8337671 ,  0.40341252],
       [-1.36387181, -0.18727799, -0.35112724],
       [ 0.77928131, -0.36764775,  0.59875779]])


a[0,1]

0.8337671001714373


a[0:2]

array([[-0.49915155,  0.8337671 ,  0.40341252],
       [-1.36387181, -0.18727799, -0.35112724]])


a[0:2,1]

array([ 0.8337671 , -0.18727799])


a[0,1] = 1000

a

array([[-4.99151549e-01,  1.00000000e+03,  4.03412521e-01],
       [-1.36387181e+00, -1.87277987e-01, -3.51127242e-01],
       [ 7.79281310e-01, -3.67647747e-01,  5.98757794e-01]])


a[0:3:2]

array([[-4.99151549e-01,  1.00000000e+03,  4.03412521e-01],
       [ 7.79281310e-01, -3.67647747e-01,  5.98757794e-01]])


x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
z = [21,21,21]
np.concatenate([x,y,z])

array([ 1,  2,  3,  3,  2,  1, 21, 21, 21])


a1 = np.ones((3,3))
a2 = np.random.normal(0, 1, (3,3))

a1

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

a2

array([[-2.0217118 ,  1.11949095, -1.25693857],
       [-0.51433944,  0.9210426 ,  0.31195337],
       [ 1.53190446,  2.61674289,  0.58387914]])


np.concatenate([a1,a2])

array([[ 1.        ,  1.        ,  1.        ],
       [ 1.        ,  1.        ,  1.        ],
       [ 1.        ,  1.        ,  1.        ],
       [-2.0217118 ,  1.11949095, -1.25693857],
       [-0.51433944,  0.9210426 ,  0.31195337],
       [ 1.53190446,  2.61674289,  0.58387914]])


np.concatenate([a1,a2], axis=1)

array([[ 1.        ,  1.        ,  1.        , -2.0217118 ,  1.11949095,
        -1.25693857],
       [ 1.        ,  1.        ,  1.        , -0.51433944,  0.9210426 ,
         0.31195337],
       [ 1.        ,  1.        ,  1.        ,  1.53190446,  2.61674289,
         0.58387914]])


a1 = np.array(range(10))
a1[0:3]

array([0, 1, 2])

a1

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


a1.shape

(10,)


a1 = a1.reshape((10,1))


a1.shape

(10, 1)

a1

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])


a1 = a1.reshape((1,10))


a1.shape

(1, 10)

a1

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])


a1.reshape((2,6))

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[39], line 1
----> 1 a1.reshape((2,6))

ValueError: cannot reshape array of size 10 into shape (2,6)


a1 = a1.reshape((2,5))

a1

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])


a2 = np.array(range(10,20)).reshape((2,5))

a2

array([[10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

a1

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])


np.vstack([a1,a2])

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])


np.hstack([a1,a2])

array([[ 0,  1,  2,  3,  4, 10, 11, 12, 13, 14],
       [ 5,  6,  7,  8,  9, 15, 16, 17, 18, 19]])


a1 + 1000

array([[1000, 1001, 1002, 1003, 1004],
       [1005, 1006, 1007, 1008, 1009]])


a1 * a2

array([[  0,  11,  24,  39,  56],
       [ 75,  96, 119, 144, 171]])


list1 = [1,2,3] # some manipulations is not possible with raw python
list2 = [4,5,6]


list1 * list2

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[50], line 1
----> 1 list1 * list2

TypeError: can't multiply sequence by non-int of type 'list'


list1 + list2

[1, 2, 3, 4, 5, 6]


x = np.array([[1, 2, 3], [4, 5, 6]])
x.ravel()

array([1, 2, 3, 4, 5, 6])


x.ravel(order='F')

array([1, 4, 2, 5, 3, 6])

a1

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])


a1.max()

9


a1.max(axis=1)

array([4, 9])


a1.max(axis=0)

array([5, 6, 7, 8, 9])


a1.sum()

45


a1.sum(axis=0)

array([ 5,  7,  9, 11, 13])


a1.sum(axis=1)

array([10, 35])


a1.mean()

4.5

a1

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])


a1.mean(axis=1)

array([2., 7.])


a1.mean(axis=0)

array([2.5, 3.5, 4.5, 5.5, 6.5])


import numpy.ma as ma


x = np.array([1,2,3,-1,4])


mx = ma.masked_array(x, mask=[0,0,0,1,0])

mx

masked_array(data=[1, 2, 3, --, 4],
             mask=[False, False, False,  True, False],
       fill_value=999999)


mx.mean()

2.5


# -999 as an invalid value, as sometimes seen in some databases
a = np.array([0,1,2,-999])
ma.masked_values(a, -999)

masked_array(data=[0, 1, 2, --],
             mask=[False, False, False,  True],
       fill_value=-999)


df = pd.DataFrame({'name':[chr(i) for i in range(ord('a'),ord('z')+1)],
                   'value':range(26)}) # string.ascii_lowercase provides the alphabet too.
df


data = [['tom', 10], ['nick', 15], ['juli', 14]]
df = pd.DataFrame(data, columns = ['Name', 'Age'], index=['a','b','c'])
df


import pandas._testing as tm


dir(tm)

['ALL_INT_EA_DTYPES',
 'ALL_INT_NUMPY_DTYPES',
 'ALL_NUMPY_DTYPES',
 'ALL_REAL_NUMPY_DTYPES',
 'BOOL_DTYPES',
 'BYTES_DTYPES',
 'BaseMaskedArray',
 'COMPLEX_DTYPES',
 'Callable',
 'Categorical',
 'CategoricalIndex',
 'ContextManager',
 'Counter',
 'DATETIME64_DTYPES',
 'DataFrame',
 'DatetimeIndex',
 'Decimal',
 'Dtype',
 'EMPTY_STRING_PATTERN',
 'ENDIAN',
 'ExtensionArray',
 'FLOAT_EA_DTYPES',
 'FLOAT_NUMPY_DTYPES',
 'Float64Index',
 'Index',
 'Int64Index',
 'IntervalIndex',
 'Iterable',
 'MultiIndex',
 'NARROW_NP_DTYPES',
 'NDArrayBackedExtensionArray',
 'NP_NAT_OBJECTS',
 'NULL_OBJECTS',
 'NumericIndex',
 'OBJECT_DTYPES',
 'PandasArray',
 'RNGContext',
 'RangeIndex',
 'SIGNED_INT_EA_DTYPES',
 'SIGNED_INT_NUMPY_DTYPES',
 'STRING_DTYPES',
 'Series',
 'SubclassedCategorical',
 'SubclassedDataFrame',
 'SubclassedSeries',
 'TIMEDELTA64_DTYPES',
 'TYPE_CHECKING',
 'UInt64Index',
 'UNSIGNED_INT_EA_DTYPES',
 'UNSIGNED_INT_NUMPY_DTYPES',
 '_K',
 '_N',
 '__all__',
 '__annotations__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_create_missing_idx',
 '_io',
 '_make_skipna_wrapper',
 '_random',
 '_testing_mode_warnings',
 '_warnings',
 'all_timeseries_index_generator',
 'annotations',
 'assert_almost_equal',
 'assert_attr_equal',
 'assert_categorical_equal',
 'assert_class_equal',
 'assert_contains_all',
 'assert_copy',
 'assert_datetime_array_equal',
 'assert_dict_equal',
 'assert_equal',
 'assert_extension_array_equal',
 'assert_frame_equal',
 'assert_index_equal',
 'assert_indexing_slices_equivalent',
 'assert_interval_array_equal',
 'assert_is_sorted',
 'assert_is_valid_plot_return_object',
 'assert_metadata_equivalent',
 'assert_numpy_array_equal',
 'assert_period_array_equal',
 'assert_produces_warning',
 'assert_series_equal',
 'assert_sp_array_equal',
 'assert_timedelta_array_equal',
 'asserters',
 'at',
 'bdate_range',
 'box_expected',
 'byteorder',
 'can_set_locale',
 'close',
 'collections',
 'compat',
 'contexts',
 'convert_rows_list_to_csv_str',
 'cython_table',
 'datetime',
 'decompress_file',
 'ensure_clean',
 'ensure_clean_dir',
 'ensure_safe_environment_variables',
 'equalContents',
 'external_error_raised',
 'extract_array',
 'getCols',
 'getMixedTypeDict',
 'getPeriodData',
 'getSeriesData',
 'getTimeSeriesData',
 'get_cython_table_params',
 'get_dtype',
 'get_locales',
 'get_obj',
 'get_op_from_name',
 'getitem',
 'iat',
 'iloc',
 'index_subclass_makers_generator',
 'is_float_dtype',
 'is_integer_dtype',
 'is_sequence',
 'is_unsigned_integer_dtype',
 'loc',
 'makeBoolIndex',
 'makeCategoricalIndex',
 'makeCustomDataframe',
 'makeCustomIndex',
 'makeDataFrame',
 'makeDateIndex',
 'makeFloatIndex',
 'makeFloatSeries',
 'makeIntIndex',
 'makeIntervalIndex',
 'makeMissingDataframe',
 'makeMixedDataFrame',
 'makeMultiIndex',
 'makeNumericIndex',
 'makeObjectSeries',
 'makePeriodFrame',
 'makePeriodIndex',
 'makePeriodSeries',
 'makeRangeIndex',
 'makeStringIndex',
 'makeStringSeries',
 'makeTimeDataFrame',
 'makeTimeSeries',
 'makeTimedeltaIndex',
 'makeUIntIndex',
 'make_rand_series',
 'maybe_produces_warning',
 'network',
 'np',
 'operator',
 'os',
 'pa_version_under1p01',
 'pandas_dtype',
 'pd',
 'raise_assert_detail',
 'randbool',
 'rands',
 'rands_array',
 're',
 'reset_display_options',
 'reset_testing_mode',
 'round_trip_localpath',
 'round_trip_pathlib',
 'round_trip_pickle',
 'set_locale',
 'set_testing_mode',
 'set_timezone',
 'setitem',
 'shares_memory',
 'string',
 'to_array',
 'use_numexpr',
 'warnings',
 'with_csv_dialect',
 'write_to_compressed']


tm.makeDataFrame()


tm.makeMixedDataFrame().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   A       5 non-null      float64       
 1   B       5 non-null      float64       
 2   C       5 non-null      object        
 3   D       5 non-null      datetime64[ns]
dtypes: datetime64[ns](1), float64(2), object(1)
memory usage: 288.0+ bytes


df1 = tm.makeTimeDataFrame()
df1


df2 = tm.makePeriodFrame()
df2


df1.index

DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06',
               '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12',
               '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18',
               '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24',
               '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28',
               '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03',
               '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09',
               '2000-02-10', '2000-02-11'],
              dtype='datetime64[ns]', freq='B')


df1.index.day_of_week

Int64Index([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1,
            2, 3, 4, 0, 1, 2, 3, 4],
           dtype='int64')


df1.index.shift(periods=1) # 时间序列index的shift，会按照指定的period或freq平移。注意：如果没有时间序列，则直接平移，见df1['A']例

DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
               '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13',
               '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19',
               '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25',
               '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31',
               '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04',
               '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10',
               '2000-02-11', '2000-02-14'],
              dtype='datetime64[ns]', freq='B')


df1.index.shift(freq='D')

DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
               '2000-01-08', '2000-01-11', '2000-01-12', '2000-01-13',
               '2000-01-14', '2000-01-15', '2000-01-18', '2000-01-19',
               '2000-01-20', '2000-01-21', '2000-01-22', '2000-01-25',
               '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-29',
               '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04',
               '2000-02-05', '2000-02-08', '2000-02-09', '2000-02-10',
               '2000-02-11', '2000-02-12'],
              dtype='datetime64[ns]', freq=None)


df1.index.shift(freq='2D')

DatetimeIndex(['2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08',
               '2000-01-09', '2000-01-12', '2000-01-13', '2000-01-14',
               '2000-01-15', '2000-01-16', '2000-01-19', '2000-01-20',
               '2000-01-21', '2000-01-22', '2000-01-23', '2000-01-26',
               '2000-01-27', '2000-01-28', '2000-01-29', '2000-01-30',
               '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-05',
               '2000-02-06', '2000-02-09', '2000-02-10', '2000-02-11',
               '2000-02-12', '2000-02-13'],
              dtype='datetime64[ns]', freq=None)


df1['A']

2000-01-03   -0.144370
2000-01-04   -2.145394
2000-01-05    0.282223
2000-01-06   -0.199454
2000-01-07    0.892563
2000-01-10    1.952667
2000-01-11    1.182459
2000-01-12   -0.367848
2000-01-13    0.023308
2000-01-14    1.410542
2000-01-17    0.783971
2000-01-18   -1.361430
2000-01-19    1.346466
2000-01-20    0.347398
2000-01-21   -1.510918
2000-01-24   -0.307359
2000-01-25    0.987366
2000-01-26    0.187069
2000-01-27   -1.191208
2000-01-28   -1.403417
2000-01-31   -0.786557
2000-02-01   -0.541403
2000-02-02   -1.331567
2000-02-03    0.665224
2000-02-04    2.076048
2000-02-07    0.936564
2000-02-08   -0.737788
2000-02-09    1.030261
2000-02-10    0.404236
2000-02-11    0.498774
Freq: B, Name: A, dtype: float64


df1['A'].shift()

2000-01-03         NaN
2000-01-04   -0.144370
2000-01-05   -2.145394
2000-01-06    0.282223
2000-01-07   -0.199454
2000-01-10    0.892563
2000-01-11    1.952667
2000-01-12    1.182459
2000-01-13   -0.367848
2000-01-14    0.023308
2000-01-17    1.410542
2000-01-18    0.783971
2000-01-19   -1.361430
2000-01-20    1.346466
2000-01-21    0.347398
2000-01-24   -1.510918
2000-01-25   -0.307359
2000-01-26    0.987366
2000-01-27    0.187069
2000-01-28   -1.191208
2000-01-31   -1.403417
2000-02-01   -0.786557
2000-02-02   -0.541403
2000-02-03   -1.331567
2000-02-04    0.665224
2000-02-07    2.076048
2000-02-08    0.936564
2000-02-09   -0.737788
2000-02-10    1.030261
2000-02-11    0.404236
Freq: B, Name: A, dtype: float64


a = df1.index.shift() - df1.index

a

TimedeltaIndex(['1 days', '1 days', '1 days', '1 days', '3 days', '1 days',
                '1 days', '1 days', '1 days', '3 days', '1 days', '1 days',
                '1 days', '1 days', '3 days', '1 days', '1 days', '1 days',
                '1 days', '3 days', '1 days', '1 days', '1 days', '1 days',
                '3 days', '1 days', '1 days', '1 days', '1 days', '3 days'],
               dtype='timedelta64[ns]', freq=None)


df1.index.values # index中内部的保存数据类型。包装成index后有对应于index的其他操作

array(['2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000',
       '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000',
       '2000-01-07T00:00:00.000000000', '2000-01-10T00:00:00.000000000',
       '2000-01-11T00:00:00.000000000', '2000-01-12T00:00:00.000000000',
       '2000-01-13T00:00:00.000000000', '2000-01-14T00:00:00.000000000',
       '2000-01-17T00:00:00.000000000', '2000-01-18T00:00:00.000000000',
       '2000-01-19T00:00:00.000000000', '2000-01-20T00:00:00.000000000',
       '2000-01-21T00:00:00.000000000', '2000-01-24T00:00:00.000000000',
       '2000-01-25T00:00:00.000000000', '2000-01-26T00:00:00.000000000',
       '2000-01-27T00:00:00.000000000', '2000-01-28T00:00:00.000000000',
       '2000-01-31T00:00:00.000000000', '2000-02-01T00:00:00.000000000',
       '2000-02-02T00:00:00.000000000', '2000-02-03T00:00:00.000000000',
       '2000-02-04T00:00:00.000000000', '2000-02-07T00:00:00.000000000',
       '2000-02-08T00:00:00.000000000', '2000-02-09T00:00:00.000000000',
       '2000-02-10T00:00:00.000000000', '2000-02-11T00:00:00.000000000'],
      dtype='datetime64[ns]')


df1.index.values.to_period()

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[89], line 1
----> 1 df1.index.values.to_period()

AttributeError: 'numpy.ndarray' object has no attribute 'to_period'


df1.index.to_period()

PeriodIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06',
             '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12',
             '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18',
             '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24',
             '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28',
             '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03',
             '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09',
             '2000-02-10', '2000-02-11'],
            dtype='period[B]')


df2.index

PeriodIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06',
             '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12',
             '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18',
             '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24',
             '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28',
             '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03',
             '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09',
             '2000-02-10', '2000-02-11'],
            dtype='period[B]')


df2.index.values

array([Period('2000-01-03', 'B'), Period('2000-01-04', 'B'),
       Period('2000-01-05', 'B'), Period('2000-01-06', 'B'),
       Period('2000-01-07', 'B'), Period('2000-01-10', 'B'),
       Period('2000-01-11', 'B'), Period('2000-01-12', 'B'),
       Period('2000-01-13', 'B'), Period('2000-01-14', 'B'),
       Period('2000-01-17', 'B'), Period('2000-01-18', 'B'),
       Period('2000-01-19', 'B'), Period('2000-01-20', 'B'),
       Period('2000-01-21', 'B'), Period('2000-01-24', 'B'),
       Period('2000-01-25', 'B'), Period('2000-01-26', 'B'),
       Period('2000-01-27', 'B'), Period('2000-01-28', 'B'),
       Period('2000-01-31', 'B'), Period('2000-02-01', 'B'),
       Period('2000-02-02', 'B'), Period('2000-02-03', 'B'),
       Period('2000-02-04', 'B'), Period('2000-02-07', 'B'),
       Period('2000-02-08', 'B'), Period('2000-02-09', 'B'),
       Period('2000-02-10', 'B'), Period('2000-02-11', 'B')], dtype=object)


df2.index.week

Int64Index([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5,
            5, 5, 5, 6, 6, 6, 6, 6],
           dtype='int64')


df2.index.day_of_week

Int64Index([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1,
            2, 3, 4, 0, 1, 2, 3, 4],
           dtype='int64')


df2.index.weekofyear

Int64Index([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5,
            5, 5, 5, 6, 6, 6, 6, 6],
           dtype='int64')


df2.index.to_timestamp()

DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06',
               '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12',
               '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18',
               '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24',
               '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28',
               '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03',
               '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09',
               '2000-02-10', '2000-02-11'],
              dtype='datetime64[ns]', freq='B')


df2.index.shift() - df2.index

Index([<BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>,
       <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>,
       <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>,
       <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>,
       <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>,
       <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>,
       <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>,
       <BusinessDay>, <BusinessDay>],
      dtype='object')


df2.index[1:].append(pd.Index([np.nan])) # 因为shift直接平移日期。若需直接平移数值，可用这个替代。

PeriodIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
             '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13',
             '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19',
             '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25',
             '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31',
             '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04',
             '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10',
             '2000-02-11',        'NaT'],
            dtype='period[B]')


df2.index[1:].append(pd.Index([np.nan])).asfreq('D') - df2.index.asfreq('D')

Index([     <Day>,      <Day>,      <Day>,      <Day>, <3 * Days>,      <Day>,
            <Day>,      <Day>,      <Day>, <3 * Days>,      <Day>,      <Day>,
            <Day>,      <Day>, <3 * Days>,      <Day>,      <Day>,      <Day>,
            <Day>, <3 * Days>,      <Day>,      <Day>,      <Day>,      <Day>,
       <3 * Days>,      <Day>,      <Day>,      <Day>,      <Day>,        NaT],
      dtype='object')


df = pd.read_pickle('../../../data/stk_df_2015_2021.pkl')

df


display(df)


temp = pd.read_csv('../../../data/idx_df_csv.csv',dtype={'ticker':str})
temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1365 entries, 0 to 1364
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ticker         1365 non-null   object 
 1   secShortName   1365 non-null   object 
 2   tradeDate      1365 non-null   object 
 3   preCloseIndex  1365 non-null   float64
 4   openIndex      1365 non-null   float64
 5   highestIndex   1365 non-null   float64
 6   lowestIndex    1365 non-null   float64
 7   closeIndex     1365 non-null   float64
 8   turnoverVol    1365 non-null   int64  
dtypes: float64(5), int64(1), object(3)
memory usage: 96.1+ KB


temp


# 如果读取csv或其他类型的文件, 可能会遇到一些格式转换的问题
pd.read_csv('../../../data/idx_df_csv.csv')


df.head(10)


df.tail()


df.describe()


df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5838750 entries, 0 to 5838749
Data columns (total 12 columns):
 #   Column          Dtype  
---  ------          -----  
 0   secID           object 
 1   secShortName    object 
 2   exchangeCD      object 
 3   tradeDate       object 
 4   preClosePrice   float64
 5   closePrice      float64
 6   turnoverVol     int64  
 7   turnoverValue   float64
 8   dealAmount      float64
 9   turnoverRate    float64
 10  negMarketValue  float64
 11  marketValue     float64
dtypes: float64(7), int64(1), object(4)
memory usage: 534.6+ MB


df['secShortName'].nunique() # 多少只股票

4853


df['turnoverValue'].mean()

200899200.96656865


df.select_dtypes(include=np.number)


df.select_dtypes(include=np.number).mean()

preClosePrice     1.368568e+02
closePrice        1.368763e+02
turnoverVol       5.525219e+06
turnoverValue     2.008992e+08
dealAmount        1.078089e+04
turnoverRate      2.986831e-02
negMarketValue    1.324705e+10
marketValue       1.835989e+10
dtype: float64


df.select_dtypes(include=np.number).mean(axis=1)

0          4.314767e+10
1          4.237068e+10
2          4.147094e+10
3          4.002552e+10
4          4.055811e+10
               ...     
5838745    4.227324e+07
5838746    4.214321e+07
5838747    4.166574e+07
5838748    4.199687e+07
5838749    4.207461e+07
Length: 5838750, dtype: float64


df_pingan = df[df['secShortName']=='平安银行'].copy()


df_pingan


x = np.array(['Testing', 'a', 'string'], dtype='|S7') # 长度为7的字符串。|是byteorder指示符
y = np.array(['Testing', 'a', 'string'], dtype=object)


x[1] = 'a really really really long string'
x

array([b'Testing', b'a reall', b'string'], dtype='|S7')


y[1] = 'a really really really long string'
y

array(['Testing', 'a really really really long string', 'string'],
      dtype=object)


temp = pd.DataFrame({'a':range(5)})


temp['a'].dtype

dtype('int64')


temp['a'].astype(object).dtype

dtype('O')


temp['a'].astype(str).dtype

dtype('O')


temp = tm.makeTimeDataFrame(3)
display(temp)


temp2 = temp.stack().to_frame().reset_index()
temp2.columns = ['date','variable','value']


temp2


temp2.sort_values(by='value')


temp2.sort_values(by='variable',ascending=False)


temp2.duplicated('variable')

0     False
1     False
2     False
3     False
4      True
5      True
6      True
7      True
8      True
9      True
10     True
11     True
dtype: bool


temp2.drop_duplicates('variable')


temp2.drop_duplicates('variable',keep='last')


temp = tm.makeDataFrame()
temp['A'] = np.random.randint(0, 100, size=temp.shape[0])


temp.drop(['B','C','D'],axis=1, inplace=True)


temp.columns = ['score']


temp


?pd.cut

Signature:
pd.cut(
    x,
    bins,
    right: 'bool' = True,
    labels=None,
    retbins: 'bool' = False,
    precision: 'int' = 3,
    include_lowest: 'bool' = False,
    duplicates: 'str' = 'raise',
    ordered: 'bool' = True,
)
Docstring:
Bin values into discrete intervals.

Use `cut` when you need to segment and sort data values into bins. This
function is also useful for going from a continuous variable to a
categorical variable. For example, `cut` could convert ages to groups of
age ranges. Supports binning into an equal number of bins, or a
pre-specified array of bins.

Parameters
----------
x : array-like
    The input array to be binned. Must be 1-dimensional.
bins : int, sequence of scalars, or IntervalIndex
    The criteria to bin by.

    * int : Defines the number of equal-width bins in the range of `x`. The
      range of `x` is extended by .1% on each side to include the minimum
      and maximum values of `x`.
    * sequence of scalars : Defines the bin edges allowing for non-uniform
      width. No extension of the range of `x` is done.
    * IntervalIndex : Defines the exact bins to be used. Note that
      IntervalIndex for `bins` must be non-overlapping.

right : bool, default True
    Indicates whether `bins` includes the rightmost edge or not. If
    ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
    indicate (1,2], (2,3], (3,4]. This argument is ignored when
    `bins` is an IntervalIndex.
labels : array or False, default None
    Specifies the labels for the returned bins. Must be the same length as
    the resulting bins. If False, returns only integer indicators of the
    bins. This affects the type of the output container (see below).
    This argument is ignored when `bins` is an IntervalIndex. If True,
    raises an error. When `ordered=False`, labels must be provided.
retbins : bool, default False
    Whether to return the bins or not. Useful when bins is provided
    as a scalar.
precision : int, default 3
    The precision at which to store and display the bins labels.
include_lowest : bool, default False
    Whether the first interval should be left-inclusive or not.
duplicates : {default 'raise', 'drop'}, optional
    If bin edges are not unique, raise ValueError or drop non-uniques.
ordered : bool, default True
    Whether the labels are ordered or not. Applies to returned types
    Categorical and Series (with Categorical dtype). If True,
    the resulting categorical will be ordered. If False, the resulting
    categorical will be unordered (labels must be provided).

    .. versionadded:: 1.1.0

Returns
-------
out : Categorical, Series, or ndarray
    An array-like object representing the respective bin for each value
    of `x`. The type depends on the value of `labels`.

    * None (default) : returns a Series for Series `x` or a
      Categorical for all other inputs. The values stored within
      are Interval dtype.

    * sequence of scalars : returns a Series for Series `x` or a
      Categorical for all other inputs. The values stored within
      are whatever the type in the sequence is.

    * False : returns an ndarray of integers.

bins : numpy.ndarray or IntervalIndex.
    The computed or specified bins. Only returned when `retbins=True`.
    For scalar or sequence `bins`, this is an ndarray with the computed
    bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For
    an IntervalIndex `bins`, this is equal to `bins`.

See Also
--------
qcut : Discretize variable into equal-sized buckets based on rank
    or based on sample quantiles.
Categorical : Array type for storing data that come from a
    fixed set of values.
Series : One-dimensional array with axis labels (including time series).
IntervalIndex : Immutable Index implementing an ordered, sliceable set.

Notes
-----
Any NA values will be NA in the result. Out of bounds values will be NA in
the resulting Series or Categorical object.

Reference :ref:`the user guide <reshaping.tile.cut>` for more examples.

Examples
--------
Discretize into three equal-sized bins.

>>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3)
... # doctest: +ELLIPSIS
[(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...

>>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True)
... # doctest: +ELLIPSIS
([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
array([0.994, 3.   , 5.   , 7.   ]))

Discovers the same bins, but assign them specific labels. Notice that
the returned Categorical's categories are `labels` and is ordered.

>>> pd.cut(np.array([1, 7, 5, 4, 6, 3]),
...        3, labels=["bad", "medium", "good"])
['bad', 'good', 'medium', 'medium', 'good', 'bad']
Categories (3, object): ['bad' < 'medium' < 'good']

``ordered=False`` will result in unordered categories when labels are passed.
This parameter can be used to allow non-unique labels:

>>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
...        labels=["B", "A", "B"], ordered=False)
['B', 'B', 'A', 'A', 'B', 'B']
Categories (2, object): ['A', 'B']

``labels=False`` implies you just want the bins back.

>>> pd.cut([0, 1, 1, 2], bins=4, labels=False)
array([0, 1, 1, 3])

Passing a Series as an input returns a Series with categorical dtype:

>>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
...               index=['a', 'b', 'c', 'd', 'e'])
>>> pd.cut(s, 3)
... # doctest: +ELLIPSIS
a    (1.992, 4.667]
b    (1.992, 4.667]
c    (4.667, 7.333]
d     (7.333, 10.0]
e     (7.333, 10.0]
dtype: category
Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ...

Passing a Series as an input returns a Series with mapping value.
It is used to map numerically to intervals based on bins.

>>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
...               index=['a', 'b', 'c', 'd', 'e'])
>>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False)
... # doctest: +ELLIPSIS
(a    1.0
 b    2.0
 c    3.0
 d    4.0
 e    NaN
 dtype: float64,
 array([ 0,  2,  4,  6,  8, 10]))

Use `drop` optional when bins is not unique

>>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
...        right=False, duplicates='drop')
... # doctest: +ELLIPSIS
(a    1.0
 b    2.0
 c    3.0
 d    3.0
 e    NaN
 dtype: float64,
 array([ 0,  2,  4,  6, 10]))

Passing an IntervalIndex for `bins` results in those categories exactly.
Notice that values not covered by the IntervalIndex are set to NaN. 0
is to the left of the first bin (which is closed on the right), and 1.5
falls between two bins.

>>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
>>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins)
[NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]]
Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]]
File:      ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/reshape/tile.py
Type:      function


temp['score'][3] = 0


bins = [0, 25, 50, 75, 100]
pd.cut(temp['score'], bins=bins, labels=['D','C','B','A'])

leMJDtok1y      B
tHQMKBjt4g      A
UHQZfBqS9C      C
HnNcYZiPS3    NaN
Q8E0jeFgcZ      B
V3O7nacCx6      D
A21m7MIYO0      A
jPRB2EYMJ9      D
lyVA6FSPDQ      C
xRVJg7Noqk      A
GWdYMjt64S      B
tsy4vUUXuW      C
aqO7XSrLqj      B
ozmzHJ0hio      C
R5iMCRIpOp      B
f2WbBxzk3u      B
NmyCgrqM22      A
kn8e4znWFQ      B
61oSoW6COF      C
slhMjph9xG      B
P6px9D4zmM      D
hZPrwuiO1U      C
dY9aIT4EX3      B
eT1vVe47Lq      A
7HcEWFCsMB      A
3Wj1dUlcme      B
AuRMTRsaSF      A
R7dGWFgtRe      A
Y5Y887ynFq      C
xWjpVzhPye      B
Name: score, dtype: category
Categories (4, object): ['D' < 'C' < 'B' < 'A']


bins = [0, 25, 50, 75, 100]
pd.cut(temp['score'], bins=bins, labels=['D','C','B','A'],include_lowest=True)

leMJDtok1y    B
tHQMKBjt4g    A
UHQZfBqS9C    C
HnNcYZiPS3    D
Q8E0jeFgcZ    B
V3O7nacCx6    D
A21m7MIYO0    A
jPRB2EYMJ9    D
lyVA6FSPDQ    C
xRVJg7Noqk    A
GWdYMjt64S    B
tsy4vUUXuW    C
aqO7XSrLqj    B
ozmzHJ0hio    C
R5iMCRIpOp    B
f2WbBxzk3u    B
NmyCgrqM22    A
kn8e4znWFQ    B
61oSoW6COF    C
slhMjph9xG    B
P6px9D4zmM    D
hZPrwuiO1U    C
dY9aIT4EX3    B
eT1vVe47Lq    A
7HcEWFCsMB    A
3Wj1dUlcme    B
AuRMTRsaSF    A
R7dGWFgtRe    A
Y5Y887ynFq    C
xWjpVzhPye    B
Name: score, dtype: category
Categories (4, object): ['D' < 'C' < 'B' < 'A']


df_pingan


df_pingan.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1705 entries, 0 to 1704
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   secID           1705 non-null   object 
 1   secShortName    1705 non-null   object 
 2   exchangeCD      1705 non-null   object 
 3   tradeDate       1705 non-null   object 
 4   preClosePrice   1705 non-null   float64
 5   closePrice      1705 non-null   float64
 6   turnoverVol     1705 non-null   int64  
 7   turnoverValue   1705 non-null   float64
 8   dealAmount      1705 non-null   float64
 9   turnoverRate    1705 non-null   float64
 10  negMarketValue  1705 non-null   float64
 11  marketValue     1705 non-null   float64
dtypes: float64(7), int64(1), object(4)
memory usage: 173.2+ KB


df_pingan['tradeDate'] = pd.to_datetime(df_pingan['tradeDate'], format='%Y-%m-%d')


df_pingan.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1705 entries, 0 to 1704
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   secID           1705 non-null   object        
 1   secShortName    1705 non-null   object        
 2   exchangeCD      1705 non-null   object        
 3   tradeDate       1705 non-null   datetime64[ns]
 4   preClosePrice   1705 non-null   float64       
 5   closePrice      1705 non-null   float64       
 6   turnoverVol     1705 non-null   int64         
 7   turnoverValue   1705 non-null   float64       
 8   dealAmount      1705 non-null   float64       
 9   turnoverRate    1705 non-null   float64       
 10  negMarketValue  1705 non-null   float64       
 11  marketValue     1705 non-null   float64       
dtypes: datetime64[ns](1), float64(7), int64(1), object(3)
memory usage: 173.2+ KB


df_pingan.set_index('tradeDate', inplace=True)


df_pingan


df_pingan.resample('BM').last()


df_pingan.resample('M').mean(numeric_only=True)


df_pingan.loc['2015']


df_pingan.loc['2015-01':'2018-03']

df


df.select_dtypes(np.number).apply(lambda x: x**2+3)


df.groupby('secShortName')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f99b254a5e0>


df.groupby('secShortName').last()


df.groupby('secShortName').first()


df.groupby('secShortName').head(3)


df.groupby('secShortName').nth(-2)


arrays = [
    np.array(["bar", "bar", 'bar', "baz", "baz", "baz", "foo", "foo", "foo", "qux", "qux", "qux"]),
    np.array([1,2,3, 1,2,3, 1,2,3, 1,2,3]),
]
temp = pd.DataFrame(np.random.randn(12, 4), index=arrays)


temp


temp.loc[('bar',2):('foo',3)]


df['tradeDate'] = pd.to_datetime(df['tradeDate'], format='%Y-%m-%d')


df.set_index(['secShortName','tradeDate'])


df.set_index(['secShortName','tradeDate'],inplace=True)

df


df.groupby(['secShortName','tradeDate']).last()


df.loc['平安银行','2021']

/var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_12830/224712730.py:1: PerformanceWarning: indexing past lexsort depth may impact performance.
  df.loc['平安银行','2021']
/var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_12830/224712730.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead.
  df.loc['平安银行','2021']


df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key

/var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_12830/2348492628.py:1: PerformanceWarning: indexing past lexsort depth may impact performance.
  df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key
/var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_12830/2348492628.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead.
  df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key


df.loc[['平安银行','2016']]# a list is used to specify several keys [on the same level]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[172], line 1
----> 1 df.loc[['平安银行','2016']]

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key)
   1070 axis = self.axis or 0
   1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1301, in _LocIndexer._getitem_axis(self, key, axis)
   1298     if hasattr(key, "ndim") and key.ndim > 1:
   1299         raise ValueError("Cannot index with multidimensional key")
-> 1301     return self._getitem_iterable(key, axis=axis)
   1303 # nested tuple slicing
   1304 if is_nested_tuple(key, labels):

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1239, in _LocIndexer._getitem_iterable(self, key, axis)
   1236 self._validate_key(key, axis)
   1238 # A collection of keys
-> 1239 keyarr, indexer = self._get_listlike_indexer(key, axis)
   1240 return self.obj._reindex_with_indexers(
   1241     {axis: [keyarr, indexer]}, copy=True, allow_dups=True
   1242 )

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1432, in _LocIndexer._get_listlike_indexer(self, key, axis)
   1429 ax = self.obj._get_axis(axis)
   1430 axis_name = self.obj._get_axis_name(axis)
-> 1432 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
   1434 return keyarr, indexer

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2623, in MultiIndex._get_indexer_strict(self, key, axis_name)
   2620 if len(keyarr) and not isinstance(keyarr[0], tuple):
   2621     indexer = self._get_indexer_level_0(keyarr)
-> 2623     self._raise_if_missing(key, indexer, axis_name)
   2624     return self[indexer], indexer
   2626 return super()._get_indexer_strict(key, axis_name)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2641, in MultiIndex._raise_if_missing(self, key, indexer, axis_name)
   2639 cmask = check == -1
   2640 if cmask.any():
-> 2641     raise KeyError(f"{keyarr[cmask]} not in index")
   2642 # We get here when levels still contain values which are not
   2643 # actually in Index anymore
   2644 raise KeyError(f"{keyarr} not in index")

KeyError: "['2016'] not in index"


df.loc[('平安银行','2015':'2016')]

  Cell In[173], line 1
    df.loc[('平安银行','2015':'2016')]
                         ^
SyntaxError: invalid syntax


df.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))]

---------------------------------------------------------------------------
UnsortedIndexError                        Traceback (most recent call last)
Cell In[174], line 1
----> 1 df.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))]

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key)
   1070 axis = self.axis or 0
   1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1290, in _LocIndexer._getitem_axis(self, key, axis)
   1288 if isinstance(key, slice):
   1289     self._validate_key(key, axis)
-> 1290     return self._get_slice_axis(key, axis=axis)
   1291 elif com.is_bool_indexer(key):
   1292     return self._getbool_axis(key, axis=axis)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1324, in _LocIndexer._get_slice_axis(self, slice_obj, axis)
   1321     return obj.copy(deep=False)
   1323 labels = obj._get_axis(axis)
-> 1324 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
   1326 if isinstance(indexer, slice):
   1327     return self.obj._slice(indexer, axis=axis)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:6602, in Index.slice_indexer(self, start, end, step, kind)
   6559 """
   6560 Compute the slice indexer for input labels and step.
   6561 
   (...)
   6598 slice(1, 3, None)
   6599 """
   6600 self._deprecated_arg(kind, "kind", "slice_indexer")
-> 6602 start_slice, end_slice = self.slice_locs(start, end, step=step)
   6604 # return a slice
   6605 if not is_scalar(start_slice):

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2775, in MultiIndex.slice_locs(self, start, end, step, kind)
   2772 self._deprecated_arg(kind, "kind", "slice_locs")
   2773 # This function adds nothing to its parent implementation (the magic
   2774 # happens in get_slice_bound method), but it adds meaningful doc.
-> 2775 return super().slice_locs(start, end, step)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:6810, in Index.slice_locs(self, start, end, step, kind)
   6808 start_slice = None
   6809 if start is not None:
-> 6810     start_slice = self.get_slice_bound(start, "left")
   6811 if start_slice is None:
   6812     start_slice = 0

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2714, in MultiIndex.get_slice_bound(self, label, side, kind)
   2712 if not isinstance(label, tuple):
   2713     label = (label,)
-> 2714 return self._partial_tup_index(label, side=side)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2779, in MultiIndex._partial_tup_index(self, tup, side)
   2777 def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
   2778     if len(tup) > self._lexsort_depth:
-> 2779         raise UnsortedIndexError(
   2780             f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
   2781             f"({self._lexsort_depth})"
   2782         )
   2784     n = len(tup)
   2785     start, end = 0, len(self)

UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (0)'


df2 = df.sort_index() # 必须先排序才可做slicing


df2.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))]


df2.loc[pd.IndexSlice[('宁德时代','2015-01'):('平安银行','2020-02')]]


df.loc['平安银行'].loc['2015':'2016']


df1 = pd.DataFrame({'a':[1,2,3],'b':[4,5,6],'c':[10,20,30]})
df2 = pd.DataFrame({'a':np.array([1,2,6]),
                    'b':[s for s in 'abc'],
                    'd':[s for s in 'zxz']})

df1

df2


pd.merge(df1,df2,on='a')


pd.merge(df1,df2,on='a',how='outer')


pd.merge(df1,df2,on='a',how='left')


pd.merge(df1,df2,on='a',how='right')


temp = tm.makeTimeDataFrame(3)
display(temp)


pd.DataFrame(temp.stack()).reset_index()


temp2 = pd.DataFrame(temp.stack()).reset_index()
temp2.columns=['date','variable','value']


temp2


def unpivot(frame):
    """
    Essentially the same as 'stack'
    """
    N, K = frame.shape
    data = {
        "value": frame.to_numpy().ravel("F"),
        "variable": np.asarray(frame.columns).repeat(N),
        "date": np.tile(np.asarray(frame.index), K),
    }
    return pd.DataFrame(data, columns=["date", "variable", "value"])


temp2 = unpivot(temp)


temp2


temp2.set_index(['date','variable'])


temp2.set_index(['date','variable']).unstack()


temp2.pivot(index='date',columns='variable',values='value')


temp2['value2'] = temp2['value']**2


temp2


temp2.pivot(index='date',columns='variable')


df.reset_index()


df.reset_index(inplace=True)

df


df.pivot(index='tradeDate',columns='secID',values='closePrice').iloc[:,0:4]


df.reset_index().pivot(index='tradeDate',columns='secID',values='closePrice').iloc[:,0:4].plot()

<AxesSubplot: xlabel='tradeDate'>


import matplotlib as mpl
import matplotlib.pyplot as plt


plt.subplots?

Signature:
plt.subplots(
    nrows=1,
    ncols=1,
    *,
    sharex=False,
    sharey=False,
    squeeze=True,
    width_ratios=None,
    height_ratios=None,
    subplot_kw=None,
    gridspec_kw=None,
    **fig_kw,
)
Docstring:
Create a figure and a set of subplots.

This utility wrapper makes it convenient to create common layouts of
subplots, including the enclosing figure object, in a single call.

Parameters
----------
nrows, ncols : int, default: 1
    Number of rows/columns of the subplot grid.

sharex, sharey : bool or {'none', 'all', 'row', 'col'}, default: False
    Controls sharing of properties among x (*sharex*) or y (*sharey*)
    axes:

    - True or 'all': x- or y-axis will be shared among all subplots.
    - False or 'none': each subplot x- or y-axis will be independent.
    - 'row': each subplot row will share an x- or y-axis.
    - 'col': each subplot column will share an x- or y-axis.

    When subplots have a shared x-axis along a column, only the x tick
    labels of the bottom subplot are created. Similarly, when subplots
    have a shared y-axis along a row, only the y tick labels of the first
    column subplot are created. To later turn other subplots' ticklabels
    on, use `~matplotlib.axes.Axes.tick_params`.

    When subplots have a shared axis that has units, calling
    `~matplotlib.axis.Axis.set_units` will update each axis with the
    new units.

squeeze : bool, default: True
    - If True, extra dimensions are squeezed out from the returned
      array of `~matplotlib.axes.Axes`:

      - if only one subplot is constructed (nrows=ncols=1), the
        resulting single Axes object is returned as a scalar.
      - for Nx1 or 1xM subplots, the returned object is a 1D numpy
        object array of Axes objects.
      - for NxM, subplots with N>1 and M>1 are returned as a 2D array.

    - If False, no squeezing at all is done: the returned Axes object is
      always a 2D array containing Axes instances, even if it ends up
      being 1x1.

width_ratios : array-like of length *ncols*, optional
    Defines the relative widths of the columns. Each column gets a
    relative width of ``width_ratios[i] / sum(width_ratios)``.
    If not given, all columns will have the same width.  Equivalent
    to ``gridspec_kw={'width_ratios': [...]}``.

height_ratios : array-like of length *nrows*, optional
    Defines the relative heights of the rows. Each row gets a
    relative height of ``height_ratios[i] / sum(height_ratios)``.
    If not given, all rows will have the same height. Convenience
    for ``gridspec_kw={'height_ratios': [...]}``.

subplot_kw : dict, optional
    Dict with keywords passed to the
    `~matplotlib.figure.Figure.add_subplot` call used to create each
    subplot.

gridspec_kw : dict, optional
    Dict with keywords passed to the `~matplotlib.gridspec.GridSpec`
    constructor used to create the grid the subplots are placed on.

**fig_kw
    All additional keyword arguments are passed to the
    `.pyplot.figure` call.

Returns
-------
fig : `.Figure`

ax : `~.axes.Axes` or array of Axes
    *ax* can be either a single `~.axes.Axes` object, or an array of Axes
    objects if more than one subplot was created.  The dimensions of the
    resulting array can be controlled with the squeeze keyword, see above.

    Typical idioms for handling the return value are::

        # using the variable ax for single a Axes
        fig, ax = plt.subplots()

        # using the variable axs for multiple Axes
        fig, axs = plt.subplots(2, 2)

        # using tuple unpacking for multiple Axes
        fig, (ax1, ax2) = plt.subplots(1, 2)
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)

    The names ``ax`` and pluralized ``axs`` are preferred over ``axes``
    because for the latter it's not clear if it refers to a single
    `~.axes.Axes` instance or a collection of these.

See Also
--------
.pyplot.figure
.pyplot.subplot
.pyplot.axes
.Figure.subplots
.Figure.add_subplot

Examples
--------
::

    # First create some toy data:
    x = np.linspace(0, 2*np.pi, 400)
    y = np.sin(x**2)

    # Create just a figure and only one subplot
    fig, ax = plt.subplots()
    ax.plot(x, y)
    ax.set_title('Simple plot')

    # Create two subplots and unpack the output array immediately
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    ax1.plot(x, y)
    ax1.set_title('Sharing Y axis')
    ax2.scatter(x, y)

    # Create four polar axes and access them through the returned array
    fig, axs = plt.subplots(2, 2, subplot_kw=dict(projection="polar"))
    axs[0, 0].plot(x, y)
    axs[1, 1].scatter(x, y)

    # Share a X axis with each column of subplots
    plt.subplots(2, 2, sharex='col')

    # Share a Y axis with each row of subplots
    plt.subplots(2, 2, sharey='row')

    # Share both X and Y axes with all subplots
    plt.subplots(2, 2, sharex='all', sharey='all')

    # Note that this is the same as
    plt.subplots(2, 2, sharex=True, sharey=True)

    # Create figure number 10 with a single subplot
    # and clears it if it already exists.
    fig, ax = plt.subplots(num=10, clear=True)
File:      ~/opt/anaconda3/lib/python3.9/site-packages/matplotlib/pyplot.py
Type:      function


fig = plt.figure()  # an empty figure with no Axes
fig, ax = plt.subplots()  # a figure with a single Axes
fig, axs = plt.subplots(2, 2)  # a figure with a 2x2 grid of Axes

<Figure size 640x480 with 0 Axes>


fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])

[<matplotlib.lines.Line2D at 0x7f97eb2adfa0>]


temp2


fig, axs = plt.subplots(1,2)
x = range(temp2.shape[0])
y1 = temp2['value']
y2 = temp2['value']**2
axs[0].plot(x, y1)
axs[1].plot(x, y2)
axs[0].plot(x, y1**3)

[<matplotlib.lines.Line2D at 0x7f97eb3938e0>]

fig


x = pd.date_range(start='2021-01-03',periods=temp2.shape[0])
plt.plot(x,temp2['value'], label='random value')
plt.plot(x,temp2['value']**2, label='quadratic of random value')
plt.xlabel('date')
plt.ylabel('value')
plt.title('Example')
plt.legend()

<matplotlib.legend.Legend at 0x7f97eb2fae50>


df_pingan.select_dtypes(np.number).corr()


corr = df_pingan.select_dtypes(np.number).corr()


corr


np.ones_like?

Signature: np.ones_like(a, dtype=None, order='K', subok=True, shape=None)
Docstring:
Return an array of ones with the same shape and type as a given array.

Parameters
----------
a : array_like
    The shape and data-type of `a` define these same attributes of
    the returned array.
dtype : data-type, optional
    Overrides the data type of the result.

    .. versionadded:: 1.6.0
order : {'C', 'F', 'A', or 'K'}, optional
    Overrides the memory layout of the result. 'C' means C-order,
    'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous,
    'C' otherwise. 'K' means match the layout of `a` as closely
    as possible.

    .. versionadded:: 1.6.0
subok : bool, optional.
    If True, then the newly created array will use the sub-class
    type of `a`, otherwise it will be a base-class array. Defaults
    to True.
shape : int or sequence of ints, optional.
    Overrides the shape of the result. If order='K' and the number of
    dimensions is unchanged, will try to keep order, otherwise,
    order='C' is implied.

    .. versionadded:: 1.17.0

Returns
-------
out : ndarray
    Array of ones with the same shape and type as `a`.

See Also
--------
empty_like : Return an empty array with shape and type of input.
zeros_like : Return an array of zeros with shape and type of input.
full_like : Return a new array with shape of input filled with value.
ones : Return a new array setting values to one.

Examples
--------
>>> x = np.arange(6)
>>> x = x.reshape((2, 3))
>>> x
array([[0, 1, 2],
       [3, 4, 5]])
>>> np.ones_like(x)
array([[1, 1, 1],
       [1, 1, 1]])

>>> y = np.arange(3, dtype=float)
>>> y
array([0., 1., 2.])
>>> np.ones_like(y)
array([1.,  1.,  1.])
File:      ~/opt/anaconda3/lib/python3.9/site-packages/numpy/core/numeric.py
Type:      function


np.ones_like(corr, dtype=bool)

array([[ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True]])


np.triu?

Signature: np.triu(m, k=0)
Docstring:
Upper triangle of an array.

Return a copy of an array with the elements below the `k`-th diagonal
zeroed. For arrays with ``ndim`` exceeding 2, `triu` will apply to the
final two axes.

Please refer to the documentation for `tril` for further details.

See Also
--------
tril : lower triangle of an array

Examples
--------
>>> np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], -1)
array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 0,  8,  9],
       [ 0,  0, 12]])

>>> np.triu(np.arange(3*4*5).reshape(3, 4, 5))
array([[[ 0,  1,  2,  3,  4],
        [ 0,  6,  7,  8,  9],
        [ 0,  0, 12, 13, 14],
        [ 0,  0,  0, 18, 19]],
       [[20, 21, 22, 23, 24],
        [ 0, 26, 27, 28, 29],
        [ 0,  0, 32, 33, 34],
        [ 0,  0,  0, 38, 39]],
       [[40, 41, 42, 43, 44],
        [ 0, 46, 47, 48, 49],
        [ 0,  0, 52, 53, 54],
        [ 0,  0,  0, 58, 59]]])
File:      ~/opt/anaconda3/lib/python3.9/site-packages/numpy/lib/twodim_base.py
Type:      function


# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))


mask

array([[ True,  True,  True,  True,  True,  True,  True,  True],
       [False,  True,  True,  True,  True,  True,  True,  True],
       [False, False,  True,  True,  True,  True,  True,  True],
       [False, False, False,  True,  True,  True,  True,  True],
       [False, False, False, False,  True,  True,  True,  True],
       [False, False, False, False, False,  True,  True,  True],
       [False, False, False, False, False, False,  True,  True],
       [False, False, False, False, False, False, False,  True]])


import seaborn as sns


# Set up the matplotlib figure
fig, ax = plt.subplots(figsize=(18, 18))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0, annot=True,
            square=True, linewidths=.5, annot_kws={"size": 10},cbar_kws={"shrink": .5})

<AxesSubplot: >


df_pingan_num = df_pingan.select_dtypes(np.number)


df_pingan_num


df_pingan_num.plot()

<AxesSubplot: xlabel='tradeDate'>


plt.rcParams['figure.figsize'] = (16.0, 9.0)


df_pingan_num.drop(['negMarketValue','marketValue'],axis=1).plot()

<AxesSubplot: xlabel='tradeDate'>


def standardization(col):
    return (col - col.mean()) / col.std()


df_pingan_num.apply(standardization).plot()

<AxesSubplot: xlabel='tradeDate'>


df_pingan_num.apply(standardization)[['preClosePrice','turnoverValue','marketValue']].plot(style=['bs-', 'ro-', 'y^-'])

<AxesSubplot: xlabel='tradeDate'>


df_pingan_num.plot(subplots=True)

array([<AxesSubplot: xlabel='tradeDate'>,
       <AxesSubplot: xlabel='tradeDate'>,
       <AxesSubplot: xlabel='tradeDate'>,
       <AxesSubplot: xlabel='tradeDate'>,
       <AxesSubplot: xlabel='tradeDate'>,
       <AxesSubplot: xlabel='tradeDate'>,
       <AxesSubplot: xlabel='tradeDate'>,
       <AxesSubplot: xlabel='tradeDate'>], dtype=object)

	A	B	C	D
VHunICcLPJ	-0.795996	-0.351695	-1.385583	-0.753557
NESKybUEDH	-1.269138	0.863860	-0.976737	0.766969
qMAMB3xDut	1.472684	0.478087	-0.239865	0.353478
JRlRCQUzuu	-0.447649	-0.333359	0.119227	-0.246769
U5jtekM3k2	-0.132406	-0.737123	1.728766	-1.153852
PrBiVPdIaJ	0.382802	0.364979	1.206549	-2.258635
unVAHNdKYu	-1.072163	0.740952	-0.389145	1.039403
MotvwNxPmd	0.066330	-1.973623	1.012141	-1.102447
89m1l3btYy	0.016252	0.555613	0.298718	0.980350
9HTnDXF40x	-0.108969	-1.131245	-1.315161	0.365393
LRkcwE7Lmx	0.696143	-1.422110	1.337381	-1.690665
psnMfLek7U	-0.553044	-1.167765	-1.129249	0.267534
HeABMPzPXF	-0.115510	0.721029	-0.249622	1.093956
2xHL40vnf2	0.406484	0.816362	0.198217	-0.515139
KSCjLRTaae	-0.521772	0.186359	-1.287775	0.931270
PZMnNoQpiJ	1.906866	0.014740	-0.441951	-0.433909
iJbHYwo277	0.999935	0.812229	0.436503	-0.571638
qc8tNYVRCg	-0.015067	-0.199682	-1.991939	-1.358378
zK2FmNbFSj	-0.098908	0.357793	-0.701900	0.500967
vooqmpNXRy	0.214110	-0.027231	-0.392369	-1.650658
Ghj8CJjGha	-1.161240	0.359830	-0.860943	1.392138
ehhvv804ye	0.819537	1.287188	-0.581673	0.636301
9jtiXBZ1y7	-0.128448	0.599718	1.732266	0.492705
RDhWwjCJDA	0.216929	0.210923	0.136244	-0.541380
71Bkvd5XbY	-1.188419	0.324790	1.285649	0.176251
kZPHgLtScF	-0.667199	0.597654	0.400407	0.139640
jpEWrtGEYv	-0.327816	3.228324	-1.162103	1.216709
kE3hNb4qjr	-0.570806	-0.630258	0.648322	0.823451
XNQiSNBQGJ	0.466998	-0.074483	-0.559314	1.242169
k6gL5YU8dQ	0.325523	-0.585791	-0.758267	0.014827

	A	B	C	D
2000-01-03	-0.144370	0.201565	0.516506	1.144586
2000-01-04	-2.145394	0.692739	-0.207220	1.570583
2000-01-05	0.282223	-0.412550	-0.375129	1.004230
2000-01-06	-0.199454	-1.049554	-0.416481	0.601689
2000-01-07	0.892563	-0.422116	-0.497795	2.062563
2000-01-10	1.952667	-2.366110	-0.335015	0.092206
2000-01-11	1.182459	-2.054859	-0.543254	0.323785
2000-01-12	-0.367848	-0.032513	0.021224	0.768913
2000-01-13	0.023308	-0.701854	-2.019472	-0.251018
2000-01-14	1.410542	-1.622660	0.443647	-0.933541
2000-01-17	0.783971	-0.007375	0.296998	-0.363226
2000-01-18	-1.361430	0.153546	-0.619399	1.146770
2000-01-19	1.346466	-1.668749	-1.303989	-0.543123
2000-01-20	0.347398	0.848440	1.952129	-0.167775
2000-01-21	-1.510918	1.309377	-1.564935	-0.795824
2000-01-24	-0.307359	0.502352	-0.047250	0.499364
2000-01-25	0.987366	0.992062	-0.777740	-0.188208
2000-01-26	0.187069	0.468695	1.160078	1.104165
2000-01-27	-1.191208	0.324158	-0.277405	-0.871390
2000-01-28	-1.403417	0.168218	-0.297890	-0.398667
2000-01-31	-0.786557	-0.161181	0.714900	0.916390
2000-02-01	-0.541403	-0.881791	1.270019	-0.222928
2000-02-02	-1.331567	-0.279310	2.249028	-0.024690
2000-02-03	0.665224	-1.476585	1.902657	-0.254340
2000-02-04	2.076048	1.329543	-0.446659	0.007978
2000-02-07	0.936564	-0.062680	-1.205261	-1.963803
2000-02-08	-0.737788	0.516168	-0.707155	0.394130
2000-02-09	1.030261	1.145068	-1.908743	-0.645290
2000-02-10	0.404236	0.528930	0.481302	-0.483042
2000-02-11	0.498774	-0.133681	-0.291691	1.570372

	A	B	C	D
2000-01-03	1.429051	-0.580283	0.990025	-0.257934
2000-01-04	-0.110579	1.306100	1.963598	0.894591
2000-01-05	0.350137	0.145053	-2.283949	-0.210955
2000-01-06	0.683855	0.319405	1.167300	0.572679
2000-01-07	-1.676209	-0.482623	0.261776	0.680530
2000-01-10	0.328323	-2.349310	0.426277	0.120216
2000-01-11	1.065787	0.320567	0.342133	0.311434
2000-01-12	-0.379241	0.143673	-1.389723	1.336481
2000-01-13	-1.042176	-0.138300	-0.909156	-1.352213
2000-01-14	-0.621671	1.315829	-0.465050	1.242555
2000-01-17	1.035397	0.310866	-0.353529	-1.049046
2000-01-18	1.115317	0.182585	-1.047423	-0.665544
2000-01-19	1.324550	-0.762196	1.100811	-0.287230
2000-01-20	0.824192	0.326103	0.232408	-0.035676
2000-01-21	0.073493	-0.343796	-0.876300	1.399121
2000-01-24	1.214491	1.377404	-0.186971	0.479992
2000-01-25	-0.558259	1.458651	-0.045784	-0.754432
2000-01-26	0.525250	-0.197330	-1.364182	-0.761588
2000-01-27	0.049187	-1.423574	1.432088	-1.856000
2000-01-28	0.342931	-1.934350	0.598586	-0.807426
2000-01-31	-0.103926	0.024246	-1.330241	-0.074468
2000-02-01	1.559401	1.679189	1.009991	-0.042107
2000-02-02	0.269948	1.031486	0.892775	-0.810168
2000-02-03	-0.560028	0.091427	1.024934	0.117938
2000-02-04	1.881646	0.610332	-0.231905	-0.371461
2000-02-07	-0.942971	-0.999706	-0.711167	2.733572
2000-02-08	0.618878	-0.563883	1.723752	0.194319
2000-02-09	-0.678689	0.367987	1.309123	0.827901
2000-02-10	-1.052600	1.226809	0.871817	-0.603279
2000-02-11	-0.998651	-0.635610	0.180419	0.739663

	secID	secShortName	exchangeCD	tradeDate	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
0	000001.XSHE	平安银行	XSHE	2015-01-05	1293.044	1307.737	4966040	4.565388e+09	92478.0	0.0291	1.575841e+11	1.830268e+11
1	000001.XSHE	平安银行	XSHE	2015-01-06	1307.737	1288.146	3761152	3.453446e+09	80325.0	0.0220	1.552233e+11	1.802848e+11
2	000001.XSHE	平安银行	XSHE	2015-01-07	1288.146	1263.656	2951601	2.634796e+09	72697.0	0.0173	1.522723e+11	1.768574e+11
3	000001.XSHE	平安银行	XSHE	2015-01-08	1263.656	1221.208	2443951	2.128003e+09	68734.0	0.0143	1.471572e+11	1.709164e+11
4	000001.XSHE	平安银行	XSHE	2015-01-09	1221.208	1231.004	4355039	3.835378e+09	99882.0	0.0255	1.483376e+11	1.722874e+11
...	...	...	...	...	...	...	...	...	...	...	...	...
5838745	900957.XSHG	凌云B股	XSHG	2021-12-27	0.625	0.638	488511	3.081540e+05	226.0	0.0027	1.164720e+08	2.209170e+08
5838746	900957.XSHG	凌云B股	XSHG	2021-12-28	0.638	0.637	177702	1.118810e+05	64.0	0.0010	1.162880e+08	2.205680e+08
5838747	900957.XSHG	凌云B股	XSHG	2021-12-29	0.637	0.630	123550	7.733300e+04	58.0	0.0007	1.150000e+08	2.181250e+08
5838748	900957.XSHG	凌云B股	XSHG	2021-12-30	0.630	0.635	113600	7.130800e+04	41.0	0.0006	1.159200e+08	2.198700e+08
5838749	900957.XSHG	凌云B股	XSHG	2021-12-31	0.635	0.636	167800	1.059960e+05	74.0	0.0009	1.161040e+08	2.202190e+08

	secID	secShortName	exchangeCD	tradeDate	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
0	000001.XSHE	平安银行	XSHE	2015-01-05	1293.044	1307.737	4966040	4.565388e+09	92478.0	0.0291	1.575841e+11	1.830268e+11
1	000001.XSHE	平安银行	XSHE	2015-01-06	1307.737	1288.146	3761152	3.453446e+09	80325.0	0.0220	1.552233e+11	1.802848e+11
2	000001.XSHE	平安银行	XSHE	2015-01-07	1288.146	1263.656	2951601	2.634796e+09	72697.0	0.0173	1.522723e+11	1.768574e+11
3	000001.XSHE	平安银行	XSHE	2015-01-08	1263.656	1221.208	2443951	2.128003e+09	68734.0	0.0143	1.471572e+11	1.709164e+11
4	000001.XSHE	平安银行	XSHE	2015-01-09	1221.208	1231.004	4355039	3.835378e+09	99882.0	0.0255	1.483376e+11	1.722874e+11
...	...	...	...	...	...	...	...	...	...	...	...	...
5838745	900957.XSHG	凌云B股	XSHG	2021-12-27	0.625	0.638	488511	3.081540e+05	226.0	0.0027	1.164720e+08	2.209170e+08
5838746	900957.XSHG	凌云B股	XSHG	2021-12-28	0.638	0.637	177702	1.118810e+05	64.0	0.0010	1.162880e+08	2.205680e+08
5838747	900957.XSHG	凌云B股	XSHG	2021-12-29	0.637	0.630	123550	7.733300e+04	58.0	0.0007	1.150000e+08	2.181250e+08
5838748	900957.XSHG	凌云B股	XSHG	2021-12-30	0.630	0.635	113600	7.130800e+04	41.0	0.0006	1.159200e+08	2.198700e+08
5838749	900957.XSHG	凌云B股	XSHG	2021-12-31	0.635	0.636	167800	1.059960e+05	74.0	0.0009	1.161040e+08	2.202190e+08

numpy¶

generating Array¶

Array indexing and slicing¶

Array concatenation¶

Array reshaping¶

Vectorized manipulations¶

Calculations with numpy¶

Masked arrays¶

pandas¶

Create a data frame¶

Quick production of dataframe examples with `pandas._testing`¶

Explorations on DatetimeIndex, PeriodIndex¶

read data¶

Quick summary¶

object vs. str¶

Sort, duplicates¶

Categorizing values¶

datetime type, time series as index¶

apply, groupby apply¶

multiindex (panel data)¶

merge¶

Reshaping and pivoting¶

Plotting¶

Matplotlib basics¶

an Seaborn example with masked data¶

Draw from pandas¶

	name	value
0	a	0
1	b	1
2	c	2
3	d	3
4	e	4
5	f	5
6	g	6
7	h	7
8	i	8
9	j	9
10	k	10
11	l	11
12	m	12
13	n	13
14	o	14
15	p	15
16	q	16
17	r	17
18	s	18
19	t	19
20	u	20
21	v	21
22	w	22
23	x	23
24	y	24
25	z	25

	ticker	secShortName	tradeDate	preCloseIndex	openIndex	highestIndex	lowestIndex	closeIndex	turnoverVol
0	000001	上证综指	2021-01-04	3473.0693	3474.6793	3511.6554	3457.2061	3502.9584	38079080000
1	000001	上证综指	2021-01-05	3502.9584	3492.1912	3528.6767	3484.7151	3528.6767	40799593400
2	000001	上证综指	2021-01-06	3528.6767	3530.9072	3556.8022	3513.1262	3550.8767	37023092600
3	000001	上证综指	2021-01-07	3550.8767	3552.9087	3576.2046	3526.6174	3576.2046	40534822600
4	000001	上证综指	2021-01-08	3576.2046	3577.6923	3588.0625	3544.8912	3570.1082	34555789600
...	...	...	...	...	...	...	...	...	...
1360	399106	深证综指	2022-02-15	2253.1310	2256.8670	2283.6760	2251.3000	2283.6300	37139160803
1361	399106	深证综指	2022-02-16	2283.6300	2293.0800	2304.2800	2285.6750	2296.9930	37530877510
1362	399106	深证综指	2022-02-17	2296.9930	2293.4480	2314.8270	2287.8960	2302.1620	42928561256
1363	399106	深证综指	2022-02-18	2302.1620	2287.2640	2311.8220	2284.1220	2311.7900	38837049683
1364	399106	深证综指	2022-02-21	2311.7900	2313.5060	2325.9670	2309.7340	2325.7960	44801214227

	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
count	5.838750e+06	5.838750e+06	5.838750e+06	5.838750e+06	5.820845e+06	5.838750e+06	5.838750e+06	5.838750e+06
mean	1.368568e+02	1.368763e+02	5.525219e+06	2.008992e+08	1.078089e+04	2.986831e-02	1.324705e+10	1.835989e+10
std	2.086103e+03	2.085805e+03	2.405725e+07	4.866879e+08	1.874182e+04	4.649860e-02	5.686519e+10	7.522905e+10
min	3.800000e-02	3.800000e-02	2.000000e+00	1.800000e+01	1.000000e+00	0.000000e+00	3.800000e+06	1.045000e+07
25%	2.167700e+01	2.168600e+01	8.454962e+05	2.916335e+07	2.565000e+03	6.900000e-03	2.203200e+09	3.527868e+09
50%	3.972300e+01	3.973600e+01	1.864330e+06	7.414118e+07	5.410000e+03	1.510000e-02	4.304763e+09	6.153206e+09
75%	7.390000e+01	7.392400e+01	4.317285e+06	1.915373e+08	1.179500e+04	3.380000e-02	9.130280e+09	1.286893e+10
max	4.024782e+05	4.024782e+05	5.109897e+09	6.794128e+10	3.274456e+06	9.896000e-01	3.267370e+12	3.267370e+12

	A	B	C	D
2000-01-03	-0.051528	-0.582815	0.392680	-1.395863
2000-01-04	0.538363	-1.165366	-0.394174	-0.829585
2000-01-05	0.952806	1.578885	-1.252698	0.734604

	score
leMJDtok1y	70
tHQMKBjt4g	87
UHQZfBqS9C	29
HnNcYZiPS3	27
Q8E0jeFgcZ	63
V3O7nacCx6	5
A21m7MIYO0	93
jPRB2EYMJ9	6
lyVA6FSPDQ	35
xRVJg7Noqk	88
GWdYMjt64S	51
tsy4vUUXuW	37
aqO7XSrLqj	75
ozmzHJ0hio	48
R5iMCRIpOp	51
f2WbBxzk3u	53
NmyCgrqM22	79
kn8e4znWFQ	73
61oSoW6COF	39
slhMjph9xG	62
P6px9D4zmM	1
hZPrwuiO1U	28
dY9aIT4EX3	59
eT1vVe47Lq	85
7HcEWFCsMB	83
3Wj1dUlcme	62
AuRMTRsaSF	90
R7dGWFgtRe	92
Y5Y887ynFq	27
xWjpVzhPye	61

	secID	secShortName	exchangeCD	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
tradeDate
2015-01-30	000001.XSHE	平安银行	XSHE	1134.678	1137.127	1614788	1.298736e+09	42061.0	0.0095	1.370254e+11	1.591488e+11
2015-02-27	000001.XSHE	平安银行	XSHE	1148.556	1142.025	1377727	1.115146e+09	36250.0	0.0081	1.376156e+11	1.598343e+11
2015-03-31	000001.XSHE	平安银行	XSHE	1278.350	1285.697	4745114	4.393135e+09	114032.0	0.0278	1.549282e+11	1.799421e+11
2015-04-30	000001.XSHE	平安银行	XSHE	1645.458	1650.399	4182058	4.899380e+09	120845.0	0.0245	1.971277e+11	2.289549e+11
2015-05-29	000001.XSHE	平安银行	XSHE	1531.808	1514.019	2915126	3.103544e+09	99260.0	0.0171	1.808381e+11	2.192089e+11
...	...	...	...	...	...	...	...	...	...	...	...
2021-08-31	000001.XSHE	平安银行	XSHE	2277.563	2287.846	1416461	2.072007e+09	119791.0	0.0061	3.454224e+11	3.454253e+11
2021-09-30	000001.XSHE	平安银行	XSHE	2332.832	2304.555	959216	1.424676e+09	70140.0	0.0041	3.479452e+11	3.479481e+11
2021-10-29	000001.XSHE	平安银行	XSHE	2534.625	2506.348	650958	1.052760e+09	76331.0	0.0028	3.784122e+11	3.784154e+11
2021-11-30	000001.XSHE	平安银行	XSHE	2250.572	2241.575	884472	1.280385e+09	65260.0	0.0038	3.384364e+11	3.384392e+11
2021-12-31	000001.XSHE	平安银行	XSHE	2161.886	2118.185	2110777	2.899617e+09	152423.0	0.0090	3.198068e+11	3.198095e+11

	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
tradeDate
2015-01-31	1204.922300	1197.126450	2.685951e+06	2.293232e+09	64189.250000	0.015725	1.442554e+11	1.675461e+11
2015-02-28	1127.222600	1127.549133	1.506542e+06	1.202904e+09	37079.066667	0.008827	1.358712e+11	1.578083e+11
2015-03-31	1193.861227	1200.391773	2.966580e+06	2.552488e+09	75256.318182	0.017386	1.446489e+11	1.680031e+11
2015-04-30	1543.791762	1561.158524	4.287529e+06	4.712147e+09	138826.190476	0.025114	1.869631e+11	2.171492e+11
2015-05-31	1577.762100	1570.943100	2.897439e+06	3.198760e+09	96879.850000	0.016965	1.876373e+11	2.213028e+11
...	...	...	...	...	...	...	...	...
2021-08-31	2440.388364	2441.031045	1.143970e+06	1.800585e+09	96686.318182	0.004891	3.685506e+11	3.685537e+11
2021-09-30	2390.798950	2391.634400	1.659547e+06	2.550127e+09	126351.950000	0.007095	3.610926e+11	3.610956e+11
2021-10-31	2504.098750	2516.710812	1.081450e+06	1.752654e+09	97972.312500	0.004625	3.799768e+11	3.799800e+11
2021-11-30	2318.985409	2306.950273	1.188170e+06	1.768122e+09	94558.681818	0.005073	3.483068e+11	3.483098e+11
2021-12-31	2270.019130	2264.654348	1.342157e+06	1.968544e+09	96840.304348	0.005735	3.419210e+11	3.419238e+11

	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
0	1.671966e+06	1.710179e+06	24661553281603	2.084277e+19	8.552180e+09	3.000847	2.483276e+22	3.349881e+22
1	1.710179e+06	1.659323e+06	14146264367107	1.192629e+19	6.452106e+09	3.000484	2.409428e+22	3.250262e+22
2	1.659323e+06	1.596829e+06	8711948463204	6.942152e+18	5.284854e+09	3.000299	2.318685e+22	3.127853e+22
3	1.596829e+06	1.491352e+06	5972896490404	4.528399e+18	4.724363e+09	3.000204	2.165525e+22	2.921242e+22
4	1.491352e+06	1.515374e+06	18966364691524	1.471013e+19	9.976414e+09	3.000650	2.200405e+22	2.968295e+22
...	...	...	...	...	...	...	...	...
5838745	3.390625e+00	3.407044e+00	238642997124	9.495889e+10	5.107900e+04	3.000007	1.356573e+16	4.880432e+16
5838746	3.407044e+00	3.405769e+00	31578000807	1.251736e+10	4.099000e+03	3.000001	1.352290e+16	4.865024e+16
5838747	3.405769e+00	3.396900e+00	15264602503	5.980393e+09	3.367000e+03	3.000000	1.322500e+16	4.757852e+16
5838748	3.396900e+00	3.403225e+00	12904960003	5.084831e+09	1.684000e+03	3.000000	1.343745e+16	4.834282e+16
5838749	3.403225e+00	3.404496e+00	28156840003	1.123515e+10	5.479000e+03	3.000001	1.348014e+16	4.849641e+16

	secID	exchangeCD	tradeDate	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
secShortName
*ST万方	000638.XSHE	XSHE	2021-12-31	21.428	21.560	550679	1.185508e+07	794.0	0.0079	1.509872e+09	1.509872e+09
*ST上普	600680.XSHG	XSHG	2018-04-27	28.879	30.133	1272828	3.498398e+07	2843.0	0.0180	1.979601e+09	2.939313e+09
*ST东海A	000613.XSHE	XSHE	2021-12-31	5.678	5.836	3854756	2.219116e+07	1384.0	0.0207	1.088870e+09	1.485528e+09
*ST东海B	200613.XSHE	XSHE	2021-12-31	1.757	1.757	33909	6.009500e+04	13.0	0.0004	1.408000e+08	5.825600e+08
*ST东电	000585.XSHE	XSHE	2021-12-31	2.617	2.617	5993090	1.543082e+07	1268.0	0.0123	1.249313e+09	1.790408e+09
...	...	...	...	...	...	...	...	...	...	...	...
龙竹科技	831445.XBEI	XBEI	2021-12-31	20.824	21.098	456508	8.807597e+06	NaN	0.0068	1.297350e+09	2.217318e+09
龙腾光电	688055.XSHG	XSHG	2021-12-31	6.808	6.858	4664701	3.186770e+07	3830.0	0.0026	1.223867e+10	2.283333e+10
龙蟠科技	603906.XSHG	XSHG	2021-12-31	77.083	77.208	3700591	2.766969e+08	20421.0	0.0155	1.785667e+10	1.785667e+10
龙软科技	688078.XSHG	XSHG	2021-12-31	62.467	59.057	963837	5.722752e+07	2401.0	0.0339	1.657041e+09	4.117650e+09
龙高股份	605086.XSHG	XSHG	2021-12-31	23.830	23.620	833004	1.976806e+07	1403.0	0.0260	7.558400e+08	3.023360e+09

		0	1	2	3
bar	1	-1.742879	0.155123	-1.454690	-0.127365
	2	0.958199	0.372288	-1.108044	-0.195609
	3	0.379005	-1.867816	-0.234514	-1.230953
baz	1	-1.108997	-0.506065	-1.544889	0.692354
	2	0.137478	-0.073801	2.395381	0.457265
	3	-1.559135	0.570340	0.332941	0.428118
foo	1	-1.138488	0.911376	-0.733840	-2.143457
	2	0.706423	-0.352932	1.191399	0.198311
	3	-1.031937	-0.656603	1.706294	-0.295609
qux	1	0.470402	-0.726789	2.559832	1.079300
	2	0.914655	1.224235	1.376842	0.244068
	3	1.305349	0.966439	-0.521711	0.436194

	Name	Age
a	tom	10
b	nick	15
c	juli	14

	secID	exchangeCD	tradeDate	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
secShortName
*ST万方	000638.XSHE	XSHE	2015-01-05	28.364	27.304	3103725	8.408389e+07	4279.0	0.0443	1.912092e+09	1.912092e+09
*ST上普	600680.XSHG	XSHG	2015-01-05	57.366	57.562	483414	2.566049e+07	1787.0	0.0069	3.781578e+09	5.614890e+09
*ST东海A	000613.XSHE	XSHE	2015-01-05	9.769	9.783	2242078	2.191941e+07	1159.0	0.0123	1.782263e+09	2.490444e+09
*ST东海B	200613.XSHE	XSHE	2015-01-05	6.642	6.576	164636	1.085171e+06	50.0	0.0021	5.271200e+08	2.180959e+09
*ST东电	000585.XSHE	XSHE	2015-01-05	5.183	5.272	9358955	4.800403e+07	4519.0	0.0192	2.516909e+09	3.607018e+09
...	...	...	...	...	...	...	...	...	...	...	...
龙竹科技	831445.XBEI	XBEI	2020-07-27	9.180	7.840	4804739	4.053933e+07	NaN	0.0730	5.157141e+08	8.976079e+08
龙腾光电	688055.XSHG	XSHG	2020-08-17	1.220	9.850	215585877	1.892487e+09	211520.0	0.8003	2.653273e+09	3.283333e+10
龙蟠科技	603906.XSHG	XSHG	2017-04-10	9.520	13.710	11500	1.576650e+05	15.0	0.0002	7.129200e+08	2.851680e+09
龙软科技	688078.XSHG	XSHG	2019-12-30	21.590	48.900	12454266	6.069590e+08	28417.0	0.7736	7.872460e+08	3.459675e+09
龙高股份	605086.XSHG	XSHG	2021-04-16	12.860	18.520	226150	4.186135e+06	345.0	0.0071	5.926400e+08	2.370560e+09

		secID	exchangeCD	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
secShortName	tradeDate
*ST万方	2015-01-05	000638.XSHE	XSHE	28.364	27.304	3103725	84083888.60	4279.0	0.0443	1.912092e+09	1.912092e+09
	2015-01-06	000638.XSHE	XSHE	27.304	27.039	1926608	51665933.96	3060.0	0.0275	1.893528e+09	1.893528e+09
	2015-01-07	000638.XSHE	XSHE	27.039	26.995	1295798	35013368.40	1919.0	0.0185	1.890434e+09	1.890434e+09
	2015-01-08	000638.XSHE	XSHE	26.995	27.702	2224507	61274527.17	3560.0	0.0318	1.939938e+09	1.939938e+09
	2015-01-09	000638.XSHE	XSHE	27.702	27.348	1053730	29061439.79	1860.0	0.0151	1.915186e+09	1.915186e+09
...	...	...	...	...	...	...	...	...	...	...	...
龙高股份	2021-12-27	605086.XSHG	XSHG	23.320	23.340	711496	16590677.00	1586.0	0.0222	7.468800e+08	2.987520e+09
	2021-12-28	605086.XSHG	XSHG	23.340	23.540	819496	19232165.00	1664.0	0.0256	7.532800e+08	3.013120e+09
	2021-12-29	605086.XSHG	XSHG	23.540	23.510	656443	15410019.00	1646.0	0.0205	7.523200e+08	3.009280e+09
	2021-12-30	605086.XSHG	XSHG	23.510	23.830	1185853	28123837.00	2482.0	0.0371	7.625600e+08	3.050240e+09
	2021-12-31	605086.XSHG	XSHG	23.830	23.620	833004	19768062.00	1403.0	0.0260	7.558400e+08	3.023360e+09

	secID	exchangeCD	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
tradeDate
2021-01-04	000001.XSHE	XSHE	2466.388	2372.018	1873816	2.891682e+09	119328.0	0.0080	3.609470e+11	3.609501e+11
2021-01-05	000001.XSHE	XSHE	2372.018	2317.181	2195884	3.284607e+09	187364.0	0.0094	3.526025e+11	3.526055e+11
2021-01-06	000001.XSHE	XSHE	2317.181	2494.444	2332835	3.648522e+09	154714.0	0.0100	3.795765e+11	3.795798e+11
2021-01-07	000001.XSHE	XSHE	2494.444	2537.804	1909948	3.111275e+09	123304.0	0.0082	3.861745e+11	3.861778e+11
2021-01-08	000001.XSHE	XSHE	2537.804	2531.427	1441303	2.348316e+09	95828.0	0.0062	3.852042e+11	3.852075e+11
...	...	...	...	...	...	...	...	...	...	...
2021-12-27	000001.XSHE	XSHE	2224.866	2213.298	881462	1.260455e+09	66911.0	0.0038	3.341671e+11	3.341699e+11
2021-12-28	000001.XSHE	XSHE	2213.298	2206.872	1358314	1.934461e+09	89671.0	0.0058	3.331968e+11	3.331996e+11
2021-12-29	000001.XSHE	XSHE	2206.872	2152.889	1771527	2.480535e+09	147243.0	0.0076	3.250464e+11	3.250491e+11
2021-12-30	000001.XSHE	XSHE	2152.889	2161.886	960485	1.342374e+09	66792.0	0.0041	3.264048e+11	3.264075e+11
2021-12-31	000001.XSHE	XSHE	2161.886	2118.185	2110777	2.899617e+09	152423.0	0.0090	3.198068e+11	3.198095e+11

	name	value
0	a	0
1	b	1
2	c	2
3	d	3
4	e	4
5	f	5
6	g	6
7	h	7
8	i	8
9	j	9
10	k	10
11	l	11
12	m	12
13	n	13
14	o	14
15	p	15
16	q	16
17	r	17
18	s	18
19	t	19
20	u	20
21	v	21
22	w	22
23	x	23
24	y	24
25	z	25

		secID	exchangeCD	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
secShortName	tradeDate
宁德时代	2018-06-11	300750.XSHE	XSHE	25.140	36.200	78829	2.845472e+06	154.0	0.0004	7.864222e+09	7.864222e+10
	2018-06-12	300750.XSHE	XSHE	36.200	39.820	26579	1.058376e+06	86.0	0.0001	8.650644e+09	8.650644e+10
	2018-06-13	300750.XSHE	XSHE	39.820	43.800	45030	1.972314e+06	98.0	0.0002	9.515274e+09	9.515274e+10
	2018-06-14	300750.XSHE	XSHE	43.800	48.180	74267	3.578184e+06	211.0	0.0003	1.046680e+10	1.046680e+11
	2018-06-15	300750.XSHE	XSHE	48.180	53.000	256510	1.359503e+07	717.0	0.0012	1.151392e+10	1.151392e+11
...	...	...	...	...	...	...	...	...	...	...	...
平安银行	2020-01-20	000001.XSHE	XSHE	2055.130	2062.654	899493	1.226465e+09	42805.0	0.0038	3.192246e+11	3.192274e+11
	2020-01-21	000001.XSHE	XSHE	2062.654	2006.229	1080975	1.442171e+09	60130.0	0.0046	3.104920e+11	3.104947e+11
	2020-01-22	000001.XSHE	XSHE	2006.229	2017.514	867411	1.150933e+09	49245.0	0.0037	3.122386e+11	3.122412e+11
	2020-01-23	000001.XSHE	XSHE	2017.514	1948.549	1326911	1.723394e+09	71683.0	0.0057	3.015654e+11	3.015680e+11
	2020-02-03	000001.XSHE	XSHE	1948.549	1754.196	2723762	3.201454e+09	105935.0	0.0116	2.714865e+11	2.714888e+11

	A	B	C	D
2000-01-03	-0.522757	-1.607854	-0.000651	-0.264986
2000-01-04	-0.574578	-0.483190	-0.920019	0.332520
2000-01-05	1.490557	0.681432	-1.439317	-0.741802

		value
date	variable
2000-01-03	A	-0.522757
2000-01-04	A	-0.574578
2000-01-05	A	1.490557
2000-01-03	B	-1.607854
2000-01-04	B	-0.483190
2000-01-05	B	0.681432
2000-01-03	C	-0.000651
2000-01-04	C	-0.920019
2000-01-05	C	-1.439317
2000-01-03	D	-0.264986
2000-01-04	D	0.332520
2000-01-05	D	-0.741802

	date	variable	value	value2
0	2000-01-03	A	-0.522757	2.732746e-01
1	2000-01-04	A	-0.574578	3.301401e-01
2	2000-01-05	A	1.490557	2.221761e+00
3	2000-01-03	B	-1.607854	2.585194e+00
4	2000-01-04	B	-0.483190	2.334727e-01
5	2000-01-05	B	0.681432	4.643496e-01
6	2000-01-03	C	-0.000651	4.233486e-07
7	2000-01-04	C	-0.920019	8.464347e-01
8	2000-01-05	C	-1.439317	2.071633e+00
9	2000-01-03	D	-0.264986	7.021733e-02
10	2000-01-04	D	0.332520	1.105695e-01
11	2000-01-05	D	-0.741802	5.502701e-01

	preClosePrice	closePrice	turnoverVol	turnoverValue	dealAmount	turnoverRate	negMarketValue	marketValue
preClosePrice	1.000000	0.997575	0.064982	0.389881	0.601451	-0.039942	0.989830	0.994295
closePrice	0.997575	1.000000	0.075458	0.400851	0.606522	-0.030710	0.991942	0.996452
turnoverVol	0.064982	0.075458	1.000000	0.924077	0.752066	0.981827	0.027619	0.055961
turnoverValue	0.389881	0.400851	0.924077	1.000000	0.914040	0.862471	0.355060	0.383088
dealAmount	0.601451	0.606522	0.752066	0.914040	1.000000	0.665757	0.573258	0.596717
turnoverRate	-0.039942	-0.030710	0.981827	0.862471	0.665757	1.000000	-0.090386	-0.050985
negMarketValue	0.989830	0.991942	0.027619	0.355060	0.573258	-0.090386	1.000000	0.993615
marketValue	0.994295	0.996452	0.055961	0.383088	0.596717	-0.050985	0.993615	1.000000

numpy¶

generating Array¶

Array indexing and slicing¶

Array concatenation¶

Array reshaping¶

Vectorized manipulations¶

Calculations with numpy¶

Masked arrays¶

pandas¶

Create a data frame¶

Quick production of dataframe examples with pandas._testing¶

Explorations on DatetimeIndex, PeriodIndex¶

read data¶

Quick summary¶

object vs. str¶

Sort, duplicates¶

Categorizing values¶

datetime type, time series as index¶

apply, groupby apply¶

multiindex (panel data)¶

merge¶

Reshaping and pivoting¶

Plotting¶

Matplotlib basics¶

an Seaborn example with masked data¶

Draw from pandas¶

Quick production of dataframe examples with `pandas._testing`¶

	name	value
0	a	0
1	b	1
2	c	2
3	d	3
4	e	4
5	f	5
6	g	6
7	h	7
8	i	8
9	j	9
10	k	10
11	l	11
12	m	12
13	n	13
14	o	14
15	p	15
16	q	16
17	r	17
18	s	18
19	t	19
20	u	20
21	v	21
22	w	22
23	x	23
24	y	24
25	z	25