import numpy as np
import pandas as pd
np.__version__
'1.23.5'
numpy中的数据存储是array, array要求数据类型是一样的,这和python中的list不同
np.array([1,1.0,'2']) # 强制转换成同一种类型
array(['1', '1.0', '2'], dtype='<U32')
np.array([1,1.0,'2'],dtype=int)
array([1, 1, 2])
np.ones((3,5))
array([[1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.]])
np.full((3,5),1.231)
array([[1.231, 1.231, 1.231, 1.231, 1.231], [1.231, 1.231, 1.231, 1.231, 1.231], [1.231, 1.231, 1.231, 1.231, 1.231]])
np.arange(0, 20)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
np.random.normal(0, 1, (3,3))
array([[ 1.27274173, -2.34781348, -0.31959194], [ 0.87566819, 0.09926286, -1.11582647], [ 1.5545384 , 0.17286124, 1.50142145]])
np.eye(3)
array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
np.tile?
Signature: np.tile(A, reps) Docstring: Construct an array by repeating A the number of times given by reps. If `reps` has length ``d``, the result will have dimension of ``max(d, A.ndim)``. If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication, or shape (1, 1, 3) for 3-D replication. If this is not the desired behavior, promote `A` to d-dimensions manually before calling this function. If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it. Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as (1, 1, 2, 2). Note : Although tile may be used for broadcasting, it is strongly recommended to use numpy's broadcasting operations and functions. Parameters ---------- A : array_like The input array. reps : array_like The number of repetitions of `A` along each axis. Returns ------- c : ndarray The tiled output array. See Also -------- repeat : Repeat elements of an array. broadcast_to : Broadcast an array to a new shape Examples -------- >>> a = np.array([0, 1, 2]) >>> np.tile(a, 2) array([0, 1, 2, 0, 1, 2]) >>> np.tile(a, (2, 2)) array([[0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2]]) >>> np.tile(a, (2, 1, 2)) array([[[0, 1, 2, 0, 1, 2]], [[0, 1, 2, 0, 1, 2]]]) >>> b = np.array([[1, 2], [3, 4]]) >>> np.tile(b, 2) array([[1, 2, 1, 2], [3, 4, 3, 4]]) >>> np.tile(b, (2, 1)) array([[1, 2], [3, 4], [1, 2], [3, 4]]) >>> c = np.array([1,2,3,4]) >>> np.tile(c,(4,1)) array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]) File: ~/opt/anaconda3/lib/python3.9/site-packages/numpy/lib/shape_base.py Type: function
np.tile(10,3)
array([10, 10, 10])
np.tile([1,2,3],3)
array([1, 2, 3, 1, 2, 3, 1, 2, 3])
np.repeat?
Signature: np.repeat(a, repeats, axis=None) Docstring: Repeat elements of an array. Parameters ---------- a : array_like Input array. repeats : int or array of ints The number of repetitions for each element. `repeats` is broadcasted to fit the shape of the given axis. axis : int, optional The axis along which to repeat values. By default, use the flattened input array, and return a flat output array. Returns ------- repeated_array : ndarray Output array which has the same shape as `a`, except along the given axis. See Also -------- tile : Tile an array. unique : Find the unique elements of an array. Examples -------- >>> np.repeat(3, 4) array([3, 3, 3, 3]) >>> x = np.array([[1,2],[3,4]]) >>> np.repeat(x, 2) array([1, 1, 2, 2, 3, 3, 4, 4]) >>> np.repeat(x, 3, axis=1) array([[1, 1, 1, 2, 2, 2], [3, 3, 3, 4, 4, 4]]) >>> np.repeat(x, [1, 2], axis=0) array([[1, 2], [3, 4], [3, 4]]) File: ~/opt/anaconda3/lib/python3.9/site-packages/numpy/core/fromnumeric.py Type: function
np.asarray([1,2,3]).repeat(3)
array([1, 1, 1, 2, 2, 2, 3, 3, 3])
a = np.random.normal(0, 1, (3,3))
a
array([[-0.49915155, 0.8337671 , 0.40341252], [-1.36387181, -0.18727799, -0.35112724], [ 0.77928131, -0.36764775, 0.59875779]])
a[0,1]
0.8337671001714373
a[0:2]
array([[-0.49915155, 0.8337671 , 0.40341252], [-1.36387181, -0.18727799, -0.35112724]])
a[0:2,1]
array([ 0.8337671 , -0.18727799])
a[0,1] = 1000
a
array([[-4.99151549e-01, 1.00000000e+03, 4.03412521e-01], [-1.36387181e+00, -1.87277987e-01, -3.51127242e-01], [ 7.79281310e-01, -3.67647747e-01, 5.98757794e-01]])
a[0:3:2]
array([[-4.99151549e-01, 1.00000000e+03, 4.03412521e-01], [ 7.79281310e-01, -3.67647747e-01, 5.98757794e-01]])
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
z = [21,21,21]
np.concatenate([x,y,z])
array([ 1, 2, 3, 3, 2, 1, 21, 21, 21])
a1 = np.ones((3,3))
a2 = np.random.normal(0, 1, (3,3))
a1
array([[1., 1., 1.], [1., 1., 1.], [1., 1., 1.]])
a2
array([[-2.0217118 , 1.11949095, -1.25693857], [-0.51433944, 0.9210426 , 0.31195337], [ 1.53190446, 2.61674289, 0.58387914]])
np.concatenate([a1,a2])
array([[ 1. , 1. , 1. ], [ 1. , 1. , 1. ], [ 1. , 1. , 1. ], [-2.0217118 , 1.11949095, -1.25693857], [-0.51433944, 0.9210426 , 0.31195337], [ 1.53190446, 2.61674289, 0.58387914]])
np.concatenate([a1,a2], axis=1)
array([[ 1. , 1. , 1. , -2.0217118 , 1.11949095, -1.25693857], [ 1. , 1. , 1. , -0.51433944, 0.9210426 , 0.31195337], [ 1. , 1. , 1. , 1.53190446, 2.61674289, 0.58387914]])
a1 = np.array(range(10))
a1[0:3]
array([0, 1, 2])
a1
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
a1.shape
(10,)
a1 = a1.reshape((10,1))
a1.shape
(10, 1)
a1
array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]])
a1 = a1.reshape((1,10))
a1.shape
(1, 10)
a1
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
a1.reshape((2,6))
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[39], line 1 ----> 1 a1.reshape((2,6)) ValueError: cannot reshape array of size 10 into shape (2,6)
a1 = a1.reshape((2,5))
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
a2 = np.array(range(10,20)).reshape((2,5))
a2
array([[10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
np.vstack([a1,a2])
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
np.hstack([a1,a2])
array([[ 0, 1, 2, 3, 4, 10, 11, 12, 13, 14], [ 5, 6, 7, 8, 9, 15, 16, 17, 18, 19]])
做numpy和pandas操作时的一个原则:能用向量操作尽量用向量操作,速度最快。尽量避免写元素的循环
a1 + 1000
array([[1000, 1001, 1002, 1003, 1004], [1005, 1006, 1007, 1008, 1009]])
a1 * a2
array([[ 0, 11, 24, 39, 56], [ 75, 96, 119, 144, 171]])
list1 = [1,2,3] # some manipulations is not possible with raw python
list2 = [4,5,6]
list1 * list2
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[50], line 1 ----> 1 list1 * list2 TypeError: can't multiply sequence by non-int of type 'list'
list1 + list2
[1, 2, 3, 4, 5, 6]
x = np.array([[1, 2, 3], [4, 5, 6]])
x.ravel()
array([1, 2, 3, 4, 5, 6])
x.ravel(order='F')
array([1, 4, 2, 5, 3, 6])
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
a1.max()
9
axis: 0,从上往下(跨行)。1,从左往右(跨列)
也适用于pandas
a1.max(axis=1)
array([4, 9])
a1.max(axis=0)
array([5, 6, 7, 8, 9])
a1.sum()
45
a1.sum(axis=0)
array([ 5, 7, 9, 11, 13])
a1.sum(axis=1)
array([10, 35])
a1.mean()
4.5
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
a1.mean(axis=1)
array([2., 7.])
a1.mean(axis=0)
array([2.5, 3.5, 4.5, 5.5, 6.5])
Mask 把 array 的一部分“挡住”,来阻止其他的函数对其进行操作。这在有的时候挺有用
import numpy.ma as ma
x = np.array([1,2,3,-1,4])
mx = ma.masked_array(x, mask=[0,0,0,1,0])
mx
masked_array(data=[1, 2, 3, --, 4], mask=[False, False, False, True, False], fill_value=999999)
mx.mean()
2.5
# -999 as an invalid value, as sometimes seen in some databases
a = np.array([0,1,2,-999])
ma.masked_values(a, -999)
masked_array(data=[0, 1, 2, --], mask=[False, False, False, True], fill_value=-999)
df = pd.DataFrame({'name':[chr(i) for i in range(ord('a'),ord('z')+1)],
'value':range(26)}) # string.ascii_lowercase provides the alphabet too.
df
name | value | |
---|---|---|
0 | a | 0 |
1 | b | 1 |
2 | c | 2 |
3 | d | 3 |
4 | e | 4 |
5 | f | 5 |
6 | g | 6 |
7 | h | 7 |
8 | i | 8 |
9 | j | 9 |
10 | k | 10 |
11 | l | 11 |
12 | m | 12 |
13 | n | 13 |
14 | o | 14 |
15 | p | 15 |
16 | q | 16 |
17 | r | 17 |
18 | s | 18 |
19 | t | 19 |
20 | u | 20 |
21 | v | 21 |
22 | w | 22 |
23 | x | 23 |
24 | y | 24 |
25 | z | 25 |
data = [['tom', 10], ['nick', 15], ['juli', 14]]
df = pd.DataFrame(data, columns = ['Name', 'Age'], index=['a','b','c'])
df
Name | Age | |
---|---|---|
a | tom | 10 |
b | nick | 15 |
c | juli | 14 |
pandas._testing
¶import pandas._testing as tm
dir(tm)
['ALL_INT_EA_DTYPES', 'ALL_INT_NUMPY_DTYPES', 'ALL_NUMPY_DTYPES', 'ALL_REAL_NUMPY_DTYPES', 'BOOL_DTYPES', 'BYTES_DTYPES', 'BaseMaskedArray', 'COMPLEX_DTYPES', 'Callable', 'Categorical', 'CategoricalIndex', 'ContextManager', 'Counter', 'DATETIME64_DTYPES', 'DataFrame', 'DatetimeIndex', 'Decimal', 'Dtype', 'EMPTY_STRING_PATTERN', 'ENDIAN', 'ExtensionArray', 'FLOAT_EA_DTYPES', 'FLOAT_NUMPY_DTYPES', 'Float64Index', 'Index', 'Int64Index', 'IntervalIndex', 'Iterable', 'MultiIndex', 'NARROW_NP_DTYPES', 'NDArrayBackedExtensionArray', 'NP_NAT_OBJECTS', 'NULL_OBJECTS', 'NumericIndex', 'OBJECT_DTYPES', 'PandasArray', 'RNGContext', 'RangeIndex', 'SIGNED_INT_EA_DTYPES', 'SIGNED_INT_NUMPY_DTYPES', 'STRING_DTYPES', 'Series', 'SubclassedCategorical', 'SubclassedDataFrame', 'SubclassedSeries', 'TIMEDELTA64_DTYPES', 'TYPE_CHECKING', 'UInt64Index', 'UNSIGNED_INT_EA_DTYPES', 'UNSIGNED_INT_NUMPY_DTYPES', '_K', '_N', '__all__', '__annotations__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_create_missing_idx', '_io', '_make_skipna_wrapper', '_random', '_testing_mode_warnings', '_warnings', 'all_timeseries_index_generator', 'annotations', 'assert_almost_equal', 'assert_attr_equal', 'assert_categorical_equal', 'assert_class_equal', 'assert_contains_all', 'assert_copy', 'assert_datetime_array_equal', 'assert_dict_equal', 'assert_equal', 'assert_extension_array_equal', 'assert_frame_equal', 'assert_index_equal', 'assert_indexing_slices_equivalent', 'assert_interval_array_equal', 'assert_is_sorted', 'assert_is_valid_plot_return_object', 'assert_metadata_equivalent', 'assert_numpy_array_equal', 'assert_period_array_equal', 'assert_produces_warning', 'assert_series_equal', 'assert_sp_array_equal', 'assert_timedelta_array_equal', 'asserters', 'at', 'bdate_range', 'box_expected', 'byteorder', 'can_set_locale', 'close', 'collections', 'compat', 'contexts', 'convert_rows_list_to_csv_str', 'cython_table', 'datetime', 'decompress_file', 'ensure_clean', 'ensure_clean_dir', 'ensure_safe_environment_variables', 'equalContents', 'external_error_raised', 'extract_array', 'getCols', 'getMixedTypeDict', 'getPeriodData', 'getSeriesData', 'getTimeSeriesData', 'get_cython_table_params', 'get_dtype', 'get_locales', 'get_obj', 'get_op_from_name', 'getitem', 'iat', 'iloc', 'index_subclass_makers_generator', 'is_float_dtype', 'is_integer_dtype', 'is_sequence', 'is_unsigned_integer_dtype', 'loc', 'makeBoolIndex', 'makeCategoricalIndex', 'makeCustomDataframe', 'makeCustomIndex', 'makeDataFrame', 'makeDateIndex', 'makeFloatIndex', 'makeFloatSeries', 'makeIntIndex', 'makeIntervalIndex', 'makeMissingDataframe', 'makeMixedDataFrame', 'makeMultiIndex', 'makeNumericIndex', 'makeObjectSeries', 'makePeriodFrame', 'makePeriodIndex', 'makePeriodSeries', 'makeRangeIndex', 'makeStringIndex', 'makeStringSeries', 'makeTimeDataFrame', 'makeTimeSeries', 'makeTimedeltaIndex', 'makeUIntIndex', 'make_rand_series', 'maybe_produces_warning', 'network', 'np', 'operator', 'os', 'pa_version_under1p01', 'pandas_dtype', 'pd', 'raise_assert_detail', 'randbool', 'rands', 'rands_array', 're', 'reset_display_options', 'reset_testing_mode', 'round_trip_localpath', 'round_trip_pathlib', 'round_trip_pickle', 'set_locale', 'set_testing_mode', 'set_timezone', 'setitem', 'shares_memory', 'string', 'to_array', 'use_numexpr', 'warnings', 'with_csv_dialect', 'write_to_compressed']
tm.makeDataFrame()
A | B | C | D | |
---|---|---|---|---|
VHunICcLPJ | -0.795996 | -0.351695 | -1.385583 | -0.753557 |
NESKybUEDH | -1.269138 | 0.863860 | -0.976737 | 0.766969 |
qMAMB3xDut | 1.472684 | 0.478087 | -0.239865 | 0.353478 |
JRlRCQUzuu | -0.447649 | -0.333359 | 0.119227 | -0.246769 |
U5jtekM3k2 | -0.132406 | -0.737123 | 1.728766 | -1.153852 |
PrBiVPdIaJ | 0.382802 | 0.364979 | 1.206549 | -2.258635 |
unVAHNdKYu | -1.072163 | 0.740952 | -0.389145 | 1.039403 |
MotvwNxPmd | 0.066330 | -1.973623 | 1.012141 | -1.102447 |
89m1l3btYy | 0.016252 | 0.555613 | 0.298718 | 0.980350 |
9HTnDXF40x | -0.108969 | -1.131245 | -1.315161 | 0.365393 |
LRkcwE7Lmx | 0.696143 | -1.422110 | 1.337381 | -1.690665 |
psnMfLek7U | -0.553044 | -1.167765 | -1.129249 | 0.267534 |
HeABMPzPXF | -0.115510 | 0.721029 | -0.249622 | 1.093956 |
2xHL40vnf2 | 0.406484 | 0.816362 | 0.198217 | -0.515139 |
KSCjLRTaae | -0.521772 | 0.186359 | -1.287775 | 0.931270 |
PZMnNoQpiJ | 1.906866 | 0.014740 | -0.441951 | -0.433909 |
iJbHYwo277 | 0.999935 | 0.812229 | 0.436503 | -0.571638 |
qc8tNYVRCg | -0.015067 | -0.199682 | -1.991939 | -1.358378 |
zK2FmNbFSj | -0.098908 | 0.357793 | -0.701900 | 0.500967 |
vooqmpNXRy | 0.214110 | -0.027231 | -0.392369 | -1.650658 |
Ghj8CJjGha | -1.161240 | 0.359830 | -0.860943 | 1.392138 |
ehhvv804ye | 0.819537 | 1.287188 | -0.581673 | 0.636301 |
9jtiXBZ1y7 | -0.128448 | 0.599718 | 1.732266 | 0.492705 |
RDhWwjCJDA | 0.216929 | 0.210923 | 0.136244 | -0.541380 |
71Bkvd5XbY | -1.188419 | 0.324790 | 1.285649 | 0.176251 |
kZPHgLtScF | -0.667199 | 0.597654 | 0.400407 | 0.139640 |
jpEWrtGEYv | -0.327816 | 3.228324 | -1.162103 | 1.216709 |
kE3hNb4qjr | -0.570806 | -0.630258 | 0.648322 | 0.823451 |
XNQiSNBQGJ | 0.466998 | -0.074483 | -0.559314 | 1.242169 |
k6gL5YU8dQ | 0.325523 | -0.585791 | -0.758267 | 0.014827 |
tm.makeMixedDataFrame().info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5 entries, 0 to 4 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 A 5 non-null float64 1 B 5 non-null float64 2 C 5 non-null object 3 D 5 non-null datetime64[ns] dtypes: datetime64[ns](1), float64(2), object(1) memory usage: 288.0+ bytes
df1 = tm.makeTimeDataFrame()
df1
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -0.144370 | 0.201565 | 0.516506 | 1.144586 |
2000-01-04 | -2.145394 | 0.692739 | -0.207220 | 1.570583 |
2000-01-05 | 0.282223 | -0.412550 | -0.375129 | 1.004230 |
2000-01-06 | -0.199454 | -1.049554 | -0.416481 | 0.601689 |
2000-01-07 | 0.892563 | -0.422116 | -0.497795 | 2.062563 |
2000-01-10 | 1.952667 | -2.366110 | -0.335015 | 0.092206 |
2000-01-11 | 1.182459 | -2.054859 | -0.543254 | 0.323785 |
2000-01-12 | -0.367848 | -0.032513 | 0.021224 | 0.768913 |
2000-01-13 | 0.023308 | -0.701854 | -2.019472 | -0.251018 |
2000-01-14 | 1.410542 | -1.622660 | 0.443647 | -0.933541 |
2000-01-17 | 0.783971 | -0.007375 | 0.296998 | -0.363226 |
2000-01-18 | -1.361430 | 0.153546 | -0.619399 | 1.146770 |
2000-01-19 | 1.346466 | -1.668749 | -1.303989 | -0.543123 |
2000-01-20 | 0.347398 | 0.848440 | 1.952129 | -0.167775 |
2000-01-21 | -1.510918 | 1.309377 | -1.564935 | -0.795824 |
2000-01-24 | -0.307359 | 0.502352 | -0.047250 | 0.499364 |
2000-01-25 | 0.987366 | 0.992062 | -0.777740 | -0.188208 |
2000-01-26 | 0.187069 | 0.468695 | 1.160078 | 1.104165 |
2000-01-27 | -1.191208 | 0.324158 | -0.277405 | -0.871390 |
2000-01-28 | -1.403417 | 0.168218 | -0.297890 | -0.398667 |
2000-01-31 | -0.786557 | -0.161181 | 0.714900 | 0.916390 |
2000-02-01 | -0.541403 | -0.881791 | 1.270019 | -0.222928 |
2000-02-02 | -1.331567 | -0.279310 | 2.249028 | -0.024690 |
2000-02-03 | 0.665224 | -1.476585 | 1.902657 | -0.254340 |
2000-02-04 | 2.076048 | 1.329543 | -0.446659 | 0.007978 |
2000-02-07 | 0.936564 | -0.062680 | -1.205261 | -1.963803 |
2000-02-08 | -0.737788 | 0.516168 | -0.707155 | 0.394130 |
2000-02-09 | 1.030261 | 1.145068 | -1.908743 | -0.645290 |
2000-02-10 | 0.404236 | 0.528930 | 0.481302 | -0.483042 |
2000-02-11 | 0.498774 | -0.133681 | -0.291691 | 1.570372 |
df2 = tm.makePeriodFrame()
df2
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | 1.429051 | -0.580283 | 0.990025 | -0.257934 |
2000-01-04 | -0.110579 | 1.306100 | 1.963598 | 0.894591 |
2000-01-05 | 0.350137 | 0.145053 | -2.283949 | -0.210955 |
2000-01-06 | 0.683855 | 0.319405 | 1.167300 | 0.572679 |
2000-01-07 | -1.676209 | -0.482623 | 0.261776 | 0.680530 |
2000-01-10 | 0.328323 | -2.349310 | 0.426277 | 0.120216 |
2000-01-11 | 1.065787 | 0.320567 | 0.342133 | 0.311434 |
2000-01-12 | -0.379241 | 0.143673 | -1.389723 | 1.336481 |
2000-01-13 | -1.042176 | -0.138300 | -0.909156 | -1.352213 |
2000-01-14 | -0.621671 | 1.315829 | -0.465050 | 1.242555 |
2000-01-17 | 1.035397 | 0.310866 | -0.353529 | -1.049046 |
2000-01-18 | 1.115317 | 0.182585 | -1.047423 | -0.665544 |
2000-01-19 | 1.324550 | -0.762196 | 1.100811 | -0.287230 |
2000-01-20 | 0.824192 | 0.326103 | 0.232408 | -0.035676 |
2000-01-21 | 0.073493 | -0.343796 | -0.876300 | 1.399121 |
2000-01-24 | 1.214491 | 1.377404 | -0.186971 | 0.479992 |
2000-01-25 | -0.558259 | 1.458651 | -0.045784 | -0.754432 |
2000-01-26 | 0.525250 | -0.197330 | -1.364182 | -0.761588 |
2000-01-27 | 0.049187 | -1.423574 | 1.432088 | -1.856000 |
2000-01-28 | 0.342931 | -1.934350 | 0.598586 | -0.807426 |
2000-01-31 | -0.103926 | 0.024246 | -1.330241 | -0.074468 |
2000-02-01 | 1.559401 | 1.679189 | 1.009991 | -0.042107 |
2000-02-02 | 0.269948 | 1.031486 | 0.892775 | -0.810168 |
2000-02-03 | -0.560028 | 0.091427 | 1.024934 | 0.117938 |
2000-02-04 | 1.881646 | 0.610332 | -0.231905 | -0.371461 |
2000-02-07 | -0.942971 | -0.999706 | -0.711167 | 2.733572 |
2000-02-08 | 0.618878 | -0.563883 | 1.723752 | 0.194319 |
2000-02-09 | -0.678689 | 0.367987 | 1.309123 | 0.827901 |
2000-02-10 | -1.052600 | 1.226809 | 0.871817 | -0.603279 |
2000-02-11 | -0.998651 | -0.635610 | 0.180419 | 0.739663 |
df1.index
DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='datetime64[ns]', freq='B')
df1.index.day_of_week
Int64Index([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype='int64')
df1.index.shift(periods=1) # 时间序列index的shift,会按照指定的period或freq平移。注意:如果没有时间序列,则直接平移,见df1['A']例
DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11', '2000-02-14'], dtype='datetime64[ns]', freq='B')
df1.index.shift(freq='D')
DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-15', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-22', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-29', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-05', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11', '2000-02-12'], dtype='datetime64[ns]', freq=None)
df1.index.shift(freq='2D')
DatetimeIndex(['2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08', '2000-01-09', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-15', '2000-01-16', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-22', '2000-01-23', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-29', '2000-01-30', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-05', '2000-02-06', '2000-02-09', '2000-02-10', '2000-02-11', '2000-02-12', '2000-02-13'], dtype='datetime64[ns]', freq=None)
df1['A']
2000-01-03 -0.144370 2000-01-04 -2.145394 2000-01-05 0.282223 2000-01-06 -0.199454 2000-01-07 0.892563 2000-01-10 1.952667 2000-01-11 1.182459 2000-01-12 -0.367848 2000-01-13 0.023308 2000-01-14 1.410542 2000-01-17 0.783971 2000-01-18 -1.361430 2000-01-19 1.346466 2000-01-20 0.347398 2000-01-21 -1.510918 2000-01-24 -0.307359 2000-01-25 0.987366 2000-01-26 0.187069 2000-01-27 -1.191208 2000-01-28 -1.403417 2000-01-31 -0.786557 2000-02-01 -0.541403 2000-02-02 -1.331567 2000-02-03 0.665224 2000-02-04 2.076048 2000-02-07 0.936564 2000-02-08 -0.737788 2000-02-09 1.030261 2000-02-10 0.404236 2000-02-11 0.498774 Freq: B, Name: A, dtype: float64
df1['A'].shift()
2000-01-03 NaN 2000-01-04 -0.144370 2000-01-05 -2.145394 2000-01-06 0.282223 2000-01-07 -0.199454 2000-01-10 0.892563 2000-01-11 1.952667 2000-01-12 1.182459 2000-01-13 -0.367848 2000-01-14 0.023308 2000-01-17 1.410542 2000-01-18 0.783971 2000-01-19 -1.361430 2000-01-20 1.346466 2000-01-21 0.347398 2000-01-24 -1.510918 2000-01-25 -0.307359 2000-01-26 0.987366 2000-01-27 0.187069 2000-01-28 -1.191208 2000-01-31 -1.403417 2000-02-01 -0.786557 2000-02-02 -0.541403 2000-02-03 -1.331567 2000-02-04 0.665224 2000-02-07 2.076048 2000-02-08 0.936564 2000-02-09 -0.737788 2000-02-10 1.030261 2000-02-11 0.404236 Freq: B, Name: A, dtype: float64
a = df1.index.shift() - df1.index
a
TimedeltaIndex(['1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days'], dtype='timedelta64[ns]', freq=None)
df1.index.values # index中内部的保存数据类型。包装成index后有对应于index的其他操作
array(['2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000', '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000', '2000-01-07T00:00:00.000000000', '2000-01-10T00:00:00.000000000', '2000-01-11T00:00:00.000000000', '2000-01-12T00:00:00.000000000', '2000-01-13T00:00:00.000000000', '2000-01-14T00:00:00.000000000', '2000-01-17T00:00:00.000000000', '2000-01-18T00:00:00.000000000', '2000-01-19T00:00:00.000000000', '2000-01-20T00:00:00.000000000', '2000-01-21T00:00:00.000000000', '2000-01-24T00:00:00.000000000', '2000-01-25T00:00:00.000000000', '2000-01-26T00:00:00.000000000', '2000-01-27T00:00:00.000000000', '2000-01-28T00:00:00.000000000', '2000-01-31T00:00:00.000000000', '2000-02-01T00:00:00.000000000', '2000-02-02T00:00:00.000000000', '2000-02-03T00:00:00.000000000', '2000-02-04T00:00:00.000000000', '2000-02-07T00:00:00.000000000', '2000-02-08T00:00:00.000000000', '2000-02-09T00:00:00.000000000', '2000-02-10T00:00:00.000000000', '2000-02-11T00:00:00.000000000'], dtype='datetime64[ns]')
df1.index.values.to_period()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[89], line 1 ----> 1 df1.index.values.to_period() AttributeError: 'numpy.ndarray' object has no attribute 'to_period'
df1.index.to_period()
PeriodIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='period[B]')
df2.index
PeriodIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='period[B]')
df2.index.values
array([Period('2000-01-03', 'B'), Period('2000-01-04', 'B'), Period('2000-01-05', 'B'), Period('2000-01-06', 'B'), Period('2000-01-07', 'B'), Period('2000-01-10', 'B'), Period('2000-01-11', 'B'), Period('2000-01-12', 'B'), Period('2000-01-13', 'B'), Period('2000-01-14', 'B'), Period('2000-01-17', 'B'), Period('2000-01-18', 'B'), Period('2000-01-19', 'B'), Period('2000-01-20', 'B'), Period('2000-01-21', 'B'), Period('2000-01-24', 'B'), Period('2000-01-25', 'B'), Period('2000-01-26', 'B'), Period('2000-01-27', 'B'), Period('2000-01-28', 'B'), Period('2000-01-31', 'B'), Period('2000-02-01', 'B'), Period('2000-02-02', 'B'), Period('2000-02-03', 'B'), Period('2000-02-04', 'B'), Period('2000-02-07', 'B'), Period('2000-02-08', 'B'), Period('2000-02-09', 'B'), Period('2000-02-10', 'B'), Period('2000-02-11', 'B')], dtype=object)
df2.index.week
Int64Index([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6], dtype='int64')
df2.index.day_of_week
Int64Index([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype='int64')
df2.index.weekofyear
Int64Index([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6], dtype='int64')
df2.index.to_timestamp()
DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='datetime64[ns]', freq='B')
df2.index.shift() - df2.index
Index([<BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>], dtype='object')
df2.index[1:].append(pd.Index([np.nan])) # 因为shift直接平移日期。若需直接平移数值,可用这个替代。
PeriodIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11', 'NaT'], dtype='period[B]')
df2.index[1:].append(pd.Index([np.nan])).asfreq('D') - df2.index.asfreq('D')
Index([ <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, NaT], dtype='object')
df = pd.read_pickle('../../../data/stk_df_2015_2021.pkl')
df
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
display(df)
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
temp = pd.read_csv('../../../data/idx_df_csv.csv',dtype={'ticker':str})
temp.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1365 entries, 0 to 1364 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ticker 1365 non-null object 1 secShortName 1365 non-null object 2 tradeDate 1365 non-null object 3 preCloseIndex 1365 non-null float64 4 openIndex 1365 non-null float64 5 highestIndex 1365 non-null float64 6 lowestIndex 1365 non-null float64 7 closeIndex 1365 non-null float64 8 turnoverVol 1365 non-null int64 dtypes: float64(5), int64(1), object(3) memory usage: 96.1+ KB
temp
ticker | secShortName | tradeDate | preCloseIndex | openIndex | highestIndex | lowestIndex | closeIndex | turnoverVol | |
---|---|---|---|---|---|---|---|---|---|
0 | 000001 | 上证综指 | 2021-01-04 | 3473.0693 | 3474.6793 | 3511.6554 | 3457.2061 | 3502.9584 | 38079080000 |
1 | 000001 | 上证综指 | 2021-01-05 | 3502.9584 | 3492.1912 | 3528.6767 | 3484.7151 | 3528.6767 | 40799593400 |
2 | 000001 | 上证综指 | 2021-01-06 | 3528.6767 | 3530.9072 | 3556.8022 | 3513.1262 | 3550.8767 | 37023092600 |
3 | 000001 | 上证综指 | 2021-01-07 | 3550.8767 | 3552.9087 | 3576.2046 | 3526.6174 | 3576.2046 | 40534822600 |
4 | 000001 | 上证综指 | 2021-01-08 | 3576.2046 | 3577.6923 | 3588.0625 | 3544.8912 | 3570.1082 | 34555789600 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1360 | 399106 | 深证综指 | 2022-02-15 | 2253.1310 | 2256.8670 | 2283.6760 | 2251.3000 | 2283.6300 | 37139160803 |
1361 | 399106 | 深证综指 | 2022-02-16 | 2283.6300 | 2293.0800 | 2304.2800 | 2285.6750 | 2296.9930 | 37530877510 |
1362 | 399106 | 深证综指 | 2022-02-17 | 2296.9930 | 2293.4480 | 2314.8270 | 2287.8960 | 2302.1620 | 42928561256 |
1363 | 399106 | 深证综指 | 2022-02-18 | 2302.1620 | 2287.2640 | 2311.8220 | 2284.1220 | 2311.7900 | 38837049683 |
1364 | 399106 | 深证综指 | 2022-02-21 | 2311.7900 | 2313.5060 | 2325.9670 | 2309.7340 | 2325.7960 | 44801214227 |
1365 rows × 9 columns
# 如果读取csv或其他类型的文件, 可能会遇到一些格式转换的问题
pd.read_csv('../../../data/idx_df_csv.csv')
ticker | secShortName | tradeDate | preCloseIndex | openIndex | highestIndex | lowestIndex | closeIndex | turnoverVol | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 上证综指 | 2021-01-04 | 3473.0693 | 3474.6793 | 3511.6554 | 3457.2061 | 3502.9584 | 38079080000 |
1 | 1 | 上证综指 | 2021-01-05 | 3502.9584 | 3492.1912 | 3528.6767 | 3484.7151 | 3528.6767 | 40799593400 |
2 | 1 | 上证综指 | 2021-01-06 | 3528.6767 | 3530.9072 | 3556.8022 | 3513.1262 | 3550.8767 | 37023092600 |
3 | 1 | 上证综指 | 2021-01-07 | 3550.8767 | 3552.9087 | 3576.2046 | 3526.6174 | 3576.2046 | 40534822600 |
4 | 1 | 上证综指 | 2021-01-08 | 3576.2046 | 3577.6923 | 3588.0625 | 3544.8912 | 3570.1082 | 34555789600 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1360 | 399106 | 深证综指 | 2022-02-15 | 2253.1310 | 2256.8670 | 2283.6760 | 2251.3000 | 2283.6300 | 37139160803 |
1361 | 399106 | 深证综指 | 2022-02-16 | 2283.6300 | 2293.0800 | 2304.2800 | 2285.6750 | 2296.9930 | 37530877510 |
1362 | 399106 | 深证综指 | 2022-02-17 | 2296.9930 | 2293.4480 | 2314.8270 | 2287.8960 | 2302.1620 | 42928561256 |
1363 | 399106 | 深证综指 | 2022-02-18 | 2302.1620 | 2287.2640 | 2311.8220 | 2284.1220 | 2311.7900 | 38837049683 |
1364 | 399106 | 深证综指 | 2022-02-21 | 2311.7900 | 2313.5060 | 2325.9670 | 2309.7340 | 2325.7960 | 44801214227 |
1365 rows × 9 columns
df.head(10)
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
5 | 000001.XSHE | 平安银行 | XSHE | 2015-01-12 | 1231.004 | 1205.698 | 2696688 | 2.293105e+09 | 64376.0 | 0.0158 | 1.452882e+11 | 1.687457e+11 |
6 | 000001.XSHE | 平安银行 | XSHE | 2015-01-13 | 1205.698 | 1198.351 | 1418187 | 1.204987e+09 | 34924.0 | 0.0083 | 1.444029e+11 | 1.677175e+11 |
7 | 000001.XSHE | 平安银行 | XSHE | 2015-01-14 | 1198.351 | 1208.963 | 2192762 | 1.889297e+09 | 50675.0 | 0.0128 | 1.456817e+11 | 1.692027e+11 |
8 | 000001.XSHE | 平安银行 | XSHE | 2015-01-15 | 1208.963 | 1253.044 | 2156548 | 1.868796e+09 | 54470.0 | 0.0126 | 1.509935e+11 | 1.753721e+11 |
9 | 000001.XSHE | 平安银行 | XSHE | 2015-01-16 | 1253.044 | 1254.677 | 2701125 | 2.403346e+09 | 60637.0 | 0.0158 | 1.511903e+11 | 1.756006e+11 |
df.tail()
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 308154.0 | 226.0 | 0.0027 | 116472000.0 | 220917000.0 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 111881.0 | 64.0 | 0.0010 | 116288000.0 | 220568000.0 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 77333.0 | 58.0 | 0.0007 | 115000000.0 | 218125000.0 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 71308.0 | 41.0 | 0.0006 | 115920000.0 | 219870000.0 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 105996.0 | 74.0 | 0.0009 | 116104000.0 | 220219000.0 |
df.describe()
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
count | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 | 5.820845e+06 | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 |
mean | 1.368568e+02 | 1.368763e+02 | 5.525219e+06 | 2.008992e+08 | 1.078089e+04 | 2.986831e-02 | 1.324705e+10 | 1.835989e+10 |
std | 2.086103e+03 | 2.085805e+03 | 2.405725e+07 | 4.866879e+08 | 1.874182e+04 | 4.649860e-02 | 5.686519e+10 | 7.522905e+10 |
min | 3.800000e-02 | 3.800000e-02 | 2.000000e+00 | 1.800000e+01 | 1.000000e+00 | 0.000000e+00 | 3.800000e+06 | 1.045000e+07 |
25% | 2.167700e+01 | 2.168600e+01 | 8.454962e+05 | 2.916335e+07 | 2.565000e+03 | 6.900000e-03 | 2.203200e+09 | 3.527868e+09 |
50% | 3.972300e+01 | 3.973600e+01 | 1.864330e+06 | 7.414118e+07 | 5.410000e+03 | 1.510000e-02 | 4.304763e+09 | 6.153206e+09 |
75% | 7.390000e+01 | 7.392400e+01 | 4.317285e+06 | 1.915373e+08 | 1.179500e+04 | 3.380000e-02 | 9.130280e+09 | 1.286893e+10 |
max | 4.024782e+05 | 4.024782e+05 | 5.109897e+09 | 6.794128e+10 | 3.274456e+06 | 9.896000e-01 | 3.267370e+12 | 3.267370e+12 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5838750 entries, 0 to 5838749 Data columns (total 12 columns): # Column Dtype --- ------ ----- 0 secID object 1 secShortName object 2 exchangeCD object 3 tradeDate object 4 preClosePrice float64 5 closePrice float64 6 turnoverVol int64 7 turnoverValue float64 8 dealAmount float64 9 turnoverRate float64 10 negMarketValue float64 11 marketValue float64 dtypes: float64(7), int64(1), object(4) memory usage: 534.6+ MB
df['secShortName'].nunique() # 多少只股票
4853
df['turnoverValue'].mean()
200899200.96656865
df.select_dtypes(include=np.number)
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
0 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 8 columns
df.select_dtypes(include=np.number).mean()
preClosePrice 1.368568e+02 closePrice 1.368763e+02 turnoverVol 5.525219e+06 turnoverValue 2.008992e+08 dealAmount 1.078089e+04 turnoverRate 2.986831e-02 negMarketValue 1.324705e+10 marketValue 1.835989e+10 dtype: float64
df.select_dtypes(include=np.number).mean(axis=1)
0 4.314767e+10 1 4.237068e+10 2 4.147094e+10 3 4.002552e+10 4 4.055811e+10 ... 5838745 4.227324e+07 5838746 4.214321e+07 5838747 4.166574e+07 5838748 4.199687e+07 5838749 4.207461e+07 Length: 5838750, dtype: float64
df_pingan = df[df['secShortName']=='平安银行'].copy()
df_pingan
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1700 | 000001.XSHE | 平安银行 | XSHE | 2021-12-27 | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
1701 | 000001.XSHE | 平安银行 | XSHE | 2021-12-28 | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
1702 | 000001.XSHE | 平安银行 | XSHE | 2021-12-29 | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
1703 | 000001.XSHE | 平安银行 | XSHE | 2021-12-30 | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
1704 | 000001.XSHE | 平安银行 | XSHE | 2021-12-31 | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 12 columns
object
和str
有什么区别:
一些细节:numpy 中允许 object 和 str 两种字符串的格式。但pandas没有采用numpy的这种格式,而是用原生python的字符串格式。numpy的格式更为复杂。例:(来自https://stackoverflow.com/questions/34881079/pandas-distinction-between-str-and-object-types)
x = np.array(['Testing', 'a', 'string'], dtype='|S7') # 长度为7的字符串。|是byteorder指示符
y = np.array(['Testing', 'a', 'string'], dtype=object)
x[1] = 'a really really really long string'
x
array([b'Testing', b'a reall', b'string'], dtype='|S7')
y[1] = 'a really really really long string'
y
array(['Testing', 'a really really really long string', 'string'], dtype=object)
pandas中不允许用固定长度的str
temp = pd.DataFrame({'a':range(5)})
temp['a'].dtype
dtype('int64')
temp['a'].astype(object).dtype
dtype('O')
temp['a'].astype(str).dtype
dtype('O')
temp = tm.makeTimeDataFrame(3)
display(temp)
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -0.051528 | -0.582815 | 0.392680 | -1.395863 |
2000-01-04 | 0.538363 | -1.165366 | -0.394174 | -0.829585 |
2000-01-05 | 0.952806 | 1.578885 | -1.252698 | 0.734604 |
temp2 = temp.stack().to_frame().reset_index()
temp2.columns = ['date','variable','value']
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | -0.051528 |
1 | 2000-01-03 | B | -0.582815 |
2 | 2000-01-03 | C | 0.392680 |
3 | 2000-01-03 | D | -1.395863 |
4 | 2000-01-04 | A | 0.538363 |
5 | 2000-01-04 | B | -1.165366 |
6 | 2000-01-04 | C | -0.394174 |
7 | 2000-01-04 | D | -0.829585 |
8 | 2000-01-05 | A | 0.952806 |
9 | 2000-01-05 | B | 1.578885 |
10 | 2000-01-05 | C | -1.252698 |
11 | 2000-01-05 | D | 0.734604 |
temp2.sort_values(by='value')
date | variable | value | |
---|---|---|---|
3 | 2000-01-03 | D | -1.395863 |
10 | 2000-01-05 | C | -1.252698 |
5 | 2000-01-04 | B | -1.165366 |
7 | 2000-01-04 | D | -0.829585 |
1 | 2000-01-03 | B | -0.582815 |
6 | 2000-01-04 | C | -0.394174 |
0 | 2000-01-03 | A | -0.051528 |
2 | 2000-01-03 | C | 0.392680 |
4 | 2000-01-04 | A | 0.538363 |
11 | 2000-01-05 | D | 0.734604 |
8 | 2000-01-05 | A | 0.952806 |
9 | 2000-01-05 | B | 1.578885 |
temp2.sort_values(by='variable',ascending=False)
date | variable | value | |
---|---|---|---|
3 | 2000-01-03 | D | -1.395863 |
7 | 2000-01-04 | D | -0.829585 |
11 | 2000-01-05 | D | 0.734604 |
2 | 2000-01-03 | C | 0.392680 |
6 | 2000-01-04 | C | -0.394174 |
10 | 2000-01-05 | C | -1.252698 |
1 | 2000-01-03 | B | -0.582815 |
5 | 2000-01-04 | B | -1.165366 |
9 | 2000-01-05 | B | 1.578885 |
0 | 2000-01-03 | A | -0.051528 |
4 | 2000-01-04 | A | 0.538363 |
8 | 2000-01-05 | A | 0.952806 |
temp2.duplicated('variable')
0 False 1 False 2 False 3 False 4 True 5 True 6 True 7 True 8 True 9 True 10 True 11 True dtype: bool
temp2.drop_duplicates('variable')
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | -0.051528 |
1 | 2000-01-03 | B | -0.582815 |
2 | 2000-01-03 | C | 0.392680 |
3 | 2000-01-03 | D | -1.395863 |
temp2.drop_duplicates('variable',keep='last')
date | variable | value | |
---|---|---|---|
8 | 2000-01-05 | A | 0.952806 |
9 | 2000-01-05 | B | 1.578885 |
10 | 2000-01-05 | C | -1.252698 |
11 | 2000-01-05 | D | 0.734604 |
temp = tm.makeDataFrame()
temp['A'] = np.random.randint(0, 100, size=temp.shape[0])
temp.drop(['B','C','D'],axis=1, inplace=True)
temp.columns = ['score']
temp
score | |
---|---|
leMJDtok1y | 70 |
tHQMKBjt4g | 87 |
UHQZfBqS9C | 29 |
HnNcYZiPS3 | 27 |
Q8E0jeFgcZ | 63 |
V3O7nacCx6 | 5 |
A21m7MIYO0 | 93 |
jPRB2EYMJ9 | 6 |
lyVA6FSPDQ | 35 |
xRVJg7Noqk | 88 |
GWdYMjt64S | 51 |
tsy4vUUXuW | 37 |
aqO7XSrLqj | 75 |
ozmzHJ0hio | 48 |
R5iMCRIpOp | 51 |
f2WbBxzk3u | 53 |
NmyCgrqM22 | 79 |
kn8e4znWFQ | 73 |
61oSoW6COF | 39 |
slhMjph9xG | 62 |
P6px9D4zmM | 1 |
hZPrwuiO1U | 28 |
dY9aIT4EX3 | 59 |
eT1vVe47Lq | 85 |
7HcEWFCsMB | 83 |
3Wj1dUlcme | 62 |
AuRMTRsaSF | 90 |
R7dGWFgtRe | 92 |
Y5Y887ynFq | 27 |
xWjpVzhPye | 61 |
?pd.cut
Signature: pd.cut( x, bins, right: 'bool' = True, labels=None, retbins: 'bool' = False, precision: 'int' = 3, include_lowest: 'bool' = False, duplicates: 'str' = 'raise', ordered: 'bool' = True, ) Docstring: Bin values into discrete intervals. Use `cut` when you need to segment and sort data values into bins. This function is also useful for going from a continuous variable to a categorical variable. For example, `cut` could convert ages to groups of age ranges. Supports binning into an equal number of bins, or a pre-specified array of bins. Parameters ---------- x : array-like The input array to be binned. Must be 1-dimensional. bins : int, sequence of scalars, or IntervalIndex The criteria to bin by. * int : Defines the number of equal-width bins in the range of `x`. The range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. * IntervalIndex : Defines the exact bins to be used. Note that IntervalIndex for `bins` must be non-overlapping. right : bool, default True Indicates whether `bins` includes the rightmost edge or not. If ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]`` indicate (1,2], (2,3], (3,4]. This argument is ignored when `bins` is an IntervalIndex. labels : array or False, default None Specifies the labels for the returned bins. Must be the same length as the resulting bins. If False, returns only integer indicators of the bins. This affects the type of the output container (see below). This argument is ignored when `bins` is an IntervalIndex. If True, raises an error. When `ordered=False`, labels must be provided. retbins : bool, default False Whether to return the bins or not. Useful when bins is provided as a scalar. precision : int, default 3 The precision at which to store and display the bins labels. include_lowest : bool, default False Whether the first interval should be left-inclusive or not. duplicates : {default 'raise', 'drop'}, optional If bin edges are not unique, raise ValueError or drop non-uniques. ordered : bool, default True Whether the labels are ordered or not. Applies to returned types Categorical and Series (with Categorical dtype). If True, the resulting categorical will be ordered. If False, the resulting categorical will be unordered (labels must be provided). .. versionadded:: 1.1.0 Returns ------- out : Categorical, Series, or ndarray An array-like object representing the respective bin for each value of `x`. The type depends on the value of `labels`. * None (default) : returns a Series for Series `x` or a Categorical for all other inputs. The values stored within are Interval dtype. * sequence of scalars : returns a Series for Series `x` or a Categorical for all other inputs. The values stored within are whatever the type in the sequence is. * False : returns an ndarray of integers. bins : numpy.ndarray or IntervalIndex. The computed or specified bins. Only returned when `retbins=True`. For scalar or sequence `bins`, this is an ndarray with the computed bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For an IntervalIndex `bins`, this is equal to `bins`. See Also -------- qcut : Discretize variable into equal-sized buckets based on rank or based on sample quantiles. Categorical : Array type for storing data that come from a fixed set of values. Series : One-dimensional array with axis labels (including time series). IntervalIndex : Immutable Index implementing an ordered, sliceable set. Notes ----- Any NA values will be NA in the result. Out of bounds values will be NA in the resulting Series or Categorical object. Reference :ref:`the user guide <reshaping.tile.cut>` for more examples. Examples -------- Discretize into three equal-sized bins. >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3) ... # doctest: +ELLIPSIS [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) ... # doctest: +ELLIPSIS ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... array([0.994, 3. , 5. , 7. ])) Discovers the same bins, but assign them specific labels. Notice that the returned Categorical's categories are `labels` and is ordered. >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), ... 3, labels=["bad", "medium", "good"]) ['bad', 'good', 'medium', 'medium', 'good', 'bad'] Categories (3, object): ['bad' < 'medium' < 'good'] ``ordered=False`` will result in unordered categories when labels are passed. This parameter can be used to allow non-unique labels: >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, ... labels=["B", "A", "B"], ordered=False) ['B', 'B', 'A', 'A', 'B', 'B'] Categories (2, object): ['A', 'B'] ``labels=False`` implies you just want the bins back. >>> pd.cut([0, 1, 1, 2], bins=4, labels=False) array([0, 1, 1, 3]) Passing a Series as an input returns a Series with categorical dtype: >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), ... index=['a', 'b', 'c', 'd', 'e']) >>> pd.cut(s, 3) ... # doctest: +ELLIPSIS a (1.992, 4.667] b (1.992, 4.667] c (4.667, 7.333] d (7.333, 10.0] e (7.333, 10.0] dtype: category Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ... Passing a Series as an input returns a Series with mapping value. It is used to map numerically to intervals based on bins. >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), ... index=['a', 'b', 'c', 'd', 'e']) >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False) ... # doctest: +ELLIPSIS (a 1.0 b 2.0 c 3.0 d 4.0 e NaN dtype: float64, array([ 0, 2, 4, 6, 8, 10])) Use `drop` optional when bins is not unique >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, ... right=False, duplicates='drop') ... # doctest: +ELLIPSIS (a 1.0 b 2.0 c 3.0 d 3.0 e NaN dtype: float64, array([ 0, 2, 4, 6, 10])) Passing an IntervalIndex for `bins` results in those categories exactly. Notice that values not covered by the IntervalIndex are set to NaN. 0 is to the left of the first bin (which is closed on the right), and 1.5 falls between two bins. >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]] Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]] File: ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/reshape/tile.py Type: function
temp['score'][3] = 0
bins = [0, 25, 50, 75, 100]
pd.cut(temp['score'], bins=bins, labels=['D','C','B','A'])
leMJDtok1y B tHQMKBjt4g A UHQZfBqS9C C HnNcYZiPS3 NaN Q8E0jeFgcZ B V3O7nacCx6 D A21m7MIYO0 A jPRB2EYMJ9 D lyVA6FSPDQ C xRVJg7Noqk A GWdYMjt64S B tsy4vUUXuW C aqO7XSrLqj B ozmzHJ0hio C R5iMCRIpOp B f2WbBxzk3u B NmyCgrqM22 A kn8e4znWFQ B 61oSoW6COF C slhMjph9xG B P6px9D4zmM D hZPrwuiO1U C dY9aIT4EX3 B eT1vVe47Lq A 7HcEWFCsMB A 3Wj1dUlcme B AuRMTRsaSF A R7dGWFgtRe A Y5Y887ynFq C xWjpVzhPye B Name: score, dtype: category Categories (4, object): ['D' < 'C' < 'B' < 'A']
bins = [0, 25, 50, 75, 100]
pd.cut(temp['score'], bins=bins, labels=['D','C','B','A'],include_lowest=True)
leMJDtok1y B tHQMKBjt4g A UHQZfBqS9C C HnNcYZiPS3 D Q8E0jeFgcZ B V3O7nacCx6 D A21m7MIYO0 A jPRB2EYMJ9 D lyVA6FSPDQ C xRVJg7Noqk A GWdYMjt64S B tsy4vUUXuW C aqO7XSrLqj B ozmzHJ0hio C R5iMCRIpOp B f2WbBxzk3u B NmyCgrqM22 A kn8e4znWFQ B 61oSoW6COF C slhMjph9xG B P6px9D4zmM D hZPrwuiO1U C dY9aIT4EX3 B eT1vVe47Lq A 7HcEWFCsMB A 3Wj1dUlcme B AuRMTRsaSF A R7dGWFgtRe A Y5Y887ynFq C xWjpVzhPye B Name: score, dtype: category Categories (4, object): ['D' < 'C' < 'B' < 'A']
Useful resources: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
df_pingan
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1700 | 000001.XSHE | 平安银行 | XSHE | 2021-12-27 | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
1701 | 000001.XSHE | 平安银行 | XSHE | 2021-12-28 | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
1702 | 000001.XSHE | 平安银行 | XSHE | 2021-12-29 | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
1703 | 000001.XSHE | 平安银行 | XSHE | 2021-12-30 | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
1704 | 000001.XSHE | 平安银行 | XSHE | 2021-12-31 | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 12 columns
df_pingan.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1705 entries, 0 to 1704 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 secID 1705 non-null object 1 secShortName 1705 non-null object 2 exchangeCD 1705 non-null object 3 tradeDate 1705 non-null object 4 preClosePrice 1705 non-null float64 5 closePrice 1705 non-null float64 6 turnoverVol 1705 non-null int64 7 turnoverValue 1705 non-null float64 8 dealAmount 1705 non-null float64 9 turnoverRate 1705 non-null float64 10 negMarketValue 1705 non-null float64 11 marketValue 1705 non-null float64 dtypes: float64(7), int64(1), object(4) memory usage: 173.2+ KB
df_pingan['tradeDate'] = pd.to_datetime(df_pingan['tradeDate'], format='%Y-%m-%d')
df_pingan.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1705 entries, 0 to 1704 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 secID 1705 non-null object 1 secShortName 1705 non-null object 2 exchangeCD 1705 non-null object 3 tradeDate 1705 non-null datetime64[ns] 4 preClosePrice 1705 non-null float64 5 closePrice 1705 non-null float64 6 turnoverVol 1705 non-null int64 7 turnoverValue 1705 non-null float64 8 dealAmount 1705 non-null float64 9 turnoverRate 1705 non-null float64 10 negMarketValue 1705 non-null float64 11 marketValue 1705 non-null float64 dtypes: datetime64[ns](1), float64(7), int64(1), object(3) memory usage: 173.2+ KB
df_pingan.set_index('tradeDate', inplace=True)
df_pingan
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 000001.XSHE | 平安银行 | XSHE | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 000001.XSHE | 平安银行 | XSHE | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 000001.XSHE | 平安银行 | XSHE | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 000001.XSHE | 平安银行 | XSHE | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 000001.XSHE | 平安银行 | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 11 columns
BM: Business Month
df_pingan.resample('BM').last()
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-30 | 000001.XSHE | 平安银行 | XSHE | 1134.678 | 1137.127 | 1614788 | 1.298736e+09 | 42061.0 | 0.0095 | 1.370254e+11 | 1.591488e+11 |
2015-02-27 | 000001.XSHE | 平安银行 | XSHE | 1148.556 | 1142.025 | 1377727 | 1.115146e+09 | 36250.0 | 0.0081 | 1.376156e+11 | 1.598343e+11 |
2015-03-31 | 000001.XSHE | 平安银行 | XSHE | 1278.350 | 1285.697 | 4745114 | 4.393135e+09 | 114032.0 | 0.0278 | 1.549282e+11 | 1.799421e+11 |
2015-04-30 | 000001.XSHE | 平安银行 | XSHE | 1645.458 | 1650.399 | 4182058 | 4.899380e+09 | 120845.0 | 0.0245 | 1.971277e+11 | 2.289549e+11 |
2015-05-29 | 000001.XSHE | 平安银行 | XSHE | 1531.808 | 1514.019 | 2915126 | 3.103544e+09 | 99260.0 | 0.0171 | 1.808381e+11 | 2.192089e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-08-31 | 000001.XSHE | 平安银行 | XSHE | 2277.563 | 2287.846 | 1416461 | 2.072007e+09 | 119791.0 | 0.0061 | 3.454224e+11 | 3.454253e+11 |
2021-09-30 | 000001.XSHE | 平安银行 | XSHE | 2332.832 | 2304.555 | 959216 | 1.424676e+09 | 70140.0 | 0.0041 | 3.479452e+11 | 3.479481e+11 |
2021-10-29 | 000001.XSHE | 平安银行 | XSHE | 2534.625 | 2506.348 | 650958 | 1.052760e+09 | 76331.0 | 0.0028 | 3.784122e+11 | 3.784154e+11 |
2021-11-30 | 000001.XSHE | 平安银行 | XSHE | 2250.572 | 2241.575 | 884472 | 1.280385e+09 | 65260.0 | 0.0038 | 3.384364e+11 | 3.384392e+11 |
2021-12-31 | 000001.XSHE | 平安银行 | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
84 rows × 11 columns
df_pingan.resample('M').mean(numeric_only=True)
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||
2015-01-31 | 1204.922300 | 1197.126450 | 2.685951e+06 | 2.293232e+09 | 64189.250000 | 0.015725 | 1.442554e+11 | 1.675461e+11 |
2015-02-28 | 1127.222600 | 1127.549133 | 1.506542e+06 | 1.202904e+09 | 37079.066667 | 0.008827 | 1.358712e+11 | 1.578083e+11 |
2015-03-31 | 1193.861227 | 1200.391773 | 2.966580e+06 | 2.552488e+09 | 75256.318182 | 0.017386 | 1.446489e+11 | 1.680031e+11 |
2015-04-30 | 1543.791762 | 1561.158524 | 4.287529e+06 | 4.712147e+09 | 138826.190476 | 0.025114 | 1.869631e+11 | 2.171492e+11 |
2015-05-31 | 1577.762100 | 1570.943100 | 2.897439e+06 | 3.198760e+09 | 96879.850000 | 0.016965 | 1.876373e+11 | 2.213028e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-08-31 | 2440.388364 | 2441.031045 | 1.143970e+06 | 1.800585e+09 | 96686.318182 | 0.004891 | 3.685506e+11 | 3.685537e+11 |
2021-09-30 | 2390.798950 | 2391.634400 | 1.659547e+06 | 2.550127e+09 | 126351.950000 | 0.007095 | 3.610926e+11 | 3.610956e+11 |
2021-10-31 | 2504.098750 | 2516.710812 | 1.081450e+06 | 1.752654e+09 | 97972.312500 | 0.004625 | 3.799768e+11 | 3.799800e+11 |
2021-11-30 | 2318.985409 | 2306.950273 | 1.188170e+06 | 1.768122e+09 | 94558.681818 | 0.005073 | 3.483068e+11 | 3.483098e+11 |
2021-12-31 | 2270.019130 | 2264.654348 | 1.342157e+06 | 1.968544e+09 | 96840.304348 | 0.005735 | 3.419210e+11 | 3.419238e+11 |
84 rows × 8 columns
df_pingan.loc['2015']
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2015-12-25 | 000001.XSHE | 平安银行 | XSHE | 1219.517 | 1226.435 | 578480 | 4.956381e+08 | 19587.0 | 0.0034 | 1.464883e+11 | 1.775707e+11 |
2015-12-28 | 000001.XSHE | 平安银行 | XSHE | 1226.435 | 1183.939 | 1189828 | 1.003010e+09 | 33043.0 | 0.0070 | 1.414126e+11 | 1.714179e+11 |
2015-12-29 | 000001.XSHE | 平安银行 | XSHE | 1183.939 | 1194.810 | 896705 | 7.447184e+08 | 20170.0 | 0.0053 | 1.427110e+11 | 1.729919e+11 |
2015-12-30 | 000001.XSHE | 平安银行 | XSHE | 1194.810 | 1195.798 | 770642 | 6.412773e+08 | 18064.0 | 0.0045 | 1.428291e+11 | 1.731350e+11 |
2015-12-31 | 000001.XSHE | 平安银行 | XSHE | 1195.798 | 1184.928 | 710734 | 5.916436e+08 | 14765.0 | 0.0042 | 1.415306e+11 | 1.715610e+11 |
244 rows × 11 columns
df_pingan.loc['2015-01':'2018-03']
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2018-03-26 | 000001.XSHE | 平安银行 | XSHE | 1384.775 | 1334.708 | 1668113 | 1.519435e+09 | 63646.0 | 0.0082 | 1.849136e+11 | 1.876726e+11 |
2018-03-27 | 000001.XSHE | 平安银行 | XSHE | 1334.708 | 1335.929 | 1330940 | 1.213990e+09 | 49311.0 | 0.0065 | 1.850827e+11 | 1.878443e+11 |
2018-03-28 | 000001.XSHE | 平安银行 | XSHE | 1335.929 | 1329.824 | 1325020 | 1.201023e+09 | 45201.0 | 0.0065 | 1.842368e+11 | 1.869858e+11 |
2018-03-29 | 000001.XSHE | 平安银行 | XSHE | 1329.824 | 1349.362 | 1604219 | 1.445282e+09 | 61870.0 | 0.0079 | 1.869437e+11 | 1.897330e+11 |
2018-03-30 | 000001.XSHE | 平安银行 | XSHE | 1349.362 | 1331.045 | 906846 | 8.234382e+08 | 31869.0 | 0.0044 | 1.844060e+11 | 1.871575e+11 |
791 rows × 11 columns
注意:pandas中的apply操作有时会很慢(包括groupby apply)
df
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
df.select_dtypes(np.number).apply(lambda x: x**2+3)
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
0 | 1.671966e+06 | 1.710179e+06 | 24661553281603 | 2.084277e+19 | 8.552180e+09 | 3.000847 | 2.483276e+22 | 3.349881e+22 |
1 | 1.710179e+06 | 1.659323e+06 | 14146264367107 | 1.192629e+19 | 6.452106e+09 | 3.000484 | 2.409428e+22 | 3.250262e+22 |
2 | 1.659323e+06 | 1.596829e+06 | 8711948463204 | 6.942152e+18 | 5.284854e+09 | 3.000299 | 2.318685e+22 | 3.127853e+22 |
3 | 1.596829e+06 | 1.491352e+06 | 5972896490404 | 4.528399e+18 | 4.724363e+09 | 3.000204 | 2.165525e+22 | 2.921242e+22 |
4 | 1.491352e+06 | 1.515374e+06 | 18966364691524 | 1.471013e+19 | 9.976414e+09 | 3.000650 | 2.200405e+22 | 2.968295e+22 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 3.390625e+00 | 3.407044e+00 | 238642997124 | 9.495889e+10 | 5.107900e+04 | 3.000007 | 1.356573e+16 | 4.880432e+16 |
5838746 | 3.407044e+00 | 3.405769e+00 | 31578000807 | 1.251736e+10 | 4.099000e+03 | 3.000001 | 1.352290e+16 | 4.865024e+16 |
5838747 | 3.405769e+00 | 3.396900e+00 | 15264602503 | 5.980393e+09 | 3.367000e+03 | 3.000000 | 1.322500e+16 | 4.757852e+16 |
5838748 | 3.396900e+00 | 3.403225e+00 | 12904960003 | 5.084831e+09 | 1.684000e+03 | 3.000000 | 1.343745e+16 | 4.834282e+16 |
5838749 | 3.403225e+00 | 3.404496e+00 | 28156840003 | 1.123515e+10 | 5.479000e+03 | 3.000001 | 1.348014e+16 | 4.849641e+16 |
5838750 rows × 8 columns
df.groupby('secShortName')
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f99b254a5e0>
df.groupby('secShortName').last()
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2021-12-31 | 21.428 | 21.560 | 550679 | 1.185508e+07 | 794.0 | 0.0079 | 1.509872e+09 | 1.509872e+09 |
*ST上普 | 600680.XSHG | XSHG | 2018-04-27 | 28.879 | 30.133 | 1272828 | 3.498398e+07 | 2843.0 | 0.0180 | 1.979601e+09 | 2.939313e+09 |
*ST东海A | 000613.XSHE | XSHE | 2021-12-31 | 5.678 | 5.836 | 3854756 | 2.219116e+07 | 1384.0 | 0.0207 | 1.088870e+09 | 1.485528e+09 |
*ST东海B | 200613.XSHE | XSHE | 2021-12-31 | 1.757 | 1.757 | 33909 | 6.009500e+04 | 13.0 | 0.0004 | 1.408000e+08 | 5.825600e+08 |
*ST东电 | 000585.XSHE | XSHE | 2021-12-31 | 2.617 | 2.617 | 5993090 | 1.543082e+07 | 1268.0 | 0.0123 | 1.249313e+09 | 1.790408e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2021-12-31 | 20.824 | 21.098 | 456508 | 8.807597e+06 | NaN | 0.0068 | 1.297350e+09 | 2.217318e+09 |
龙腾光电 | 688055.XSHG | XSHG | 2021-12-31 | 6.808 | 6.858 | 4664701 | 3.186770e+07 | 3830.0 | 0.0026 | 1.223867e+10 | 2.283333e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2021-12-31 | 77.083 | 77.208 | 3700591 | 2.766969e+08 | 20421.0 | 0.0155 | 1.785667e+10 | 1.785667e+10 |
龙软科技 | 688078.XSHG | XSHG | 2021-12-31 | 62.467 | 59.057 | 963837 | 5.722752e+07 | 2401.0 | 0.0339 | 1.657041e+09 | 4.117650e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-12-31 | 23.830 | 23.620 | 833004 | 1.976806e+07 | 1403.0 | 0.0260 | 7.558400e+08 | 3.023360e+09 |
4853 rows × 11 columns
df.groupby('secShortName').first()
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2015-01-05 | 28.364 | 27.304 | 3103725 | 8.408389e+07 | 4279.0 | 0.0443 | 1.912092e+09 | 1.912092e+09 |
*ST上普 | 600680.XSHG | XSHG | 2015-01-05 | 57.366 | 57.562 | 483414 | 2.566049e+07 | 1787.0 | 0.0069 | 3.781578e+09 | 5.614890e+09 |
*ST东海A | 000613.XSHE | XSHE | 2015-01-05 | 9.769 | 9.783 | 2242078 | 2.191941e+07 | 1159.0 | 0.0123 | 1.782263e+09 | 2.490444e+09 |
*ST东海B | 200613.XSHE | XSHE | 2015-01-05 | 6.642 | 6.576 | 164636 | 1.085171e+06 | 50.0 | 0.0021 | 5.271200e+08 | 2.180959e+09 |
*ST东电 | 000585.XSHE | XSHE | 2015-01-05 | 5.183 | 5.272 | 9358955 | 4.800403e+07 | 4519.0 | 0.0192 | 2.516909e+09 | 3.607018e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2020-07-27 | 9.180 | 7.840 | 4804739 | 4.053933e+07 | NaN | 0.0730 | 5.157141e+08 | 8.976079e+08 |
龙腾光电 | 688055.XSHG | XSHG | 2020-08-17 | 1.220 | 9.850 | 215585877 | 1.892487e+09 | 211520.0 | 0.8003 | 2.653273e+09 | 3.283333e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2017-04-10 | 9.520 | 13.710 | 11500 | 1.576650e+05 | 15.0 | 0.0002 | 7.129200e+08 | 2.851680e+09 |
龙软科技 | 688078.XSHG | XSHG | 2019-12-30 | 21.590 | 48.900 | 12454266 | 6.069590e+08 | 28417.0 | 0.7736 | 7.872460e+08 | 3.459675e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-04-16 | 12.860 | 18.520 | 226150 | 4.186135e+06 | 345.0 | 0.0071 | 5.926400e+08 | 2.370560e+09 |
4853 rows × 11 columns
df.groupby('secShortName').head(3)
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
1705 | 000002.XSHE | 万科A | XSHE | 2015-01-05 | 1567.420 | 1681.312 | 11835160 | 9.700712e+09 | 185938.0 | 0.0676 | 1.447266e+11 | 1.645692e+11 |
1706 | 000002.XSHE | 万科A | XSHE | 2015-01-06 | 1681.312 | 1619.292 | 6036510 | 4.839616e+09 | 123274.0 | 0.0345 | 1.393879e+11 | 1.584986e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5835695 | 900956.XSHG | 东贝B股 | XSHG | 2015-01-06 | 1.159 | 1.158 | 438800 | 4.708660e+05 | 265.0 | 0.0038 | 1.242000e+08 | 2.538000e+08 |
5835696 | 900956.XSHG | 东贝B股 | XSHG | 2015-01-07 | 1.158 | 1.159 | 429200 | 4.600200e+05 | 236.0 | 0.0037 | 1.243150e+08 | 2.540350e+08 |
5837045 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-05 | 0.620 | 0.628 | 615555 | 3.823390e+05 | 268.0 | 0.0033 | 1.146320e+08 | 2.174270e+08 |
5837046 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-06 | 0.628 | 0.623 | 459481 | 2.838860e+05 | 194.0 | 0.0025 | 1.137120e+08 | 2.156820e+08 |
5837047 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-07 | 0.623 | 0.631 | 703095 | 4.412970e+05 | 324.0 | 0.0038 | 1.151840e+08 | 2.184740e+08 |
14554 rows × 12 columns
df.groupby('secShortName').nth(-2)
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2021-12-30 | 21.163 | 21.428 | 793779 | 1.689394e+07 | 1009.0 | 0.0113 | 1.500590e+09 | 1.500590e+09 |
*ST上普 | 600680.XSHG | XSHG | 2018-04-26 | 28.800 | 28.879 | 867415 | 2.348474e+07 | 1907.0 | 0.0123 | 1.897224e+09 | 2.817000e+09 |
*ST东海A | 000613.XSHE | XSHE | 2021-12-30 | 5.521 | 5.678 | 2735594 | 1.541656e+07 | 1158.0 | 0.0147 | 1.059513e+09 | 1.445477e+09 |
*ST东海B | 200613.XSHE | XSHE | 2021-12-30 | 1.757 | 1.757 | 261068 | 4.593800e+05 | 10.0 | 0.0033 | 1.408000e+08 | 5.825600e+08 |
*ST东电 | 000585.XSHE | XSHE | 2021-12-30 | 2.604 | 2.617 | 2416167 | 6.230545e+06 | 678.0 | 0.0050 | 1.249313e+09 | 1.790408e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2021-12-30 | 20.167 | 20.824 | 561059 | 1.063724e+07 | NaN | 0.0083 | 1.280519e+09 | 2.188552e+09 |
龙腾光电 | 688055.XSHG | XSHG | 2021-12-30 | 6.768 | 6.808 | 4752067 | 3.232808e+07 | 3395.0 | 0.0027 | 1.214933e+10 | 2.266667e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2021-12-30 | 79.167 | 77.083 | 5338171 | 4.007450e+08 | 31920.0 | 0.0223 | 1.782774e+10 | 1.782774e+10 |
龙软科技 | 688078.XSHG | XSHG | 2021-12-30 | 61.980 | 62.467 | 387495 | 2.392429e+07 | 1454.0 | 0.0136 | 1.752706e+09 | 4.355370e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-12-30 | 23.510 | 23.830 | 1185853 | 2.812384e+07 | 2482.0 | 0.0371 | 7.625600e+08 | 3.050240e+09 |
4852 rows × 11 columns
arrays = [
np.array(["bar", "bar", 'bar', "baz", "baz", "baz", "foo", "foo", "foo", "qux", "qux", "qux"]),
np.array([1,2,3, 1,2,3, 1,2,3, 1,2,3]),
]
temp = pd.DataFrame(np.random.randn(12, 4), index=arrays)
temp
0 | 1 | 2 | 3 | ||
---|---|---|---|---|---|
bar | 1 | -1.742879 | 0.155123 | -1.454690 | -0.127365 |
2 | 0.958199 | 0.372288 | -1.108044 | -0.195609 | |
3 | 0.379005 | -1.867816 | -0.234514 | -1.230953 | |
baz | 1 | -1.108997 | -0.506065 | -1.544889 | 0.692354 |
2 | 0.137478 | -0.073801 | 2.395381 | 0.457265 | |
3 | -1.559135 | 0.570340 | 0.332941 | 0.428118 | |
foo | 1 | -1.138488 | 0.911376 | -0.733840 | -2.143457 |
2 | 0.706423 | -0.352932 | 1.191399 | 0.198311 | |
3 | -1.031937 | -0.656603 | 1.706294 | -0.295609 | |
qux | 1 | 0.470402 | -0.726789 | 2.559832 | 1.079300 |
2 | 0.914655 | 1.224235 | 1.376842 | 0.244068 | |
3 | 1.305349 | 0.966439 | -0.521711 | 0.436194 |
temp.loc[('bar',2):('foo',3)]
0 | 1 | 2 | 3 | ||
---|---|---|---|---|---|
bar | 2 | 0.958199 | 0.372288 | -1.108044 | -0.195609 |
3 | 0.379005 | -1.867816 | -0.234514 | -1.230953 | |
baz | 1 | -1.108997 | -0.506065 | -1.544889 | 0.692354 |
2 | 0.137478 | -0.073801 | 2.395381 | 0.457265 | |
3 | -1.559135 | 0.570340 | 0.332941 | 0.428118 | |
foo | 1 | -1.138488 | 0.911376 | -0.733840 | -2.143457 |
2 | 0.706423 | -0.352932 | 1.191399 | 0.198311 | |
3 | -1.031937 | -0.656603 | 1.706294 | -0.295609 |
df['tradeDate'] = pd.to_datetime(df['tradeDate'], format='%Y-%m-%d')
df.set_index(['secShortName','tradeDate'])
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 | |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 | |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 | |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 | |
2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 | |
2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 | |
2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 10 columns
df.set_index(['secShortName','tradeDate'],inplace=True)
df
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 | |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 | |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 | |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 | |
2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 | |
2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 | |
2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 10 columns
df.groupby(['secShortName','tradeDate']).last()
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
*ST万方 | 2015-01-05 | 000638.XSHE | XSHE | 28.364 | 27.304 | 3103725 | 84083888.60 | 4279.0 | 0.0443 | 1.912092e+09 | 1.912092e+09 |
2015-01-06 | 000638.XSHE | XSHE | 27.304 | 27.039 | 1926608 | 51665933.96 | 3060.0 | 0.0275 | 1.893528e+09 | 1.893528e+09 | |
2015-01-07 | 000638.XSHE | XSHE | 27.039 | 26.995 | 1295798 | 35013368.40 | 1919.0 | 0.0185 | 1.890434e+09 | 1.890434e+09 | |
2015-01-08 | 000638.XSHE | XSHE | 26.995 | 27.702 | 2224507 | 61274527.17 | 3560.0 | 0.0318 | 1.939938e+09 | 1.939938e+09 | |
2015-01-09 | 000638.XSHE | XSHE | 27.702 | 27.348 | 1053730 | 29061439.79 | 1860.0 | 0.0151 | 1.915186e+09 | 1.915186e+09 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙高股份 | 2021-12-27 | 605086.XSHG | XSHG | 23.320 | 23.340 | 711496 | 16590677.00 | 1586.0 | 0.0222 | 7.468800e+08 | 2.987520e+09 |
2021-12-28 | 605086.XSHG | XSHG | 23.340 | 23.540 | 819496 | 19232165.00 | 1664.0 | 0.0256 | 7.532800e+08 | 3.013120e+09 | |
2021-12-29 | 605086.XSHG | XSHG | 23.540 | 23.510 | 656443 | 15410019.00 | 1646.0 | 0.0205 | 7.523200e+08 | 3.009280e+09 | |
2021-12-30 | 605086.XSHG | XSHG | 23.510 | 23.830 | 1185853 | 28123837.00 | 2482.0 | 0.0371 | 7.625600e+08 | 3.050240e+09 | |
2021-12-31 | 605086.XSHG | XSHG | 23.830 | 23.620 | 833004 | 19768062.00 | 1403.0 | 0.0260 | 7.558400e+08 | 3.023360e+09 |
5838680 rows × 10 columns
df.loc['平安银行','2021']
/var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_12830/224712730.py:1: PerformanceWarning: indexing past lexsort depth may impact performance. df.loc['平安银行','2021'] /var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_12830/224712730.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead. df.loc['平安银行','2021']
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2021-01-04 | 000001.XSHE | XSHE | 2466.388 | 2372.018 | 1873816 | 2.891682e+09 | 119328.0 | 0.0080 | 3.609470e+11 | 3.609501e+11 |
2021-01-05 | 000001.XSHE | XSHE | 2372.018 | 2317.181 | 2195884 | 3.284607e+09 | 187364.0 | 0.0094 | 3.526025e+11 | 3.526055e+11 |
2021-01-06 | 000001.XSHE | XSHE | 2317.181 | 2494.444 | 2332835 | 3.648522e+09 | 154714.0 | 0.0100 | 3.795765e+11 | 3.795798e+11 |
2021-01-07 | 000001.XSHE | XSHE | 2494.444 | 2537.804 | 1909948 | 3.111275e+09 | 123304.0 | 0.0082 | 3.861745e+11 | 3.861778e+11 |
2021-01-08 | 000001.XSHE | XSHE | 2537.804 | 2531.427 | 1441303 | 2.348316e+09 | 95828.0 | 0.0062 | 3.852042e+11 | 3.852075e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 000001.XSHE | XSHE | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 000001.XSHE | XSHE | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 000001.XSHE | XSHE | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 000001.XSHE | XSHE | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 000001.XSHE | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
243 rows × 10 columns
df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key
/var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_12830/2348492628.py:1: PerformanceWarning: indexing past lexsort depth may impact performance. df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key /var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_12830/2348492628.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead. df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2021-01-04 | 000001.XSHE | XSHE | 2466.388 | 2372.018 | 1873816 | 2.891682e+09 | 119328.0 | 0.0080 | 3.609470e+11 | 3.609501e+11 |
2021-01-05 | 000001.XSHE | XSHE | 2372.018 | 2317.181 | 2195884 | 3.284607e+09 | 187364.0 | 0.0094 | 3.526025e+11 | 3.526055e+11 |
2021-01-06 | 000001.XSHE | XSHE | 2317.181 | 2494.444 | 2332835 | 3.648522e+09 | 154714.0 | 0.0100 | 3.795765e+11 | 3.795798e+11 |
2021-01-07 | 000001.XSHE | XSHE | 2494.444 | 2537.804 | 1909948 | 3.111275e+09 | 123304.0 | 0.0082 | 3.861745e+11 | 3.861778e+11 |
2021-01-08 | 000001.XSHE | XSHE | 2537.804 | 2531.427 | 1441303 | 2.348316e+09 | 95828.0 | 0.0062 | 3.852042e+11 | 3.852075e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 000001.XSHE | XSHE | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 000001.XSHE | XSHE | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 000001.XSHE | XSHE | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 000001.XSHE | XSHE | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 000001.XSHE | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
243 rows × 10 columns
df.loc[['平安银行','2016']]# a list is used to specify several keys [on the same level]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Cell In[172], line 1 ----> 1 df.loc[['平安银行','2016']] File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key) 1070 axis = self.axis or 0 1072 maybe_callable = com.apply_if_callable(key, self.obj) -> 1073 return self._getitem_axis(maybe_callable, axis=axis) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1301, in _LocIndexer._getitem_axis(self, key, axis) 1298 if hasattr(key, "ndim") and key.ndim > 1: 1299 raise ValueError("Cannot index with multidimensional key") -> 1301 return self._getitem_iterable(key, axis=axis) 1303 # nested tuple slicing 1304 if is_nested_tuple(key, labels): File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1239, in _LocIndexer._getitem_iterable(self, key, axis) 1236 self._validate_key(key, axis) 1238 # A collection of keys -> 1239 keyarr, indexer = self._get_listlike_indexer(key, axis) 1240 return self.obj._reindex_with_indexers( 1241 {axis: [keyarr, indexer]}, copy=True, allow_dups=True 1242 ) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1432, in _LocIndexer._get_listlike_indexer(self, key, axis) 1429 ax = self.obj._get_axis(axis) 1430 axis_name = self.obj._get_axis_name(axis) -> 1432 keyarr, indexer = ax._get_indexer_strict(key, axis_name) 1434 return keyarr, indexer File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2623, in MultiIndex._get_indexer_strict(self, key, axis_name) 2620 if len(keyarr) and not isinstance(keyarr[0], tuple): 2621 indexer = self._get_indexer_level_0(keyarr) -> 2623 self._raise_if_missing(key, indexer, axis_name) 2624 return self[indexer], indexer 2626 return super()._get_indexer_strict(key, axis_name) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2641, in MultiIndex._raise_if_missing(self, key, indexer, axis_name) 2639 cmask = check == -1 2640 if cmask.any(): -> 2641 raise KeyError(f"{keyarr[cmask]} not in index") 2642 # We get here when levels still contain values which are not 2643 # actually in Index anymore 2644 raise KeyError(f"{keyarr} not in index") KeyError: "['2016'] not in index"
df.loc[('平安银行','2015':'2016')]
Cell In[173], line 1 df.loc[('平安银行','2015':'2016')] ^ SyntaxError: invalid syntax
df.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))]
--------------------------------------------------------------------------- UnsortedIndexError Traceback (most recent call last) Cell In[174], line 1 ----> 1 df.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))] File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key) 1070 axis = self.axis or 0 1072 maybe_callable = com.apply_if_callable(key, self.obj) -> 1073 return self._getitem_axis(maybe_callable, axis=axis) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1290, in _LocIndexer._getitem_axis(self, key, axis) 1288 if isinstance(key, slice): 1289 self._validate_key(key, axis) -> 1290 return self._get_slice_axis(key, axis=axis) 1291 elif com.is_bool_indexer(key): 1292 return self._getbool_axis(key, axis=axis) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1324, in _LocIndexer._get_slice_axis(self, slice_obj, axis) 1321 return obj.copy(deep=False) 1323 labels = obj._get_axis(axis) -> 1324 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step) 1326 if isinstance(indexer, slice): 1327 return self.obj._slice(indexer, axis=axis) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:6602, in Index.slice_indexer(self, start, end, step, kind) 6559 """ 6560 Compute the slice indexer for input labels and step. 6561 (...) 6598 slice(1, 3, None) 6599 """ 6600 self._deprecated_arg(kind, "kind", "slice_indexer") -> 6602 start_slice, end_slice = self.slice_locs(start, end, step=step) 6604 # return a slice 6605 if not is_scalar(start_slice): File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2775, in MultiIndex.slice_locs(self, start, end, step, kind) 2772 self._deprecated_arg(kind, "kind", "slice_locs") 2773 # This function adds nothing to its parent implementation (the magic 2774 # happens in get_slice_bound method), but it adds meaningful doc. -> 2775 return super().slice_locs(start, end, step) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:6810, in Index.slice_locs(self, start, end, step, kind) 6808 start_slice = None 6809 if start is not None: -> 6810 start_slice = self.get_slice_bound(start, "left") 6811 if start_slice is None: 6812 start_slice = 0 File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2714, in MultiIndex.get_slice_bound(self, label, side, kind) 2712 if not isinstance(label, tuple): 2713 label = (label,) -> 2714 return self._partial_tup_index(label, side=side) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2779, in MultiIndex._partial_tup_index(self, tup, side) 2777 def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"): 2778 if len(tup) > self._lexsort_depth: -> 2779 raise UnsortedIndexError( 2780 f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " 2781 f"({self._lexsort_depth})" 2782 ) 2784 n = len(tup) 2785 start, end = 0, len(self) UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (0)'
df2 = df.sort_index() # 必须先排序才可做slicing
df2.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))]
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 | |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 | |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 | |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 | |
2015-01-12 | 000001.XSHE | XSHE | 1231.004 | 1205.698 | 2696688 | 2.293105e+09 | 64376.0 | 0.0158 | 1.452882e+11 | 1.687457e+11 | |
2015-01-13 | 000001.XSHE | XSHE | 1205.698 | 1198.351 | 1418187 | 1.204987e+09 | 34924.0 | 0.0083 | 1.444029e+11 | 1.677175e+11 | |
2015-01-14 | 000001.XSHE | XSHE | 1198.351 | 1208.963 | 2192762 | 1.889297e+09 | 50675.0 | 0.0128 | 1.456817e+11 | 1.692027e+11 | |
2015-01-15 | 000001.XSHE | XSHE | 1208.963 | 1253.044 | 2156548 | 1.868796e+09 | 54470.0 | 0.0126 | 1.509935e+11 | 1.753721e+11 | |
2015-01-16 | 000001.XSHE | XSHE | 1253.044 | 1254.677 | 2701125 | 2.403346e+09 | 60637.0 | 0.0158 | 1.511903e+11 | 1.756006e+11 | |
2015-01-19 | 000001.XSHE | XSHE | 1254.677 | 1128.964 | 3710288 | 3.016203e+09 | 103111.0 | 0.0217 | 1.360417e+11 | 1.580063e+11 | |
2015-01-20 | 000001.XSHE | XSHE | 1128.964 | 1128.964 | 2588576 | 2.064281e+09 | 65826.0 | 0.0152 | 1.360417e+11 | 1.580063e+11 | |
2015-01-21 | 000001.XSHE | XSHE | 1128.964 | 1177.127 | 3368980 | 2.758193e+09 | 78709.0 | 0.0197 | 1.418454e+11 | 1.647470e+11 | |
2015-01-22 | 000001.XSHE | XSHE | 1177.127 | 1167.331 | 2178850 | 1.801436e+09 | 53599.0 | 0.0128 | 1.406650e+11 | 1.633760e+11 | |
2015-01-23 | 000001.XSHE | XSHE | 1167.331 | 1175.494 | 2533304 | 2.108747e+09 | 53672.0 | 0.0148 | 1.416487e+11 | 1.645185e+11 | |
2015-01-26 | 000001.XSHE | XSHE | 1175.494 | 1170.596 | 1836123 | 1.508447e+09 | 47025.0 | 0.0108 | 1.410585e+11 | 1.638330e+11 | |
2015-01-27 | 000001.XSHE | XSHE | 1170.596 | 1142.025 | 2325514 | 1.881059e+09 | 61710.0 | 0.0136 | 1.376156e+11 | 1.598343e+11 | |
2015-01-28 | 000001.XSHE | XSHE | 1142.025 | 1147.739 | 2154303 | 1.742176e+09 | 55381.0 | 0.0126 | 1.383042e+11 | 1.606340e+11 |
df2.loc[pd.IndexSlice[('宁德时代','2015-01'):('平安银行','2020-02')]]
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
宁德时代 | 2018-06-11 | 300750.XSHE | XSHE | 25.140 | 36.200 | 78829 | 2.845472e+06 | 154.0 | 0.0004 | 7.864222e+09 | 7.864222e+10 |
2018-06-12 | 300750.XSHE | XSHE | 36.200 | 39.820 | 26579 | 1.058376e+06 | 86.0 | 0.0001 | 8.650644e+09 | 8.650644e+10 | |
2018-06-13 | 300750.XSHE | XSHE | 39.820 | 43.800 | 45030 | 1.972314e+06 | 98.0 | 0.0002 | 9.515274e+09 | 9.515274e+10 | |
2018-06-14 | 300750.XSHE | XSHE | 43.800 | 48.180 | 74267 | 3.578184e+06 | 211.0 | 0.0003 | 1.046680e+10 | 1.046680e+11 | |
2018-06-15 | 300750.XSHE | XSHE | 48.180 | 53.000 | 256510 | 1.359503e+07 | 717.0 | 0.0012 | 1.151392e+10 | 1.151392e+11 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
平安银行 | 2020-01-20 | 000001.XSHE | XSHE | 2055.130 | 2062.654 | 899493 | 1.226465e+09 | 42805.0 | 0.0038 | 3.192246e+11 | 3.192274e+11 |
2020-01-21 | 000001.XSHE | XSHE | 2062.654 | 2006.229 | 1080975 | 1.442171e+09 | 60130.0 | 0.0046 | 3.104920e+11 | 3.104947e+11 | |
2020-01-22 | 000001.XSHE | XSHE | 2006.229 | 2017.514 | 867411 | 1.150933e+09 | 49245.0 | 0.0037 | 3.122386e+11 | 3.122412e+11 | |
2020-01-23 | 000001.XSHE | XSHE | 2017.514 | 1948.549 | 1326911 | 1.723394e+09 | 71683.0 | 0.0057 | 3.015654e+11 | 3.015680e+11 | |
2020-02-03 | 000001.XSHE | XSHE | 1948.549 | 1754.196 | 2723762 | 3.201454e+09 | 105935.0 | 0.0116 | 2.714865e+11 | 2.714888e+11 |
294154 rows × 10 columns
df.loc['平安银行'].loc['2015':'2016']
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2016-12-26 | 000001.XSHE | XSHE | 1092.827 | 1097.641 | 364173 | 2.739343e+08 | 11409.0 | 0.0021 | 1.334364e+11 | 1.565942e+11 |
2016-12-27 | 000001.XSHE | XSHE | 1097.641 | 1092.827 | 324124 | 2.447883e+08 | 9433.0 | 0.0018 | 1.328511e+11 | 1.559073e+11 |
2016-12-28 | 000001.XSHE | XSHE | 1092.827 | 1090.420 | 405159 | 3.048986e+08 | 10133.0 | 0.0023 | 1.325585e+11 | 1.555639e+11 |
2016-12-29 | 000001.XSHE | XSHE | 1090.420 | 1092.827 | 408419 | 3.071835e+08 | 9453.0 | 0.0023 | 1.328511e+11 | 1.559073e+11 |
2016-12-30 | 000001.XSHE | XSHE | 1092.827 | 1095.234 | 364834 | 2.748827e+08 | 10822.0 | 0.0021 | 1.331437e+11 | 1.562507e+11 |
488 rows × 10 columns
df1 = pd.DataFrame({'a':[1,2,3],'b':[4,5,6],'c':[10,20,30]})
df2 = pd.DataFrame({'a':np.array([1,2,6]),
'b':[s for s in 'abc'],
'd':[s for s in 'zxz']})
df1
a | b | c | |
---|---|---|---|
0 | 1 | 4 | 10 |
1 | 2 | 5 | 20 |
2 | 3 | 6 | 30 |
df2
a | b | d | |
---|---|---|---|
0 | 1 | a | z |
1 | 2 | b | x |
2 | 6 | c | z |
pd.merge(df1,df2,on='a')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4 | 10 | a | z |
1 | 2 | 5 | 20 | b | x |
pd.merge(df1,df2,on='a',how='outer')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4.0 | 10.0 | a | z |
1 | 2 | 5.0 | 20.0 | b | x |
2 | 3 | 6.0 | 30.0 | NaN | NaN |
3 | 6 | NaN | NaN | c | z |
pd.merge(df1,df2,on='a',how='left')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4 | 10 | a | z |
1 | 2 | 5 | 20 | b | x |
2 | 3 | 6 | 30 | NaN | NaN |
pd.merge(df1,df2,on='a',how='right')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4.0 | 10.0 | a | z |
1 | 2 | 5.0 | 20.0 | b | x |
2 | 6 | NaN | NaN | c | z |
temp = tm.makeTimeDataFrame(3)
display(temp)
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -0.522757 | -1.607854 | -0.000651 | -0.264986 |
2000-01-04 | -0.574578 | -0.483190 | -0.920019 | 0.332520 |
2000-01-05 | 1.490557 | 0.681432 | -1.439317 | -0.741802 |
pd.DataFrame(temp.stack()).reset_index()
level_0 | level_1 | 0 | |
---|---|---|---|
0 | 2000-01-03 | A | -0.522757 |
1 | 2000-01-03 | B | -1.607854 |
2 | 2000-01-03 | C | -0.000651 |
3 | 2000-01-03 | D | -0.264986 |
4 | 2000-01-04 | A | -0.574578 |
5 | 2000-01-04 | B | -0.483190 |
6 | 2000-01-04 | C | -0.920019 |
7 | 2000-01-04 | D | 0.332520 |
8 | 2000-01-05 | A | 1.490557 |
9 | 2000-01-05 | B | 0.681432 |
10 | 2000-01-05 | C | -1.439317 |
11 | 2000-01-05 | D | -0.741802 |
temp2 = pd.DataFrame(temp.stack()).reset_index()
temp2.columns=['date','variable','value']
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | -0.522757 |
1 | 2000-01-03 | B | -1.607854 |
2 | 2000-01-03 | C | -0.000651 |
3 | 2000-01-03 | D | -0.264986 |
4 | 2000-01-04 | A | -0.574578 |
5 | 2000-01-04 | B | -0.483190 |
6 | 2000-01-04 | C | -0.920019 |
7 | 2000-01-04 | D | 0.332520 |
8 | 2000-01-05 | A | 1.490557 |
9 | 2000-01-05 | B | 0.681432 |
10 | 2000-01-05 | C | -1.439317 |
11 | 2000-01-05 | D | -0.741802 |
def unpivot(frame):
"""
Essentially the same as 'stack'
"""
N, K = frame.shape
data = {
"value": frame.to_numpy().ravel("F"),
"variable": np.asarray(frame.columns).repeat(N),
"date": np.tile(np.asarray(frame.index), K),
}
return pd.DataFrame(data, columns=["date", "variable", "value"])
temp2 = unpivot(temp)
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | -0.522757 |
1 | 2000-01-04 | A | -0.574578 |
2 | 2000-01-05 | A | 1.490557 |
3 | 2000-01-03 | B | -1.607854 |
4 | 2000-01-04 | B | -0.483190 |
5 | 2000-01-05 | B | 0.681432 |
6 | 2000-01-03 | C | -0.000651 |
7 | 2000-01-04 | C | -0.920019 |
8 | 2000-01-05 | C | -1.439317 |
9 | 2000-01-03 | D | -0.264986 |
10 | 2000-01-04 | D | 0.332520 |
11 | 2000-01-05 | D | -0.741802 |
temp2.set_index(['date','variable'])
value | ||
---|---|---|
date | variable | |
2000-01-03 | A | -0.522757 |
2000-01-04 | A | -0.574578 |
2000-01-05 | A | 1.490557 |
2000-01-03 | B | -1.607854 |
2000-01-04 | B | -0.483190 |
2000-01-05 | B | 0.681432 |
2000-01-03 | C | -0.000651 |
2000-01-04 | C | -0.920019 |
2000-01-05 | C | -1.439317 |
2000-01-03 | D | -0.264986 |
2000-01-04 | D | 0.332520 |
2000-01-05 | D | -0.741802 |
temp2.set_index(['date','variable']).unstack()
value | ||||
---|---|---|---|---|
variable | A | B | C | D |
date | ||||
2000-01-03 | -0.522757 | -1.607854 | -0.000651 | -0.264986 |
2000-01-04 | -0.574578 | -0.483190 | -0.920019 | 0.332520 |
2000-01-05 | 1.490557 | 0.681432 | -1.439317 | -0.741802 |
temp2.pivot(index='date',columns='variable',values='value')
variable | A | B | C | D |
---|---|---|---|---|
date | ||||
2000-01-03 | -0.522757 | -1.607854 | -0.000651 | -0.264986 |
2000-01-04 | -0.574578 | -0.483190 | -0.920019 | 0.332520 |
2000-01-05 | 1.490557 | 0.681432 | -1.439317 | -0.741802 |
temp2['value2'] = temp2['value']**2
temp2
date | variable | value | value2 | |
---|---|---|---|---|
0 | 2000-01-03 | A | -0.522757 | 2.732746e-01 |
1 | 2000-01-04 | A | -0.574578 | 3.301401e-01 |
2 | 2000-01-05 | A | 1.490557 | 2.221761e+00 |
3 | 2000-01-03 | B | -1.607854 | 2.585194e+00 |
4 | 2000-01-04 | B | -0.483190 | 2.334727e-01 |
5 | 2000-01-05 | B | 0.681432 | 4.643496e-01 |
6 | 2000-01-03 | C | -0.000651 | 4.233486e-07 |
7 | 2000-01-04 | C | -0.920019 | 8.464347e-01 |
8 | 2000-01-05 | C | -1.439317 | 2.071633e+00 |
9 | 2000-01-03 | D | -0.264986 | 7.021733e-02 |
10 | 2000-01-04 | D | 0.332520 | 1.105695e-01 |
11 | 2000-01-05 | D | -0.741802 | 5.502701e-01 |
temp2.pivot(index='date',columns='variable')
value | value2 | |||||||
---|---|---|---|---|---|---|---|---|
variable | A | B | C | D | A | B | C | D |
date | ||||||||
2000-01-03 | -0.522757 | -1.607854 | -0.000651 | -0.264986 | 0.273275 | 2.585194 | 4.233486e-07 | 0.070217 |
2000-01-04 | -0.574578 | -0.483190 | -0.920019 | 0.332520 | 0.330140 | 0.233473 | 8.464347e-01 | 0.110570 |
2000-01-05 | 1.490557 | 0.681432 | -1.439317 | -0.741802 | 2.221761 | 0.464350 | 2.071633e+00 | 0.550270 |
df.reset_index()
secShortName | tradeDate | secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 平安银行 | 2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 平安银行 | 2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 平安银行 | 2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 平安银行 | 2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 凌云B股 | 2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 凌云B股 | 2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 凌云B股 | 2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 凌云B股 | 2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
df.reset_index(inplace=True)
df
secShortName | tradeDate | secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 平安银行 | 2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 平安银行 | 2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 平安银行 | 2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 平安银行 | 2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 凌云B股 | 2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 凌云B股 | 2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 凌云B股 | 2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 凌云B股 | 2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
df.pivot(index='tradeDate',columns='secID',values='closePrice').iloc[:,0:4]
secID | 000001.XSHE | 000002.XSHE | 000004.XSHE | 000005.XSHE |
---|---|---|---|---|
tradeDate | ||||
2015-01-05 | 1307.737 | 1681.312 | 107.225 | NaN |
2015-01-06 | 1288.146 | 1619.292 | 112.487 | NaN |
2015-01-07 | 1263.656 | 1604.632 | 112.145 | NaN |
2015-01-08 | 1221.208 | 1532.463 | 115.630 | NaN |
2015-01-09 | 1231.004 | 1516.676 | 112.282 | NaN |
... | ... | ... | ... | ... |
2021-12-27 | 2213.298 | 2919.261 | 128.342 | 22.207 |
2021-12-28 | 2206.872 | 2928.188 | 133.399 | 22.502 |
2021-12-29 | 2152.889 | 2883.551 | 132.510 | 22.502 |
2021-12-30 | 2161.886 | 2844.866 | 133.945 | 22.502 |
2021-12-31 | 2118.185 | 2940.092 | 132.715 | 22.600 |
1705 rows × 4 columns
df.reset_index().pivot(index='tradeDate',columns='secID',values='closePrice').iloc[:,0:4].plot()
<AxesSubplot: xlabel='tradeDate'>
import matplotlib as mpl
import matplotlib.pyplot as plt
axes: 执行画图的instance,子图
axis: x, y
为什么只画一个图的时候,也经常看到写fig, ax = plt.subplots()
?
因为通常画图时候需要保存。
plt.subplots?
Signature: plt.subplots( nrows=1, ncols=1, *, sharex=False, sharey=False, squeeze=True, width_ratios=None, height_ratios=None, subplot_kw=None, gridspec_kw=None, **fig_kw, ) Docstring: Create a figure and a set of subplots. This utility wrapper makes it convenient to create common layouts of subplots, including the enclosing figure object, in a single call. Parameters ---------- nrows, ncols : int, default: 1 Number of rows/columns of the subplot grid. sharex, sharey : bool or {'none', 'all', 'row', 'col'}, default: False Controls sharing of properties among x (*sharex*) or y (*sharey*) axes: - True or 'all': x- or y-axis will be shared among all subplots. - False or 'none': each subplot x- or y-axis will be independent. - 'row': each subplot row will share an x- or y-axis. - 'col': each subplot column will share an x- or y-axis. When subplots have a shared x-axis along a column, only the x tick labels of the bottom subplot are created. Similarly, when subplots have a shared y-axis along a row, only the y tick labels of the first column subplot are created. To later turn other subplots' ticklabels on, use `~matplotlib.axes.Axes.tick_params`. When subplots have a shared axis that has units, calling `~matplotlib.axis.Axis.set_units` will update each axis with the new units. squeeze : bool, default: True - If True, extra dimensions are squeezed out from the returned array of `~matplotlib.axes.Axes`: - if only one subplot is constructed (nrows=ncols=1), the resulting single Axes object is returned as a scalar. - for Nx1 or 1xM subplots, the returned object is a 1D numpy object array of Axes objects. - for NxM, subplots with N>1 and M>1 are returned as a 2D array. - If False, no squeezing at all is done: the returned Axes object is always a 2D array containing Axes instances, even if it ends up being 1x1. width_ratios : array-like of length *ncols*, optional Defines the relative widths of the columns. Each column gets a relative width of ``width_ratios[i] / sum(width_ratios)``. If not given, all columns will have the same width. Equivalent to ``gridspec_kw={'width_ratios': [...]}``. height_ratios : array-like of length *nrows*, optional Defines the relative heights of the rows. Each row gets a relative height of ``height_ratios[i] / sum(height_ratios)``. If not given, all rows will have the same height. Convenience for ``gridspec_kw={'height_ratios': [...]}``. subplot_kw : dict, optional Dict with keywords passed to the `~matplotlib.figure.Figure.add_subplot` call used to create each subplot. gridspec_kw : dict, optional Dict with keywords passed to the `~matplotlib.gridspec.GridSpec` constructor used to create the grid the subplots are placed on. **fig_kw All additional keyword arguments are passed to the `.pyplot.figure` call. Returns ------- fig : `.Figure` ax : `~.axes.Axes` or array of Axes *ax* can be either a single `~.axes.Axes` object, or an array of Axes objects if more than one subplot was created. The dimensions of the resulting array can be controlled with the squeeze keyword, see above. Typical idioms for handling the return value are:: # using the variable ax for single a Axes fig, ax = plt.subplots() # using the variable axs for multiple Axes fig, axs = plt.subplots(2, 2) # using tuple unpacking for multiple Axes fig, (ax1, ax2) = plt.subplots(1, 2) fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2) The names ``ax`` and pluralized ``axs`` are preferred over ``axes`` because for the latter it's not clear if it refers to a single `~.axes.Axes` instance or a collection of these. See Also -------- .pyplot.figure .pyplot.subplot .pyplot.axes .Figure.subplots .Figure.add_subplot Examples -------- :: # First create some toy data: x = np.linspace(0, 2*np.pi, 400) y = np.sin(x**2) # Create just a figure and only one subplot fig, ax = plt.subplots() ax.plot(x, y) ax.set_title('Simple plot') # Create two subplots and unpack the output array immediately f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) ax1.plot(x, y) ax1.set_title('Sharing Y axis') ax2.scatter(x, y) # Create four polar axes and access them through the returned array fig, axs = plt.subplots(2, 2, subplot_kw=dict(projection="polar")) axs[0, 0].plot(x, y) axs[1, 1].scatter(x, y) # Share a X axis with each column of subplots plt.subplots(2, 2, sharex='col') # Share a Y axis with each row of subplots plt.subplots(2, 2, sharey='row') # Share both X and Y axes with all subplots plt.subplots(2, 2, sharex='all', sharey='all') # Note that this is the same as plt.subplots(2, 2, sharex=True, sharey=True) # Create figure number 10 with a single subplot # and clears it if it already exists. fig, ax = plt.subplots(num=10, clear=True) File: ~/opt/anaconda3/lib/python3.9/site-packages/matplotlib/pyplot.py Type: function
fig = plt.figure() # an empty figure with no Axes
fig, ax = plt.subplots() # a figure with a single Axes
fig, axs = plt.subplots(2, 2) # a figure with a 2x2 grid of Axes
<Figure size 640x480 with 0 Axes>
fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
[<matplotlib.lines.Line2D at 0x7f97eb2adfa0>]
temp2
date | variable | value | value2 | |
---|---|---|---|---|
0 | 2000-01-03 | A | -0.522757 | 2.732746e-01 |
1 | 2000-01-04 | A | -0.574578 | 3.301401e-01 |
2 | 2000-01-05 | A | 1.490557 | 2.221761e+00 |
3 | 2000-01-03 | B | -1.607854 | 2.585194e+00 |
4 | 2000-01-04 | B | -0.483190 | 2.334727e-01 |
5 | 2000-01-05 | B | 0.681432 | 4.643496e-01 |
6 | 2000-01-03 | C | -0.000651 | 4.233486e-07 |
7 | 2000-01-04 | C | -0.920019 | 8.464347e-01 |
8 | 2000-01-05 | C | -1.439317 | 2.071633e+00 |
9 | 2000-01-03 | D | -0.264986 | 7.021733e-02 |
10 | 2000-01-04 | D | 0.332520 | 1.105695e-01 |
11 | 2000-01-05 | D | -0.741802 | 5.502701e-01 |
fig, axs = plt.subplots(1,2)
x = range(temp2.shape[0])
y1 = temp2['value']
y2 = temp2['value']**2
axs[0].plot(x, y1)
axs[1].plot(x, y2)
axs[0].plot(x, y1**3)
[<matplotlib.lines.Line2D at 0x7f97eb3938e0>]
fig
x = pd.date_range(start='2021-01-03',periods=temp2.shape[0])
plt.plot(x,temp2['value'], label='random value')
plt.plot(x,temp2['value']**2, label='quadratic of random value')
plt.xlabel('date')
plt.ylabel('value')
plt.title('Example')
plt.legend()
<matplotlib.legend.Legend at 0x7f97eb2fae50>
df_pingan.select_dtypes(np.number).corr()
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
preClosePrice | 1.000000 | 0.997575 | 0.064982 | 0.389881 | 0.601451 | -0.039942 | 0.989830 | 0.994295 |
closePrice | 0.997575 | 1.000000 | 0.075458 | 0.400851 | 0.606522 | -0.030710 | 0.991942 | 0.996452 |
turnoverVol | 0.064982 | 0.075458 | 1.000000 | 0.924077 | 0.752066 | 0.981827 | 0.027619 | 0.055961 |
turnoverValue | 0.389881 | 0.400851 | 0.924077 | 1.000000 | 0.914040 | 0.862471 | 0.355060 | 0.383088 |
dealAmount | 0.601451 | 0.606522 | 0.752066 | 0.914040 | 1.000000 | 0.665757 | 0.573258 | 0.596717 |
turnoverRate | -0.039942 | -0.030710 | 0.981827 | 0.862471 | 0.665757 | 1.000000 | -0.090386 | -0.050985 |
negMarketValue | 0.989830 | 0.991942 | 0.027619 | 0.355060 | 0.573258 | -0.090386 | 1.000000 | 0.993615 |
marketValue | 0.994295 | 0.996452 | 0.055961 | 0.383088 | 0.596717 | -0.050985 | 0.993615 | 1.000000 |
corr = df_pingan.select_dtypes(np.number).corr()
corr
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
preClosePrice | 1.000000 | 0.997575 | 0.064982 | 0.389881 | 0.601451 | -0.039942 | 0.989830 | 0.994295 |
closePrice | 0.997575 | 1.000000 | 0.075458 | 0.400851 | 0.606522 | -0.030710 | 0.991942 | 0.996452 |
turnoverVol | 0.064982 | 0.075458 | 1.000000 | 0.924077 | 0.752066 | 0.981827 | 0.027619 | 0.055961 |
turnoverValue | 0.389881 | 0.400851 | 0.924077 | 1.000000 | 0.914040 | 0.862471 | 0.355060 | 0.383088 |
dealAmount | 0.601451 | 0.606522 | 0.752066 | 0.914040 | 1.000000 | 0.665757 | 0.573258 | 0.596717 |
turnoverRate | -0.039942 | -0.030710 | 0.981827 | 0.862471 | 0.665757 | 1.000000 | -0.090386 | -0.050985 |
negMarketValue | 0.989830 | 0.991942 | 0.027619 | 0.355060 | 0.573258 | -0.090386 | 1.000000 | 0.993615 |
marketValue | 0.994295 | 0.996452 | 0.055961 | 0.383088 | 0.596717 | -0.050985 | 0.993615 | 1.000000 |
np.ones_like?
Signature: np.ones_like(a, dtype=None, order='K', subok=True, shape=None) Docstring: Return an array of ones with the same shape and type as a given array. Parameters ---------- a : array_like The shape and data-type of `a` define these same attributes of the returned array. dtype : data-type, optional Overrides the data type of the result. .. versionadded:: 1.6.0 order : {'C', 'F', 'A', or 'K'}, optional Overrides the memory layout of the result. 'C' means C-order, 'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous, 'C' otherwise. 'K' means match the layout of `a` as closely as possible. .. versionadded:: 1.6.0 subok : bool, optional. If True, then the newly created array will use the sub-class type of `a`, otherwise it will be a base-class array. Defaults to True. shape : int or sequence of ints, optional. Overrides the shape of the result. If order='K' and the number of dimensions is unchanged, will try to keep order, otherwise, order='C' is implied. .. versionadded:: 1.17.0 Returns ------- out : ndarray Array of ones with the same shape and type as `a`. See Also -------- empty_like : Return an empty array with shape and type of input. zeros_like : Return an array of zeros with shape and type of input. full_like : Return a new array with shape of input filled with value. ones : Return a new array setting values to one. Examples -------- >>> x = np.arange(6) >>> x = x.reshape((2, 3)) >>> x array([[0, 1, 2], [3, 4, 5]]) >>> np.ones_like(x) array([[1, 1, 1], [1, 1, 1]]) >>> y = np.arange(3, dtype=float) >>> y array([0., 1., 2.]) >>> np.ones_like(y) array([1., 1., 1.]) File: ~/opt/anaconda3/lib/python3.9/site-packages/numpy/core/numeric.py Type: function
np.ones_like(corr, dtype=bool)
array([[ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True]])
np.triu?
Signature: np.triu(m, k=0) Docstring: Upper triangle of an array. Return a copy of an array with the elements below the `k`-th diagonal zeroed. For arrays with ``ndim`` exceeding 2, `triu` will apply to the final two axes. Please refer to the documentation for `tril` for further details. See Also -------- tril : lower triangle of an array Examples -------- >>> np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], -1) array([[ 1, 2, 3], [ 4, 5, 6], [ 0, 8, 9], [ 0, 0, 12]]) >>> np.triu(np.arange(3*4*5).reshape(3, 4, 5)) array([[[ 0, 1, 2, 3, 4], [ 0, 6, 7, 8, 9], [ 0, 0, 12, 13, 14], [ 0, 0, 0, 18, 19]], [[20, 21, 22, 23, 24], [ 0, 26, 27, 28, 29], [ 0, 0, 32, 33, 34], [ 0, 0, 0, 38, 39]], [[40, 41, 42, 43, 44], [ 0, 46, 47, 48, 49], [ 0, 0, 52, 53, 54], [ 0, 0, 0, 58, 59]]]) File: ~/opt/anaconda3/lib/python3.9/site-packages/numpy/lib/twodim_base.py Type: function
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))
mask
array([[ True, True, True, True, True, True, True, True], [False, True, True, True, True, True, True, True], [False, False, True, True, True, True, True, True], [False, False, False, True, True, True, True, True], [False, False, False, False, True, True, True, True], [False, False, False, False, False, True, True, True], [False, False, False, False, False, False, True, True], [False, False, False, False, False, False, False, True]])
import seaborn as sns
# Set up the matplotlib figure
fig, ax = plt.subplots(figsize=(18, 18))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0, annot=True,
square=True, linewidths=.5, annot_kws={"size": 10},cbar_kws={"shrink": .5})
<AxesSubplot: >
df_pingan_num = df_pingan.select_dtypes(np.number)
df_pingan_num
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||
2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 8 columns
df_pingan_num.plot()
<AxesSubplot: xlabel='tradeDate'>
plt.rcParams['figure.figsize'] = (16.0, 9.0)
df_pingan_num.drop(['negMarketValue','marketValue'],axis=1).plot()
<AxesSubplot: xlabel='tradeDate'>
def standardization(col):
return (col - col.mean()) / col.std()
df_pingan_num.apply(standardization).plot()
<AxesSubplot: xlabel='tradeDate'>
df_pingan_num.apply(standardization)[['preClosePrice','turnoverValue','marketValue']].plot(style=['bs-', 'ro-', 'y^-'])
<AxesSubplot: xlabel='tradeDate'>
df_pingan_num.plot(subplots=True)
array([<AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>], dtype=object)