import numpy as np
import pandas as pd
np.__version__
'1.23.5'
numpy中的数据存储是array, array要求数据类型是一样的,这和python中的list不同
np.array([1,1.0,'2']) # 强制转换成同一种类型
array(['1', '1.0', '2'], dtype='<U32')
np.array([1,1.0,'2'],dtype=int)
array([1, 1, 2])
np.ones((3,5))
array([[1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.]])
np.full((3,5),1.231)
array([[1.231, 1.231, 1.231, 1.231, 1.231], [1.231, 1.231, 1.231, 1.231, 1.231], [1.231, 1.231, 1.231, 1.231, 1.231]])
np.arange(0, 20)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
np.random.normal(0, 1, (3,3))
array([[ 0.11949003, 0.51600352, -0.60939972], [-0.44918345, 0.80383762, -0.20336444], [ 0.20335392, -1.23692957, -1.40778524]])
np.eye(3)
array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
np.tile?
Signature: np.tile(A, reps) Docstring: Construct an array by repeating A the number of times given by reps. If `reps` has length ``d``, the result will have dimension of ``max(d, A.ndim)``. If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication, or shape (1, 1, 3) for 3-D replication. If this is not the desired behavior, promote `A` to d-dimensions manually before calling this function. If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it. Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as (1, 1, 2, 2). Note : Although tile may be used for broadcasting, it is strongly recommended to use numpy's broadcasting operations and functions. Parameters ---------- A : array_like The input array. reps : array_like The number of repetitions of `A` along each axis. Returns ------- c : ndarray The tiled output array. See Also -------- repeat : Repeat elements of an array. broadcast_to : Broadcast an array to a new shape Examples -------- >>> a = np.array([0, 1, 2]) >>> np.tile(a, 2) array([0, 1, 2, 0, 1, 2]) >>> np.tile(a, (2, 2)) array([[0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2]]) >>> np.tile(a, (2, 1, 2)) array([[[0, 1, 2, 0, 1, 2]], [[0, 1, 2, 0, 1, 2]]]) >>> b = np.array([[1, 2], [3, 4]]) >>> np.tile(b, 2) array([[1, 2, 1, 2], [3, 4, 3, 4]]) >>> np.tile(b, (2, 1)) array([[1, 2], [3, 4], [1, 2], [3, 4]]) >>> c = np.array([1,2,3,4]) >>> np.tile(c,(4,1)) array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]) File: ~/opt/anaconda3/lib/python3.9/site-packages/numpy/lib/shape_base.py Type: function
np.tile(10,3)
array([10, 10, 10])
np.tile([1,2,3],3)
array([1, 2, 3, 1, 2, 3, 1, 2, 3])
np.repeat?
Signature: np.repeat(a, repeats, axis=None) Docstring: Repeat elements of an array. Parameters ---------- a : array_like Input array. repeats : int or array of ints The number of repetitions for each element. `repeats` is broadcasted to fit the shape of the given axis. axis : int, optional The axis along which to repeat values. By default, use the flattened input array, and return a flat output array. Returns ------- repeated_array : ndarray Output array which has the same shape as `a`, except along the given axis. See Also -------- tile : Tile an array. unique : Find the unique elements of an array. Examples -------- >>> np.repeat(3, 4) array([3, 3, 3, 3]) >>> x = np.array([[1,2],[3,4]]) >>> np.repeat(x, 2) array([1, 1, 2, 2, 3, 3, 4, 4]) >>> np.repeat(x, 3, axis=1) array([[1, 1, 1, 2, 2, 2], [3, 3, 3, 4, 4, 4]]) >>> np.repeat(x, [1, 2], axis=0) array([[1, 2], [3, 4], [3, 4]]) File: ~/opt/anaconda3/lib/python3.9/site-packages/numpy/core/fromnumeric.py Type: function
np.asarray([1,2,3]).repeat(3)
array([1, 1, 1, 2, 2, 2, 3, 3, 3])
a = np.random.normal(0, 1, (3,3))
a
array([[-1.26401894, 0.13314849, -0.81723518], [-0.35654833, 1.03394826, 0.05554441], [-0.22990499, 0.14560186, -0.26441533]])
a[0,1]
0.1331484866845464
a[0:2]
array([[-1.26401894, 0.13314849, -0.81723518], [-0.35654833, 1.03394826, 0.05554441]])
a[0:2,1]
array([0.13314849, 1.03394826])
a[0,1] = 1000
a
array([[-1.26401894e+00, 1.00000000e+03, -8.17235181e-01], [-3.56548330e-01, 1.03394826e+00, 5.55444060e-02], [-2.29904987e-01, 1.45601862e-01, -2.64415325e-01]])
a[0:3:2]
array([[-1.26401894e+00, 1.00000000e+03, -8.17235181e-01], [-2.29904987e-01, 1.45601862e-01, -2.64415325e-01]])
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
z = [21,21,21]
np.concatenate([x,y,z])
array([ 1, 2, 3, 3, 2, 1, 21, 21, 21])
a1 = np.ones((3,3))
a2 = np.random.normal(0, 1, (3,3))
a1
array([[1., 1., 1.], [1., 1., 1.], [1., 1., 1.]])
a2
array([[-0.48831842, -0.30504266, 0.82149508], [-1.71501757, -0.41102703, -0.38721744], [ 1.20687861, 0.0802707 , -1.83544937]])
np.concatenate([a1,a2])
array([[ 1. , 1. , 1. ], [ 1. , 1. , 1. ], [ 1. , 1. , 1. ], [-0.48831842, -0.30504266, 0.82149508], [-1.71501757, -0.41102703, -0.38721744], [ 1.20687861, 0.0802707 , -1.83544937]])
np.concatenate([a1,a2], axis=1)
array([[ 1. , 1. , 1. , -0.48831842, -0.30504266, 0.82149508], [ 1. , 1. , 1. , -1.71501757, -0.41102703, -0.38721744], [ 1. , 1. , 1. , 1.20687861, 0.0802707 , -1.83544937]])
a1 = np.array(range(10))
a1[0:3]
array([0, 1, 2])
a1
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
a1.shape
(10,)
a1 = a1.reshape((10,1))
a1.shape
(10, 1)
a1
array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]])
a1 = a1.reshape((1,10))
a1.shape
(1, 10)
a1
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
a1.reshape((2,6))
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[39], line 1 ----> 1 a1.reshape((2,6)) ValueError: cannot reshape array of size 10 into shape (2,6)
a1 = a1.reshape((2,5))
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
a2 = np.array(range(10,20)).reshape((2,5))
a2
array([[10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
np.vstack([a1,a2])
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
np.hstack([a1,a2])
array([[ 0, 1, 2, 3, 4, 10, 11, 12, 13, 14], [ 5, 6, 7, 8, 9, 15, 16, 17, 18, 19]])
做numpy和pandas操作时的一个原则:能用向量操作尽量用向量操作,速度最快。尽量避免写元素的循环
a1 + 1000
array([[1000, 1001, 1002, 1003, 1004], [1005, 1006, 1007, 1008, 1009]])
a1 * a2
array([[ 0, 11, 24, 39, 56], [ 75, 96, 119, 144, 171]])
list1 = [1,2,3] # some manipulations is not possible with raw python
list2 = [4,5,6]
list1 * list2
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[50], line 1 ----> 1 list1 * list2 TypeError: can't multiply sequence by non-int of type 'list'
list1 + list2
[1, 2, 3, 4, 5, 6]
x = np.array([[1, 2, 3], [4, 5, 6]])
x.ravel()
array([1, 2, 3, 4, 5, 6])
x.ravel(order='F')
array([1, 4, 2, 5, 3, 6])
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
a1.max()
9
axis: 0,从上往下(跨行)。1,从左往右(跨列)
也适用于pandas
a1.max(axis=1)
array([4, 9])
a1.max(axis=0)
array([5, 6, 7, 8, 9])
a1.sum()
45
a1.sum(axis=0)
array([ 5, 7, 9, 11, 13])
a1.sum(axis=1)
array([10, 35])
a1.mean()
4.5
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
a1.mean(axis=1)
array([2., 7.])
a1.mean(axis=0)
array([2.5, 3.5, 4.5, 5.5, 6.5])
Mask 把 array 的一部分“挡住”,来阻止其他的函数对其进行操作。这在有的时候挺有用
import numpy.ma as ma
x = np.array([1,2,3,-1,4])
mx = ma.masked_array(x, mask=[0,0,0,1,0])
mx
masked_array(data=[1, 2, 3, --, 4], mask=[False, False, False, True, False], fill_value=999999)
mx.mean()
2.5
# -999 as an invalid value, as sometimes seen in some databases
a = np.array([0,1,2,-999])
ma.masked_values(a, -999)
masked_array(data=[0, 1, 2, --], mask=[False, False, False, True], fill_value=-999)
df = pd.DataFrame({'name':[chr(i) for i in range(ord('a'),ord('z')+1)],
'value':range(26)}) # string.ascii_lowercase provides the alphabet too.
df
name | value | |
---|---|---|
0 | a | 0 |
1 | b | 1 |
2 | c | 2 |
3 | d | 3 |
4 | e | 4 |
5 | f | 5 |
6 | g | 6 |
7 | h | 7 |
8 | i | 8 |
9 | j | 9 |
10 | k | 10 |
11 | l | 11 |
12 | m | 12 |
13 | n | 13 |
14 | o | 14 |
15 | p | 15 |
16 | q | 16 |
17 | r | 17 |
18 | s | 18 |
19 | t | 19 |
20 | u | 20 |
21 | v | 21 |
22 | w | 22 |
23 | x | 23 |
24 | y | 24 |
25 | z | 25 |
data = [['tom', 10], ['nick', 15], ['juli', 14]]
df = pd.DataFrame(data, columns = ['Name', 'Age'], index=['a','b','c'])
df
Name | Age | |
---|---|---|
a | tom | 10 |
b | nick | 15 |
c | juli | 14 |
pandas._testing
¶import pandas._testing as tm
dir(tm)
['ALL_INT_EA_DTYPES', 'ALL_INT_NUMPY_DTYPES', 'ALL_NUMPY_DTYPES', 'ALL_REAL_NUMPY_DTYPES', 'BOOL_DTYPES', 'BYTES_DTYPES', 'BaseMaskedArray', 'COMPLEX_DTYPES', 'Callable', 'Categorical', 'CategoricalIndex', 'ContextManager', 'Counter', 'DATETIME64_DTYPES', 'DataFrame', 'DatetimeIndex', 'Decimal', 'Dtype', 'EMPTY_STRING_PATTERN', 'ENDIAN', 'ExtensionArray', 'FLOAT_EA_DTYPES', 'FLOAT_NUMPY_DTYPES', 'Float64Index', 'Index', 'Int64Index', 'IntervalIndex', 'Iterable', 'MultiIndex', 'NARROW_NP_DTYPES', 'NDArrayBackedExtensionArray', 'NP_NAT_OBJECTS', 'NULL_OBJECTS', 'NumericIndex', 'OBJECT_DTYPES', 'PandasArray', 'RNGContext', 'RangeIndex', 'SIGNED_INT_EA_DTYPES', 'SIGNED_INT_NUMPY_DTYPES', 'STRING_DTYPES', 'Series', 'SubclassedCategorical', 'SubclassedDataFrame', 'SubclassedSeries', 'TIMEDELTA64_DTYPES', 'TYPE_CHECKING', 'UInt64Index', 'UNSIGNED_INT_EA_DTYPES', 'UNSIGNED_INT_NUMPY_DTYPES', '_K', '_N', '__all__', '__annotations__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_create_missing_idx', '_io', '_make_skipna_wrapper', '_random', '_testing_mode_warnings', '_warnings', 'all_timeseries_index_generator', 'annotations', 'assert_almost_equal', 'assert_attr_equal', 'assert_categorical_equal', 'assert_class_equal', 'assert_contains_all', 'assert_copy', 'assert_datetime_array_equal', 'assert_dict_equal', 'assert_equal', 'assert_extension_array_equal', 'assert_frame_equal', 'assert_index_equal', 'assert_indexing_slices_equivalent', 'assert_interval_array_equal', 'assert_is_sorted', 'assert_is_valid_plot_return_object', 'assert_metadata_equivalent', 'assert_numpy_array_equal', 'assert_period_array_equal', 'assert_produces_warning', 'assert_series_equal', 'assert_sp_array_equal', 'assert_timedelta_array_equal', 'asserters', 'at', 'bdate_range', 'box_expected', 'byteorder', 'can_set_locale', 'close', 'collections', 'compat', 'contexts', 'convert_rows_list_to_csv_str', 'cython_table', 'datetime', 'decompress_file', 'ensure_clean', 'ensure_clean_dir', 'ensure_safe_environment_variables', 'equalContents', 'external_error_raised', 'extract_array', 'getCols', 'getMixedTypeDict', 'getPeriodData', 'getSeriesData', 'getTimeSeriesData', 'get_cython_table_params', 'get_dtype', 'get_locales', 'get_obj', 'get_op_from_name', 'getitem', 'iat', 'iloc', 'index_subclass_makers_generator', 'is_float_dtype', 'is_integer_dtype', 'is_sequence', 'is_unsigned_integer_dtype', 'loc', 'makeBoolIndex', 'makeCategoricalIndex', 'makeCustomDataframe', 'makeCustomIndex', 'makeDataFrame', 'makeDateIndex', 'makeFloatIndex', 'makeFloatSeries', 'makeIntIndex', 'makeIntervalIndex', 'makeMissingDataframe', 'makeMixedDataFrame', 'makeMultiIndex', 'makeNumericIndex', 'makeObjectSeries', 'makePeriodFrame', 'makePeriodIndex', 'makePeriodSeries', 'makeRangeIndex', 'makeStringIndex', 'makeStringSeries', 'makeTimeDataFrame', 'makeTimeSeries', 'makeTimedeltaIndex', 'makeUIntIndex', 'make_rand_series', 'maybe_produces_warning', 'network', 'np', 'operator', 'os', 'pa_version_under1p01', 'pandas_dtype', 'pd', 'raise_assert_detail', 'randbool', 'rands', 'rands_array', 're', 'reset_display_options', 'reset_testing_mode', 'round_trip_localpath', 'round_trip_pathlib', 'round_trip_pickle', 'set_locale', 'set_testing_mode', 'set_timezone', 'setitem', 'shares_memory', 'string', 'to_array', 'use_numexpr', 'warnings', 'with_csv_dialect', 'write_to_compressed']
tm.makeDataFrame()
A | B | C | D | |
---|---|---|---|---|
cZJYRFBjsZ | 1.017466 | 0.408885 | 0.322214 | 0.054719 |
lLVrbjR6Vr | 0.033257 | 0.616884 | -1.040222 | -1.780187 |
eLUp4PALTt | 2.656438 | 0.640599 | 0.261727 | -0.520845 |
BzoGUejyQV | -0.828406 | 0.037566 | -0.536611 | 0.292041 |
UfVsUDv70f | -0.563905 | -0.634545 | 0.429127 | 0.117443 |
ZCvtoS4VLQ | 0.455937 | -1.760947 | 0.924035 | -1.096260 |
ZCnKayvc0L | 1.224438 | -0.215123 | -0.083788 | -0.436338 |
trYE7TVe9q | -0.316089 | 0.210430 | 1.309266 | -0.360450 |
0BjFKt4A4R | 0.878868 | 0.925385 | -1.945032 | 0.580221 |
g5R8etUU47 | -1.324358 | 0.879758 | -0.075207 | -1.033363 |
WyUH5ihfxA | -0.505695 | -0.562268 | -1.301524 | -2.136327 |
lgaFDVh71E | -2.878787 | -2.156321 | -0.753986 | 1.287103 |
MKe0DW9Aco | 0.875395 | 0.718449 | 0.098643 | 1.016631 |
Fu5VORq3HB | 0.196059 | -1.715751 | 0.377394 | -0.425211 |
4awpGNwhCr | -1.430431 | 1.107052 | 0.351579 | 0.038333 |
abWuxFTgak | 1.134595 | -0.654446 | -1.780598 | 0.475640 |
LdGjWdsrap | 1.052956 | -0.321872 | 1.515367 | 0.178291 |
haSxy2J3H7 | 0.382825 | 1.793814 | 0.452140 | -0.396571 |
QKkLEY5HdO | -1.091404 | 1.648727 | 1.189487 | 0.734657 |
Na4icA12zD | 1.463891 | 0.415739 | 1.857051 | -0.437692 |
Xrx5Xz8pHm | 1.010493 | 0.532272 | 0.981934 | -1.179908 |
eXHd43VLjJ | 0.301460 | -0.344703 | 0.725504 | -0.505409 |
i0bUrWpwuC | 0.818570 | 1.907338 | 1.997379 | -0.226253 |
jBFWDXzj9J | 1.070393 | 0.019434 | -2.291473 | 0.049770 |
eoQ1Xto9xt | 1.038056 | 0.386739 | -1.145852 | -0.691260 |
MX1ffAFzk5 | 1.050974 | -0.011987 | 1.075066 | 0.700000 |
v1bcFWTZHT | 1.132417 | -0.014813 | 0.277577 | 0.020498 |
SrJSnSQKf6 | 2.308104 | 0.008408 | -0.900696 | 0.515424 |
uH7sGOyWjP | -0.144934 | -0.138548 | 0.331670 | -0.315515 |
dHDDaETSMu | 0.352889 | -0.455680 | 1.487833 | 1.291541 |
tm.makeMixedDataFrame()
A | B | C | D | |
---|---|---|---|---|
0 | 0.0 | 0.0 | foo1 | 2009-01-01 |
1 | 1.0 | 1.0 | foo2 | 2009-01-02 |
2 | 2.0 | 0.0 | foo3 | 2009-01-05 |
3 | 3.0 | 1.0 | foo4 | 2009-01-06 |
4 | 4.0 | 0.0 | foo5 | 2009-01-07 |
df1 = tm.makeTimeDataFrame()
df1
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -0.256639 | -0.360026 | 0.781808 | 2.365876 |
2000-01-04 | -0.441481 | -0.416982 | -0.085862 | 1.215792 |
2000-01-05 | 0.687317 | -0.840725 | 0.368144 | -0.604941 |
2000-01-06 | 0.524671 | 0.562214 | 1.348964 | -0.017417 |
2000-01-07 | 1.436570 | -1.263538 | -0.035792 | 0.671226 |
2000-01-10 | 0.047872 | 0.716265 | 0.640712 | -1.106825 |
2000-01-11 | 0.930656 | -1.086892 | -0.519984 | -0.337252 |
2000-01-12 | 0.907508 | -0.841800 | 1.025432 | -0.862827 |
2000-01-13 | 0.539813 | 1.520450 | 0.024358 | 1.493062 |
2000-01-14 | 0.157760 | -0.282024 | -0.499526 | -0.562860 |
2000-01-17 | 0.309461 | -2.399618 | -0.353253 | 1.401097 |
2000-01-18 | 0.010488 | -0.879997 | -0.551815 | -0.522194 |
2000-01-19 | 1.299948 | 0.264738 | -0.883126 | 0.356829 |
2000-01-20 | -1.287793 | -0.818199 | -1.101668 | -0.558555 |
2000-01-21 | -0.100412 | 0.177467 | 1.180741 | 0.013179 |
2000-01-24 | 0.404562 | 0.633419 | -1.084324 | 0.124110 |
2000-01-25 | 0.126925 | 0.241513 | -0.769587 | -1.072740 |
2000-01-26 | -0.960476 | 0.935380 | -0.443059 | -0.887785 |
2000-01-27 | 2.786106 | -1.733085 | -0.935420 | 1.842912 |
2000-01-28 | 1.691881 | 0.806902 | -0.520169 | -1.187983 |
2000-01-31 | 0.219999 | -0.523229 | 0.483812 | -1.892425 |
2000-02-01 | -1.021874 | -1.009266 | -1.116588 | -0.104468 |
2000-02-02 | -0.166256 | 0.266004 | -0.616978 | -0.215507 |
2000-02-03 | -1.855476 | 0.295577 | 1.180183 | -0.084075 |
2000-02-04 | -0.538952 | -0.350280 | 0.007614 | -0.075128 |
2000-02-07 | -0.902403 | 0.642015 | 1.659144 | -1.720960 |
2000-02-08 | -0.105402 | 1.135322 | -1.378690 | -2.452588 |
2000-02-09 | -0.935287 | 0.179174 | 0.662516 | 0.180056 |
2000-02-10 | 1.261172 | -1.363582 | 2.425621 | -0.272038 |
2000-02-11 | 0.869008 | -2.175696 | 1.096948 | 0.191190 |
df2 = tm.makePeriodFrame()
df2
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -1.639489 | 0.871156 | 0.146877 | -1.838182 |
2000-01-04 | 2.045404 | -2.275963 | -2.061878 | -0.209553 |
2000-01-05 | 0.211661 | -0.511191 | 0.584097 | -0.129640 |
2000-01-06 | -0.455644 | -0.611017 | -1.114282 | 0.336684 |
2000-01-07 | 0.637326 | 0.002655 | 1.198229 | 0.026967 |
2000-01-10 | -1.052640 | -1.227736 | 0.296380 | -1.068391 |
2000-01-11 | -1.805982 | 0.129988 | 0.302069 | 0.149396 |
2000-01-12 | -0.800961 | 1.510763 | 1.023772 | 0.485134 |
2000-01-13 | -0.771532 | 0.966103 | 0.538022 | 0.698567 |
2000-01-14 | 0.430516 | -1.960807 | -0.256653 | -0.808380 |
2000-01-17 | 1.768485 | 0.076938 | -0.182843 | -0.487593 |
2000-01-18 | -0.152704 | 1.178684 | 1.561573 | 1.296492 |
2000-01-19 | 0.457053 | 0.015916 | 0.216055 | -0.940051 |
2000-01-20 | -0.097853 | 1.026431 | -0.201932 | 0.009988 |
2000-01-21 | -0.299978 | 0.980998 | -0.674989 | 0.110689 |
2000-01-24 | -0.043211 | -0.917959 | -0.356359 | 0.704586 |
2000-01-25 | 1.284588 | 1.653408 | -1.578361 | 0.569167 |
2000-01-26 | -0.826600 | 0.007350 | -1.336029 | 1.181770 |
2000-01-27 | 0.035391 | -1.108028 | 1.219294 | 0.131281 |
2000-01-28 | 0.477427 | -1.605746 | -1.041161 | -1.128379 |
2000-01-31 | -0.955470 | -0.491113 | -0.738858 | 0.276953 |
2000-02-01 | 0.259385 | 0.335096 | -0.485912 | -0.007712 |
2000-02-02 | -0.237599 | -1.375179 | -1.198458 | -0.662935 |
2000-02-03 | -0.598414 | -0.261143 | 0.173009 | -0.541031 |
2000-02-04 | -1.290128 | -1.422936 | -2.372973 | 0.367791 |
2000-02-07 | -0.716938 | -1.103327 | -0.906131 | 0.756418 |
2000-02-08 | 0.092658 | 0.755122 | 0.156270 | -0.231200 |
2000-02-09 | -0.860367 | 0.696204 | 0.346518 | -0.347244 |
2000-02-10 | -0.819556 | -0.249229 | 0.136163 | 0.666071 |
2000-02-11 | -0.133716 | 1.038838 | -2.515921 | 1.147261 |
df1.index
DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='datetime64[ns]', freq='B')
df1.index.day_of_week
Int64Index([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype='int64')
df1.index.shift(periods=1) # 时间序列index的shift,会按照指定的period或freq平移。注意:如果没有时间序列,则直接平移,见df1['A']例
DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11', '2000-02-14'], dtype='datetime64[ns]', freq='B')
df1.index.shift(freq='D')
DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-15', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-22', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-29', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-05', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11', '2000-02-12'], dtype='datetime64[ns]', freq=None)
df1.index.shift(freq='2D')
DatetimeIndex(['2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08', '2000-01-09', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-15', '2000-01-16', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-22', '2000-01-23', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-29', '2000-01-30', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-05', '2000-02-06', '2000-02-09', '2000-02-10', '2000-02-11', '2000-02-12', '2000-02-13'], dtype='datetime64[ns]', freq=None)
df1
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -0.256639 | -0.360026 | 0.781808 | 2.365876 |
2000-01-04 | -0.441481 | -0.416982 | -0.085862 | 1.215792 |
2000-01-05 | 0.687317 | -0.840725 | 0.368144 | -0.604941 |
2000-01-06 | 0.524671 | 0.562214 | 1.348964 | -0.017417 |
2000-01-07 | 1.436570 | -1.263538 | -0.035792 | 0.671226 |
2000-01-10 | 0.047872 | 0.716265 | 0.640712 | -1.106825 |
2000-01-11 | 0.930656 | -1.086892 | -0.519984 | -0.337252 |
2000-01-12 | 0.907508 | -0.841800 | 1.025432 | -0.862827 |
2000-01-13 | 0.539813 | 1.520450 | 0.024358 | 1.493062 |
2000-01-14 | 0.157760 | -0.282024 | -0.499526 | -0.562860 |
2000-01-17 | 0.309461 | -2.399618 | -0.353253 | 1.401097 |
2000-01-18 | 0.010488 | -0.879997 | -0.551815 | -0.522194 |
2000-01-19 | 1.299948 | 0.264738 | -0.883126 | 0.356829 |
2000-01-20 | -1.287793 | -0.818199 | -1.101668 | -0.558555 |
2000-01-21 | -0.100412 | 0.177467 | 1.180741 | 0.013179 |
2000-01-24 | 0.404562 | 0.633419 | -1.084324 | 0.124110 |
2000-01-25 | 0.126925 | 0.241513 | -0.769587 | -1.072740 |
2000-01-26 | -0.960476 | 0.935380 | -0.443059 | -0.887785 |
2000-01-27 | 2.786106 | -1.733085 | -0.935420 | 1.842912 |
2000-01-28 | 1.691881 | 0.806902 | -0.520169 | -1.187983 |
2000-01-31 | 0.219999 | -0.523229 | 0.483812 | -1.892425 |
2000-02-01 | -1.021874 | -1.009266 | -1.116588 | -0.104468 |
2000-02-02 | -0.166256 | 0.266004 | -0.616978 | -0.215507 |
2000-02-03 | -1.855476 | 0.295577 | 1.180183 | -0.084075 |
2000-02-04 | -0.538952 | -0.350280 | 0.007614 | -0.075128 |
2000-02-07 | -0.902403 | 0.642015 | 1.659144 | -1.720960 |
2000-02-08 | -0.105402 | 1.135322 | -1.378690 | -2.452588 |
2000-02-09 | -0.935287 | 0.179174 | 0.662516 | 0.180056 |
2000-02-10 | 1.261172 | -1.363582 | 2.425621 | -0.272038 |
2000-02-11 | 0.869008 | -2.175696 | 1.096948 | 0.191190 |
df1['A']
2000-01-03 -0.256639 2000-01-04 -0.441481 2000-01-05 0.687317 2000-01-06 0.524671 2000-01-07 1.436570 2000-01-10 0.047872 2000-01-11 0.930656 2000-01-12 0.907508 2000-01-13 0.539813 2000-01-14 0.157760 2000-01-17 0.309461 2000-01-18 0.010488 2000-01-19 1.299948 2000-01-20 -1.287793 2000-01-21 -0.100412 2000-01-24 0.404562 2000-01-25 0.126925 2000-01-26 -0.960476 2000-01-27 2.786106 2000-01-28 1.691881 2000-01-31 0.219999 2000-02-01 -1.021874 2000-02-02 -0.166256 2000-02-03 -1.855476 2000-02-04 -0.538952 2000-02-07 -0.902403 2000-02-08 -0.105402 2000-02-09 -0.935287 2000-02-10 1.261172 2000-02-11 0.869008 Freq: B, Name: A, dtype: float64
df1['A'].shift()
2000-01-03 NaN 2000-01-04 -0.256639 2000-01-05 -0.441481 2000-01-06 0.687317 2000-01-07 0.524671 2000-01-10 1.436570 2000-01-11 0.047872 2000-01-12 0.930656 2000-01-13 0.907508 2000-01-14 0.539813 2000-01-17 0.157760 2000-01-18 0.309461 2000-01-19 0.010488 2000-01-20 1.299948 2000-01-21 -1.287793 2000-01-24 -0.100412 2000-01-25 0.404562 2000-01-26 0.126925 2000-01-27 -0.960476 2000-01-28 2.786106 2000-01-31 1.691881 2000-02-01 0.219999 2000-02-02 -1.021874 2000-02-03 -0.166256 2000-02-04 -1.855476 2000-02-07 -0.538952 2000-02-08 -0.902403 2000-02-09 -0.105402 2000-02-10 -0.935287 2000-02-11 1.261172 Freq: B, Name: A, dtype: float64
a = df1.index.shift() - df1.index
a
TimedeltaIndex(['1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days', '1 days', '1 days', '1 days', '1 days', '3 days'], dtype='timedelta64[ns]', freq=None)
df1.index.values # index中内部的保存数据类型。包装成index后有对应于index的其他操作
array(['2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000', '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000', '2000-01-07T00:00:00.000000000', '2000-01-10T00:00:00.000000000', '2000-01-11T00:00:00.000000000', '2000-01-12T00:00:00.000000000', '2000-01-13T00:00:00.000000000', '2000-01-14T00:00:00.000000000', '2000-01-17T00:00:00.000000000', '2000-01-18T00:00:00.000000000', '2000-01-19T00:00:00.000000000', '2000-01-20T00:00:00.000000000', '2000-01-21T00:00:00.000000000', '2000-01-24T00:00:00.000000000', '2000-01-25T00:00:00.000000000', '2000-01-26T00:00:00.000000000', '2000-01-27T00:00:00.000000000', '2000-01-28T00:00:00.000000000', '2000-01-31T00:00:00.000000000', '2000-02-01T00:00:00.000000000', '2000-02-02T00:00:00.000000000', '2000-02-03T00:00:00.000000000', '2000-02-04T00:00:00.000000000', '2000-02-07T00:00:00.000000000', '2000-02-08T00:00:00.000000000', '2000-02-09T00:00:00.000000000', '2000-02-10T00:00:00.000000000', '2000-02-11T00:00:00.000000000'], dtype='datetime64[ns]')
df1.index.values.to_period()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[90], line 1 ----> 1 df1.index.values.to_period() AttributeError: 'numpy.ndarray' object has no attribute 'to_period'
df1.index.to_period()
PeriodIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='period[B]')
df2
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -1.639489 | 0.871156 | 0.146877 | -1.838182 |
2000-01-04 | 2.045404 | -2.275963 | -2.061878 | -0.209553 |
2000-01-05 | 0.211661 | -0.511191 | 0.584097 | -0.129640 |
2000-01-06 | -0.455644 | -0.611017 | -1.114282 | 0.336684 |
2000-01-07 | 0.637326 | 0.002655 | 1.198229 | 0.026967 |
2000-01-10 | -1.052640 | -1.227736 | 0.296380 | -1.068391 |
2000-01-11 | -1.805982 | 0.129988 | 0.302069 | 0.149396 |
2000-01-12 | -0.800961 | 1.510763 | 1.023772 | 0.485134 |
2000-01-13 | -0.771532 | 0.966103 | 0.538022 | 0.698567 |
2000-01-14 | 0.430516 | -1.960807 | -0.256653 | -0.808380 |
2000-01-17 | 1.768485 | 0.076938 | -0.182843 | -0.487593 |
2000-01-18 | -0.152704 | 1.178684 | 1.561573 | 1.296492 |
2000-01-19 | 0.457053 | 0.015916 | 0.216055 | -0.940051 |
2000-01-20 | -0.097853 | 1.026431 | -0.201932 | 0.009988 |
2000-01-21 | -0.299978 | 0.980998 | -0.674989 | 0.110689 |
2000-01-24 | -0.043211 | -0.917959 | -0.356359 | 0.704586 |
2000-01-25 | 1.284588 | 1.653408 | -1.578361 | 0.569167 |
2000-01-26 | -0.826600 | 0.007350 | -1.336029 | 1.181770 |
2000-01-27 | 0.035391 | -1.108028 | 1.219294 | 0.131281 |
2000-01-28 | 0.477427 | -1.605746 | -1.041161 | -1.128379 |
2000-01-31 | -0.955470 | -0.491113 | -0.738858 | 0.276953 |
2000-02-01 | 0.259385 | 0.335096 | -0.485912 | -0.007712 |
2000-02-02 | -0.237599 | -1.375179 | -1.198458 | -0.662935 |
2000-02-03 | -0.598414 | -0.261143 | 0.173009 | -0.541031 |
2000-02-04 | -1.290128 | -1.422936 | -2.372973 | 0.367791 |
2000-02-07 | -0.716938 | -1.103327 | -0.906131 | 0.756418 |
2000-02-08 | 0.092658 | 0.755122 | 0.156270 | -0.231200 |
2000-02-09 | -0.860367 | 0.696204 | 0.346518 | -0.347244 |
2000-02-10 | -0.819556 | -0.249229 | 0.136163 | 0.666071 |
2000-02-11 | -0.133716 | 1.038838 | -2.515921 | 1.147261 |
df1.index
DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='datetime64[ns]', freq='B')
df2.index
PeriodIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='period[B]')
df2.index.values
array([Period('2000-01-03', 'B'), Period('2000-01-04', 'B'), Period('2000-01-05', 'B'), Period('2000-01-06', 'B'), Period('2000-01-07', 'B'), Period('2000-01-10', 'B'), Period('2000-01-11', 'B'), Period('2000-01-12', 'B'), Period('2000-01-13', 'B'), Period('2000-01-14', 'B'), Period('2000-01-17', 'B'), Period('2000-01-18', 'B'), Period('2000-01-19', 'B'), Period('2000-01-20', 'B'), Period('2000-01-21', 'B'), Period('2000-01-24', 'B'), Period('2000-01-25', 'B'), Period('2000-01-26', 'B'), Period('2000-01-27', 'B'), Period('2000-01-28', 'B'), Period('2000-01-31', 'B'), Period('2000-02-01', 'B'), Period('2000-02-02', 'B'), Period('2000-02-03', 'B'), Period('2000-02-04', 'B'), Period('2000-02-07', 'B'), Period('2000-02-08', 'B'), Period('2000-02-09', 'B'), Period('2000-02-10', 'B'), Period('2000-02-11', 'B')], dtype=object)
df2.index.week
Int64Index([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6], dtype='int64')
df2.index.day_of_week
Int64Index([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype='int64')
df2.index.weekofyear
Int64Index([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6], dtype='int64')
df2.index.to_timestamp()
DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='datetime64[ns]', freq='B')
df2.index.shift() - df2.index
Index([<BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>, <BusinessDay>], dtype='object')
df2.index[1:].append(pd.Index([np.nan])) # 因为shift直接平移日期。若需直接平移数值,可用这个替代。
PeriodIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11', 'NaT'], dtype='period[B]')
df2.index[1:].append(pd.Index([np.nan])).asfreq('D') - df2.index.asfreq('D')
Index([ <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, <3 * Days>, <Day>, <Day>, <Day>, <Day>, NaT], dtype='object')
df = pd.read_pickle('../../../data/stk_df_2015_2021.pkl')
df
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
display(df)
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
temp = pd.read_csv('../../../data/idx_df_csv.csv',dtype={'ticker':str})
temp.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1365 entries, 0 to 1364 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ticker 1365 non-null object 1 secShortName 1365 non-null object 2 tradeDate 1365 non-null object 3 preCloseIndex 1365 non-null float64 4 openIndex 1365 non-null float64 5 highestIndex 1365 non-null float64 6 lowestIndex 1365 non-null float64 7 closeIndex 1365 non-null float64 8 turnoverVol 1365 non-null int64 dtypes: float64(5), int64(1), object(3) memory usage: 96.1+ KB
temp
ticker | secShortName | tradeDate | preCloseIndex | openIndex | highestIndex | lowestIndex | closeIndex | turnoverVol | |
---|---|---|---|---|---|---|---|---|---|
0 | 000001 | 上证综指 | 2021-01-04 | 3473.0693 | 3474.6793 | 3511.6554 | 3457.2061 | 3502.9584 | 38079080000 |
1 | 000001 | 上证综指 | 2021-01-05 | 3502.9584 | 3492.1912 | 3528.6767 | 3484.7151 | 3528.6767 | 40799593400 |
2 | 000001 | 上证综指 | 2021-01-06 | 3528.6767 | 3530.9072 | 3556.8022 | 3513.1262 | 3550.8767 | 37023092600 |
3 | 000001 | 上证综指 | 2021-01-07 | 3550.8767 | 3552.9087 | 3576.2046 | 3526.6174 | 3576.2046 | 40534822600 |
4 | 000001 | 上证综指 | 2021-01-08 | 3576.2046 | 3577.6923 | 3588.0625 | 3544.8912 | 3570.1082 | 34555789600 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1360 | 399106 | 深证综指 | 2022-02-15 | 2253.1310 | 2256.8670 | 2283.6760 | 2251.3000 | 2283.6300 | 37139160803 |
1361 | 399106 | 深证综指 | 2022-02-16 | 2283.6300 | 2293.0800 | 2304.2800 | 2285.6750 | 2296.9930 | 37530877510 |
1362 | 399106 | 深证综指 | 2022-02-17 | 2296.9930 | 2293.4480 | 2314.8270 | 2287.8960 | 2302.1620 | 42928561256 |
1363 | 399106 | 深证综指 | 2022-02-18 | 2302.1620 | 2287.2640 | 2311.8220 | 2284.1220 | 2311.7900 | 38837049683 |
1364 | 399106 | 深证综指 | 2022-02-21 | 2311.7900 | 2313.5060 | 2325.9670 | 2309.7340 | 2325.7960 | 44801214227 |
1365 rows × 9 columns
# 如果读取csv或其他类型的文件, 可能会遇到一些格式转换的问题
pd.read_csv('../../../data/idx_df_csv.csv')
ticker | secShortName | tradeDate | preCloseIndex | openIndex | highestIndex | lowestIndex | closeIndex | turnoverVol | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 上证综指 | 2021-01-04 | 3473.0693 | 3474.6793 | 3511.6554 | 3457.2061 | 3502.9584 | 38079080000 |
1 | 1 | 上证综指 | 2021-01-05 | 3502.9584 | 3492.1912 | 3528.6767 | 3484.7151 | 3528.6767 | 40799593400 |
2 | 1 | 上证综指 | 2021-01-06 | 3528.6767 | 3530.9072 | 3556.8022 | 3513.1262 | 3550.8767 | 37023092600 |
3 | 1 | 上证综指 | 2021-01-07 | 3550.8767 | 3552.9087 | 3576.2046 | 3526.6174 | 3576.2046 | 40534822600 |
4 | 1 | 上证综指 | 2021-01-08 | 3576.2046 | 3577.6923 | 3588.0625 | 3544.8912 | 3570.1082 | 34555789600 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1360 | 399106 | 深证综指 | 2022-02-15 | 2253.1310 | 2256.8670 | 2283.6760 | 2251.3000 | 2283.6300 | 37139160803 |
1361 | 399106 | 深证综指 | 2022-02-16 | 2283.6300 | 2293.0800 | 2304.2800 | 2285.6750 | 2296.9930 | 37530877510 |
1362 | 399106 | 深证综指 | 2022-02-17 | 2296.9930 | 2293.4480 | 2314.8270 | 2287.8960 | 2302.1620 | 42928561256 |
1363 | 399106 | 深证综指 | 2022-02-18 | 2302.1620 | 2287.2640 | 2311.8220 | 2284.1220 | 2311.7900 | 38837049683 |
1364 | 399106 | 深证综指 | 2022-02-21 | 2311.7900 | 2313.5060 | 2325.9670 | 2309.7340 | 2325.7960 | 44801214227 |
1365 rows × 9 columns
df.head(10)
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
5 | 000001.XSHE | 平安银行 | XSHE | 2015-01-12 | 1231.004 | 1205.698 | 2696688 | 2.293105e+09 | 64376.0 | 0.0158 | 1.452882e+11 | 1.687457e+11 |
6 | 000001.XSHE | 平安银行 | XSHE | 2015-01-13 | 1205.698 | 1198.351 | 1418187 | 1.204987e+09 | 34924.0 | 0.0083 | 1.444029e+11 | 1.677175e+11 |
7 | 000001.XSHE | 平安银行 | XSHE | 2015-01-14 | 1198.351 | 1208.963 | 2192762 | 1.889297e+09 | 50675.0 | 0.0128 | 1.456817e+11 | 1.692027e+11 |
8 | 000001.XSHE | 平安银行 | XSHE | 2015-01-15 | 1208.963 | 1253.044 | 2156548 | 1.868796e+09 | 54470.0 | 0.0126 | 1.509935e+11 | 1.753721e+11 |
9 | 000001.XSHE | 平安银行 | XSHE | 2015-01-16 | 1253.044 | 1254.677 | 2701125 | 2.403346e+09 | 60637.0 | 0.0158 | 1.511903e+11 | 1.756006e+11 |
df.tail()
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 308154.0 | 226.0 | 0.0027 | 116472000.0 | 220917000.0 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 111881.0 | 64.0 | 0.0010 | 116288000.0 | 220568000.0 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 77333.0 | 58.0 | 0.0007 | 115000000.0 | 218125000.0 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 71308.0 | 41.0 | 0.0006 | 115920000.0 | 219870000.0 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 105996.0 | 74.0 | 0.0009 | 116104000.0 | 220219000.0 |
df.describe()
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
count | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 | 5.820845e+06 | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 |
mean | 1.368568e+02 | 1.368763e+02 | 5.525219e+06 | 2.008992e+08 | 1.078089e+04 | 2.986831e-02 | 1.324705e+10 | 1.835989e+10 |
std | 2.086103e+03 | 2.085805e+03 | 2.405725e+07 | 4.866879e+08 | 1.874182e+04 | 4.649860e-02 | 5.686519e+10 | 7.522905e+10 |
min | 3.800000e-02 | 3.800000e-02 | 2.000000e+00 | 1.800000e+01 | 1.000000e+00 | 0.000000e+00 | 3.800000e+06 | 1.045000e+07 |
25% | 2.167700e+01 | 2.168600e+01 | 8.454962e+05 | 2.916335e+07 | 2.565000e+03 | 6.900000e-03 | 2.203200e+09 | 3.527868e+09 |
50% | 3.972300e+01 | 3.973600e+01 | 1.864330e+06 | 7.414118e+07 | 5.410000e+03 | 1.510000e-02 | 4.304763e+09 | 6.153206e+09 |
75% | 7.390000e+01 | 7.392400e+01 | 4.317285e+06 | 1.915373e+08 | 1.179500e+04 | 3.380000e-02 | 9.130280e+09 | 1.286893e+10 |
max | 4.024782e+05 | 4.024782e+05 | 5.109897e+09 | 6.794128e+10 | 3.274456e+06 | 9.896000e-01 | 3.267370e+12 | 3.267370e+12 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5838750 entries, 0 to 5838749 Data columns (total 12 columns): # Column Dtype --- ------ ----- 0 secID object 1 secShortName object 2 exchangeCD object 3 tradeDate object 4 preClosePrice float64 5 closePrice float64 6 turnoverVol int64 7 turnoverValue float64 8 dealAmount float64 9 turnoverRate float64 10 negMarketValue float64 11 marketValue float64 dtypes: float64(7), int64(1), object(4) memory usage: 534.6+ MB
df['secShortName'].nunique() # 多少只股票
4853
df['turnoverValue'].mean()
200899200.96656865
df.select_dtypes(include=np.number)
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
0 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 8 columns
df.select_dtypes(include=np.number).mean()
preClosePrice 1.368568e+02 closePrice 1.368763e+02 turnoverVol 5.525219e+06 turnoverValue 2.008992e+08 dealAmount 1.078089e+04 turnoverRate 2.986831e-02 negMarketValue 1.324705e+10 marketValue 1.835989e+10 dtype: float64
df.select_dtypes(include=np.number).mean(axis=1)
0 4.314767e+10 1 4.237068e+10 2 4.147094e+10 3 4.002552e+10 4 4.055811e+10 ... 5838745 4.227324e+07 5838746 4.214321e+07 5838747 4.166574e+07 5838748 4.199687e+07 5838749 4.207461e+07 Length: 5838750, dtype: float64
df['secShortName']=='平安银行'
0 True 1 True 2 True 3 True 4 True ... 5838745 False 5838746 False 5838747 False 5838748 False 5838749 False Name: secShortName, Length: 5838750, dtype: bool
df_pingan = df[df['secShortName']=='平安银行'].copy()
df_pingan
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1700 | 000001.XSHE | 平安银行 | XSHE | 2021-12-27 | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
1701 | 000001.XSHE | 平安银行 | XSHE | 2021-12-28 | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
1702 | 000001.XSHE | 平安银行 | XSHE | 2021-12-29 | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
1703 | 000001.XSHE | 平安银行 | XSHE | 2021-12-30 | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
1704 | 000001.XSHE | 平安银行 | XSHE | 2021-12-31 | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 12 columns
object
和str
有什么区别:
一些细节:numpy 中允许 object 和 str 两种字符串的格式。但pandas没有采用numpy的这种格式,而是用原生python的字符串格式。numpy的格式更为复杂。例:(来自https://stackoverflow.com/questions/34881079/pandas-distinction-between-str-and-object-types)
x = np.array(['Testing', 'a', 'string'], dtype='|S7') # 长度为7的字符串。|是byteorder指示符
y = np.array(['Testing', 'a', 'string'], dtype=object)
x[1] = 'a really really really long string'
x
array([b'Testing', b'a reall', b'string'], dtype='|S7')
y[1] = 'a really really really long string'
y
array(['Testing', 'a really really really long string', 'string'], dtype=object)
pandas中不允许用固定长度的str
temp = pd.DataFrame({'a':range(5)})
temp['a'].dtype
dtype('int64')
temp['a'].astype(object).dtype
dtype('O')
temp['a'].astype(str).dtype
dtype('O')
temp = tm.makeTimeDataFrame(3)
display(temp)
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -0.095473 | 1.490130 | 0.790182 | 0.184909 |
2000-01-04 | -0.248742 | 0.393997 | 1.307647 | 0.051067 |
2000-01-05 | -1.235977 | 1.187502 | -1.665897 | -0.355819 |
temp2 = temp.stack().to_frame().reset_index()
temp2.columns = ['date','variable','value']
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | -0.095473 |
1 | 2000-01-03 | B | 1.490130 |
2 | 2000-01-03 | C | 0.790182 |
3 | 2000-01-03 | D | 0.184909 |
4 | 2000-01-04 | A | -0.248742 |
5 | 2000-01-04 | B | 0.393997 |
6 | 2000-01-04 | C | 1.307647 |
7 | 2000-01-04 | D | 0.051067 |
8 | 2000-01-05 | A | -1.235977 |
9 | 2000-01-05 | B | 1.187502 |
10 | 2000-01-05 | C | -1.665897 |
11 | 2000-01-05 | D | -0.355819 |
temp2.sort_values(by='variable', inplace=True)
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | -0.095473 |
4 | 2000-01-04 | A | -0.248742 |
8 | 2000-01-05 | A | -1.235977 |
1 | 2000-01-03 | B | 1.490130 |
5 | 2000-01-04 | B | 0.393997 |
9 | 2000-01-05 | B | 1.187502 |
2 | 2000-01-03 | C | 0.790182 |
6 | 2000-01-04 | C | 1.307647 |
10 | 2000-01-05 | C | -1.665897 |
3 | 2000-01-03 | D | 0.184909 |
7 | 2000-01-04 | D | 0.051067 |
11 | 2000-01-05 | D | -0.355819 |
temp2.sort_values(by='variable',ascending=False)
date | variable | value | |
---|---|---|---|
3 | 2000-01-03 | D | 0.184909 |
7 | 2000-01-04 | D | 0.051067 |
11 | 2000-01-05 | D | -0.355819 |
2 | 2000-01-03 | C | 0.790182 |
6 | 2000-01-04 | C | 1.307647 |
10 | 2000-01-05 | C | -1.665897 |
1 | 2000-01-03 | B | 1.490130 |
5 | 2000-01-04 | B | 0.393997 |
9 | 2000-01-05 | B | 1.187502 |
0 | 2000-01-03 | A | -0.095473 |
4 | 2000-01-04 | A | -0.248742 |
8 | 2000-01-05 | A | -1.235977 |
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | -0.095473 |
4 | 2000-01-04 | A | -0.248742 |
8 | 2000-01-05 | A | -1.235977 |
1 | 2000-01-03 | B | 1.490130 |
5 | 2000-01-04 | B | 0.393997 |
9 | 2000-01-05 | B | 1.187502 |
2 | 2000-01-03 | C | 0.790182 |
6 | 2000-01-04 | C | 1.307647 |
10 | 2000-01-05 | C | -1.665897 |
3 | 2000-01-03 | D | 0.184909 |
7 | 2000-01-04 | D | 0.051067 |
11 | 2000-01-05 | D | -0.355819 |
temp2.duplicated('variable')
0 False 4 True 8 True 1 False 5 True 9 True 2 False 6 True 10 True 3 False 7 True 11 True dtype: bool
temp2.drop_duplicates('variable')
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | -0.095473 |
1 | 2000-01-03 | B | 1.490130 |
2 | 2000-01-03 | C | 0.790182 |
3 | 2000-01-03 | D | 0.184909 |
temp2.drop_duplicates('variable',keep='last')
date | variable | value | |
---|---|---|---|
8 | 2000-01-05 | A | -1.235977 |
9 | 2000-01-05 | B | 1.187502 |
10 | 2000-01-05 | C | -1.665897 |
11 | 2000-01-05 | D | -0.355819 |
temp = tm.makeDataFrame()
temp['A'] = np.random.randint(0, 100, size=temp.shape[0])
temp.drop(['B','C','D'],axis=1, inplace=True)
temp.columns = ['score']
temp
score | |
---|---|
ocSZYYUkHs | 53 |
gTKl4LEzN5 | 55 |
E3WFBpPnP1 | 64 |
l7t7hYzIjN | 11 |
5MyJDWBGFj | 98 |
V9PGZpnzu1 | 25 |
HHopAhwz1P | 20 |
H8aIGxvWJP | 33 |
yAE3gfP5fn | 56 |
D3SfsS2ddm | 15 |
EjwMd3huy1 | 97 |
SEsr3gTI6C | 84 |
StlWk4TtWt | 22 |
lJ99Pqv1zg | 49 |
Eeoc8MbIok | 91 |
pkJofTFlsG | 89 |
kpbtUkTTTy | 72 |
6K6nnrZZAz | 0 |
pZyED2L9lP | 7 |
5CF9jssPwu | 42 |
5jiQoqwnOp | 86 |
wFwwHYb4x0 | 22 |
humbKxNMFR | 73 |
oC1e0WXbvX | 12 |
PumV2ro6sp | 20 |
KZu0LmKlTN | 16 |
2JPkKXlSWj | 99 |
JEcnqKLoo0 | 48 |
BDMmStdXla | 91 |
jeoVnXIIgq | 11 |
?pd.cut
Signature: pd.cut( x, bins, right: 'bool' = True, labels=None, retbins: 'bool' = False, precision: 'int' = 3, include_lowest: 'bool' = False, duplicates: 'str' = 'raise', ordered: 'bool' = True, ) Docstring: Bin values into discrete intervals. Use `cut` when you need to segment and sort data values into bins. This function is also useful for going from a continuous variable to a categorical variable. For example, `cut` could convert ages to groups of age ranges. Supports binning into an equal number of bins, or a pre-specified array of bins. Parameters ---------- x : array-like The input array to be binned. Must be 1-dimensional. bins : int, sequence of scalars, or IntervalIndex The criteria to bin by. * int : Defines the number of equal-width bins in the range of `x`. The range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. * IntervalIndex : Defines the exact bins to be used. Note that IntervalIndex for `bins` must be non-overlapping. right : bool, default True Indicates whether `bins` includes the rightmost edge or not. If ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]`` indicate (1,2], (2,3], (3,4]. This argument is ignored when `bins` is an IntervalIndex. labels : array or False, default None Specifies the labels for the returned bins. Must be the same length as the resulting bins. If False, returns only integer indicators of the bins. This affects the type of the output container (see below). This argument is ignored when `bins` is an IntervalIndex. If True, raises an error. When `ordered=False`, labels must be provided. retbins : bool, default False Whether to return the bins or not. Useful when bins is provided as a scalar. precision : int, default 3 The precision at which to store and display the bins labels. include_lowest : bool, default False Whether the first interval should be left-inclusive or not. duplicates : {default 'raise', 'drop'}, optional If bin edges are not unique, raise ValueError or drop non-uniques. ordered : bool, default True Whether the labels are ordered or not. Applies to returned types Categorical and Series (with Categorical dtype). If True, the resulting categorical will be ordered. If False, the resulting categorical will be unordered (labels must be provided). .. versionadded:: 1.1.0 Returns ------- out : Categorical, Series, or ndarray An array-like object representing the respective bin for each value of `x`. The type depends on the value of `labels`. * None (default) : returns a Series for Series `x` or a Categorical for all other inputs. The values stored within are Interval dtype. * sequence of scalars : returns a Series for Series `x` or a Categorical for all other inputs. The values stored within are whatever the type in the sequence is. * False : returns an ndarray of integers. bins : numpy.ndarray or IntervalIndex. The computed or specified bins. Only returned when `retbins=True`. For scalar or sequence `bins`, this is an ndarray with the computed bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For an IntervalIndex `bins`, this is equal to `bins`. See Also -------- qcut : Discretize variable into equal-sized buckets based on rank or based on sample quantiles. Categorical : Array type for storing data that come from a fixed set of values. Series : One-dimensional array with axis labels (including time series). IntervalIndex : Immutable Index implementing an ordered, sliceable set. Notes ----- Any NA values will be NA in the result. Out of bounds values will be NA in the resulting Series or Categorical object. Reference :ref:`the user guide <reshaping.tile.cut>` for more examples. Examples -------- Discretize into three equal-sized bins. >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3) ... # doctest: +ELLIPSIS [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) ... # doctest: +ELLIPSIS ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... array([0.994, 3. , 5. , 7. ])) Discovers the same bins, but assign them specific labels. Notice that the returned Categorical's categories are `labels` and is ordered. >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), ... 3, labels=["bad", "medium", "good"]) ['bad', 'good', 'medium', 'medium', 'good', 'bad'] Categories (3, object): ['bad' < 'medium' < 'good'] ``ordered=False`` will result in unordered categories when labels are passed. This parameter can be used to allow non-unique labels: >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, ... labels=["B", "A", "B"], ordered=False) ['B', 'B', 'A', 'A', 'B', 'B'] Categories (2, object): ['A', 'B'] ``labels=False`` implies you just want the bins back. >>> pd.cut([0, 1, 1, 2], bins=4, labels=False) array([0, 1, 1, 3]) Passing a Series as an input returns a Series with categorical dtype: >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), ... index=['a', 'b', 'c', 'd', 'e']) >>> pd.cut(s, 3) ... # doctest: +ELLIPSIS a (1.992, 4.667] b (1.992, 4.667] c (4.667, 7.333] d (7.333, 10.0] e (7.333, 10.0] dtype: category Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ... Passing a Series as an input returns a Series with mapping value. It is used to map numerically to intervals based on bins. >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), ... index=['a', 'b', 'c', 'd', 'e']) >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False) ... # doctest: +ELLIPSIS (a 1.0 b 2.0 c 3.0 d 4.0 e NaN dtype: float64, array([ 0, 2, 4, 6, 8, 10])) Use `drop` optional when bins is not unique >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, ... right=False, duplicates='drop') ... # doctest: +ELLIPSIS (a 1.0 b 2.0 c 3.0 d 3.0 e NaN dtype: float64, array([ 0, 2, 4, 6, 10])) Passing an IntervalIndex for `bins` results in those categories exactly. Notice that values not covered by the IntervalIndex are set to NaN. 0 is to the left of the first bin (which is closed on the right), and 1.5 falls between two bins. >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]] Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]] File: ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/reshape/tile.py Type: function
temp['score'][3] = 0
bins = [0, 25, 50, 75, 100]
pd.cut(temp['score'], bins=bins, labels=['D','C','B','A'])
ocSZYYUkHs B gTKl4LEzN5 B E3WFBpPnP1 B l7t7hYzIjN NaN 5MyJDWBGFj A V9PGZpnzu1 D HHopAhwz1P D H8aIGxvWJP C yAE3gfP5fn B D3SfsS2ddm D EjwMd3huy1 A SEsr3gTI6C A StlWk4TtWt D lJ99Pqv1zg C Eeoc8MbIok A pkJofTFlsG A kpbtUkTTTy B 6K6nnrZZAz NaN pZyED2L9lP D 5CF9jssPwu C 5jiQoqwnOp A wFwwHYb4x0 D humbKxNMFR B oC1e0WXbvX D PumV2ro6sp D KZu0LmKlTN D 2JPkKXlSWj A JEcnqKLoo0 C BDMmStdXla A jeoVnXIIgq D Name: score, dtype: category Categories (4, object): ['D' < 'C' < 'B' < 'A']
bins = [0, 25, 50, 75, 100]
pd.cut(temp['score'], bins=bins, labels=['D','C','B','A'],include_lowest=True)
ocSZYYUkHs B gTKl4LEzN5 B E3WFBpPnP1 B l7t7hYzIjN D 5MyJDWBGFj A V9PGZpnzu1 D HHopAhwz1P D H8aIGxvWJP C yAE3gfP5fn B D3SfsS2ddm D EjwMd3huy1 A SEsr3gTI6C A StlWk4TtWt D lJ99Pqv1zg C Eeoc8MbIok A pkJofTFlsG A kpbtUkTTTy B 6K6nnrZZAz D pZyED2L9lP D 5CF9jssPwu C 5jiQoqwnOp A wFwwHYb4x0 D humbKxNMFR B oC1e0WXbvX D PumV2ro6sp D KZu0LmKlTN D 2JPkKXlSWj A JEcnqKLoo0 C BDMmStdXla A jeoVnXIIgq D Name: score, dtype: category Categories (4, object): ['D' < 'C' < 'B' < 'A']
Useful resources: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
df_pingan
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1700 | 000001.XSHE | 平安银行 | XSHE | 2021-12-27 | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
1701 | 000001.XSHE | 平安银行 | XSHE | 2021-12-28 | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
1702 | 000001.XSHE | 平安银行 | XSHE | 2021-12-29 | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
1703 | 000001.XSHE | 平安银行 | XSHE | 2021-12-30 | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
1704 | 000001.XSHE | 平安银行 | XSHE | 2021-12-31 | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 12 columns
df_pingan.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1705 entries, 0 to 1704 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 secID 1705 non-null object 1 secShortName 1705 non-null object 2 exchangeCD 1705 non-null object 3 tradeDate 1705 non-null object 4 preClosePrice 1705 non-null float64 5 closePrice 1705 non-null float64 6 turnoverVol 1705 non-null int64 7 turnoverValue 1705 non-null float64 8 dealAmount 1705 non-null float64 9 turnoverRate 1705 non-null float64 10 negMarketValue 1705 non-null float64 11 marketValue 1705 non-null float64 dtypes: float64(7), int64(1), object(4) memory usage: 173.2+ KB
df_pingan['tradeDate'] = pd.to_datetime(df_pingan['tradeDate'], format='%Y-%m-%d')
df_pingan.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1705 entries, 0 to 1704 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 secID 1705 non-null object 1 secShortName 1705 non-null object 2 exchangeCD 1705 non-null object 3 tradeDate 1705 non-null datetime64[ns] 4 preClosePrice 1705 non-null float64 5 closePrice 1705 non-null float64 6 turnoverVol 1705 non-null int64 7 turnoverValue 1705 non-null float64 8 dealAmount 1705 non-null float64 9 turnoverRate 1705 non-null float64 10 negMarketValue 1705 non-null float64 11 marketValue 1705 non-null float64 dtypes: datetime64[ns](1), float64(7), int64(1), object(3) memory usage: 173.2+ KB
df_pingan.set_index('tradeDate', inplace=True)
df_pingan
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 000001.XSHE | 平安银行 | XSHE | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 000001.XSHE | 平安银行 | XSHE | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 000001.XSHE | 平安银行 | XSHE | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 000001.XSHE | 平安银行 | XSHE | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 000001.XSHE | 平安银行 | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 11 columns
BM: Business Month
df_pingan.resample('BM').last()
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-30 | 000001.XSHE | 平安银行 | XSHE | 1134.678 | 1137.127 | 1614788 | 1.298736e+09 | 42061.0 | 0.0095 | 1.370254e+11 | 1.591488e+11 |
2015-02-27 | 000001.XSHE | 平安银行 | XSHE | 1148.556 | 1142.025 | 1377727 | 1.115146e+09 | 36250.0 | 0.0081 | 1.376156e+11 | 1.598343e+11 |
2015-03-31 | 000001.XSHE | 平安银行 | XSHE | 1278.350 | 1285.697 | 4745114 | 4.393135e+09 | 114032.0 | 0.0278 | 1.549282e+11 | 1.799421e+11 |
2015-04-30 | 000001.XSHE | 平安银行 | XSHE | 1645.458 | 1650.399 | 4182058 | 4.899380e+09 | 120845.0 | 0.0245 | 1.971277e+11 | 2.289549e+11 |
2015-05-29 | 000001.XSHE | 平安银行 | XSHE | 1531.808 | 1514.019 | 2915126 | 3.103544e+09 | 99260.0 | 0.0171 | 1.808381e+11 | 2.192089e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-08-31 | 000001.XSHE | 平安银行 | XSHE | 2277.563 | 2287.846 | 1416461 | 2.072007e+09 | 119791.0 | 0.0061 | 3.454224e+11 | 3.454253e+11 |
2021-09-30 | 000001.XSHE | 平安银行 | XSHE | 2332.832 | 2304.555 | 959216 | 1.424676e+09 | 70140.0 | 0.0041 | 3.479452e+11 | 3.479481e+11 |
2021-10-29 | 000001.XSHE | 平安银行 | XSHE | 2534.625 | 2506.348 | 650958 | 1.052760e+09 | 76331.0 | 0.0028 | 3.784122e+11 | 3.784154e+11 |
2021-11-30 | 000001.XSHE | 平安银行 | XSHE | 2250.572 | 2241.575 | 884472 | 1.280385e+09 | 65260.0 | 0.0038 | 3.384364e+11 | 3.384392e+11 |
2021-12-31 | 000001.XSHE | 平安银行 | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
84 rows × 11 columns
df_pingan.resample('M').mean(numeric_only=True)
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||
2015-01-31 | 1204.922300 | 1197.126450 | 2.685951e+06 | 2.293232e+09 | 64189.250000 | 0.015725 | 1.442554e+11 | 1.675461e+11 |
2015-02-28 | 1127.222600 | 1127.549133 | 1.506542e+06 | 1.202904e+09 | 37079.066667 | 0.008827 | 1.358712e+11 | 1.578083e+11 |
2015-03-31 | 1193.861227 | 1200.391773 | 2.966580e+06 | 2.552488e+09 | 75256.318182 | 0.017386 | 1.446489e+11 | 1.680031e+11 |
2015-04-30 | 1543.791762 | 1561.158524 | 4.287529e+06 | 4.712147e+09 | 138826.190476 | 0.025114 | 1.869631e+11 | 2.171492e+11 |
2015-05-31 | 1577.762100 | 1570.943100 | 2.897439e+06 | 3.198760e+09 | 96879.850000 | 0.016965 | 1.876373e+11 | 2.213028e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-08-31 | 2440.388364 | 2441.031045 | 1.143970e+06 | 1.800585e+09 | 96686.318182 | 0.004891 | 3.685506e+11 | 3.685537e+11 |
2021-09-30 | 2390.798950 | 2391.634400 | 1.659547e+06 | 2.550127e+09 | 126351.950000 | 0.007095 | 3.610926e+11 | 3.610956e+11 |
2021-10-31 | 2504.098750 | 2516.710812 | 1.081450e+06 | 1.752654e+09 | 97972.312500 | 0.004625 | 3.799768e+11 | 3.799800e+11 |
2021-11-30 | 2318.985409 | 2306.950273 | 1.188170e+06 | 1.768122e+09 | 94558.681818 | 0.005073 | 3.483068e+11 | 3.483098e+11 |
2021-12-31 | 2270.019130 | 2264.654348 | 1.342157e+06 | 1.968544e+09 | 96840.304348 | 0.005735 | 3.419210e+11 | 3.419238e+11 |
84 rows × 8 columns
df_pingan.loc['2015']
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2015-12-25 | 000001.XSHE | 平安银行 | XSHE | 1219.517 | 1226.435 | 578480 | 4.956381e+08 | 19587.0 | 0.0034 | 1.464883e+11 | 1.775707e+11 |
2015-12-28 | 000001.XSHE | 平安银行 | XSHE | 1226.435 | 1183.939 | 1189828 | 1.003010e+09 | 33043.0 | 0.0070 | 1.414126e+11 | 1.714179e+11 |
2015-12-29 | 000001.XSHE | 平安银行 | XSHE | 1183.939 | 1194.810 | 896705 | 7.447184e+08 | 20170.0 | 0.0053 | 1.427110e+11 | 1.729919e+11 |
2015-12-30 | 000001.XSHE | 平安银行 | XSHE | 1194.810 | 1195.798 | 770642 | 6.412773e+08 | 18064.0 | 0.0045 | 1.428291e+11 | 1.731350e+11 |
2015-12-31 | 000001.XSHE | 平安银行 | XSHE | 1195.798 | 1184.928 | 710734 | 5.916436e+08 | 14765.0 | 0.0042 | 1.415306e+11 | 1.715610e+11 |
244 rows × 11 columns
df_pingan.loc['2015-01':'2018-03']
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2018-03-26 | 000001.XSHE | 平安银行 | XSHE | 1384.775 | 1334.708 | 1668113 | 1.519435e+09 | 63646.0 | 0.0082 | 1.849136e+11 | 1.876726e+11 |
2018-03-27 | 000001.XSHE | 平安银行 | XSHE | 1334.708 | 1335.929 | 1330940 | 1.213990e+09 | 49311.0 | 0.0065 | 1.850827e+11 | 1.878443e+11 |
2018-03-28 | 000001.XSHE | 平安银行 | XSHE | 1335.929 | 1329.824 | 1325020 | 1.201023e+09 | 45201.0 | 0.0065 | 1.842368e+11 | 1.869858e+11 |
2018-03-29 | 000001.XSHE | 平安银行 | XSHE | 1329.824 | 1349.362 | 1604219 | 1.445282e+09 | 61870.0 | 0.0079 | 1.869437e+11 | 1.897330e+11 |
2018-03-30 | 000001.XSHE | 平安银行 | XSHE | 1349.362 | 1331.045 | 906846 | 8.234382e+08 | 31869.0 | 0.0044 | 1.844060e+11 | 1.871575e+11 |
791 rows × 11 columns
注意:pandas中的apply操作有时会很慢(包括groupby apply)
df
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
df.select_dtypes(np.number).apply(lambda x: x**2+3)
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
0 | 1.671966e+06 | 1.710179e+06 | 24661553281603 | 2.084277e+19 | 8.552180e+09 | 3.000847 | 2.483276e+22 | 3.349881e+22 |
1 | 1.710179e+06 | 1.659323e+06 | 14146264367107 | 1.192629e+19 | 6.452106e+09 | 3.000484 | 2.409428e+22 | 3.250262e+22 |
2 | 1.659323e+06 | 1.596829e+06 | 8711948463204 | 6.942152e+18 | 5.284854e+09 | 3.000299 | 2.318685e+22 | 3.127853e+22 |
3 | 1.596829e+06 | 1.491352e+06 | 5972896490404 | 4.528399e+18 | 4.724363e+09 | 3.000204 | 2.165525e+22 | 2.921242e+22 |
4 | 1.491352e+06 | 1.515374e+06 | 18966364691524 | 1.471013e+19 | 9.976414e+09 | 3.000650 | 2.200405e+22 | 2.968295e+22 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 3.390625e+00 | 3.407044e+00 | 238642997124 | 9.495889e+10 | 5.107900e+04 | 3.000007 | 1.356573e+16 | 4.880432e+16 |
5838746 | 3.407044e+00 | 3.405769e+00 | 31578000807 | 1.251736e+10 | 4.099000e+03 | 3.000001 | 1.352290e+16 | 4.865024e+16 |
5838747 | 3.405769e+00 | 3.396900e+00 | 15264602503 | 5.980393e+09 | 3.367000e+03 | 3.000000 | 1.322500e+16 | 4.757852e+16 |
5838748 | 3.396900e+00 | 3.403225e+00 | 12904960003 | 5.084831e+09 | 1.684000e+03 | 3.000000 | 1.343745e+16 | 4.834282e+16 |
5838749 | 3.403225e+00 | 3.404496e+00 | 28156840003 | 1.123515e+10 | 5.479000e+03 | 3.000001 | 1.348014e+16 | 4.849641e+16 |
5838750 rows × 8 columns
df.groupby('secShortName')
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f8c8674a3a0>
df.groupby('secShortName').last()
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2021-12-31 | 21.428 | 21.560 | 550679 | 1.185508e+07 | 794.0 | 0.0079 | 1.509872e+09 | 1.509872e+09 |
*ST上普 | 600680.XSHG | XSHG | 2018-04-27 | 28.879 | 30.133 | 1272828 | 3.498398e+07 | 2843.0 | 0.0180 | 1.979601e+09 | 2.939313e+09 |
*ST东海A | 000613.XSHE | XSHE | 2021-12-31 | 5.678 | 5.836 | 3854756 | 2.219116e+07 | 1384.0 | 0.0207 | 1.088870e+09 | 1.485528e+09 |
*ST东海B | 200613.XSHE | XSHE | 2021-12-31 | 1.757 | 1.757 | 33909 | 6.009500e+04 | 13.0 | 0.0004 | 1.408000e+08 | 5.825600e+08 |
*ST东电 | 000585.XSHE | XSHE | 2021-12-31 | 2.617 | 2.617 | 5993090 | 1.543082e+07 | 1268.0 | 0.0123 | 1.249313e+09 | 1.790408e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2021-12-31 | 20.824 | 21.098 | 456508 | 8.807597e+06 | NaN | 0.0068 | 1.297350e+09 | 2.217318e+09 |
龙腾光电 | 688055.XSHG | XSHG | 2021-12-31 | 6.808 | 6.858 | 4664701 | 3.186770e+07 | 3830.0 | 0.0026 | 1.223867e+10 | 2.283333e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2021-12-31 | 77.083 | 77.208 | 3700591 | 2.766969e+08 | 20421.0 | 0.0155 | 1.785667e+10 | 1.785667e+10 |
龙软科技 | 688078.XSHG | XSHG | 2021-12-31 | 62.467 | 59.057 | 963837 | 5.722752e+07 | 2401.0 | 0.0339 | 1.657041e+09 | 4.117650e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-12-31 | 23.830 | 23.620 | 833004 | 1.976806e+07 | 1403.0 | 0.0260 | 7.558400e+08 | 3.023360e+09 |
4853 rows × 11 columns
df.groupby('secShortName').first()
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2015-01-05 | 28.364 | 27.304 | 3103725 | 8.408389e+07 | 4279.0 | 0.0443 | 1.912092e+09 | 1.912092e+09 |
*ST上普 | 600680.XSHG | XSHG | 2015-01-05 | 57.366 | 57.562 | 483414 | 2.566049e+07 | 1787.0 | 0.0069 | 3.781578e+09 | 5.614890e+09 |
*ST东海A | 000613.XSHE | XSHE | 2015-01-05 | 9.769 | 9.783 | 2242078 | 2.191941e+07 | 1159.0 | 0.0123 | 1.782263e+09 | 2.490444e+09 |
*ST东海B | 200613.XSHE | XSHE | 2015-01-05 | 6.642 | 6.576 | 164636 | 1.085171e+06 | 50.0 | 0.0021 | 5.271200e+08 | 2.180959e+09 |
*ST东电 | 000585.XSHE | XSHE | 2015-01-05 | 5.183 | 5.272 | 9358955 | 4.800403e+07 | 4519.0 | 0.0192 | 2.516909e+09 | 3.607018e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2020-07-27 | 9.180 | 7.840 | 4804739 | 4.053933e+07 | NaN | 0.0730 | 5.157141e+08 | 8.976079e+08 |
龙腾光电 | 688055.XSHG | XSHG | 2020-08-17 | 1.220 | 9.850 | 215585877 | 1.892487e+09 | 211520.0 | 0.8003 | 2.653273e+09 | 3.283333e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2017-04-10 | 9.520 | 13.710 | 11500 | 1.576650e+05 | 15.0 | 0.0002 | 7.129200e+08 | 2.851680e+09 |
龙软科技 | 688078.XSHG | XSHG | 2019-12-30 | 21.590 | 48.900 | 12454266 | 6.069590e+08 | 28417.0 | 0.7736 | 7.872460e+08 | 3.459675e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-04-16 | 12.860 | 18.520 | 226150 | 4.186135e+06 | 345.0 | 0.0071 | 5.926400e+08 | 2.370560e+09 |
4853 rows × 11 columns
df.groupby('secShortName').head(3)
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
1705 | 000002.XSHE | 万科A | XSHE | 2015-01-05 | 1567.420 | 1681.312 | 11835160 | 9.700712e+09 | 185938.0 | 0.0676 | 1.447266e+11 | 1.645692e+11 |
1706 | 000002.XSHE | 万科A | XSHE | 2015-01-06 | 1681.312 | 1619.292 | 6036510 | 4.839616e+09 | 123274.0 | 0.0345 | 1.393879e+11 | 1.584986e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5835695 | 900956.XSHG | 东贝B股 | XSHG | 2015-01-06 | 1.159 | 1.158 | 438800 | 4.708660e+05 | 265.0 | 0.0038 | 1.242000e+08 | 2.538000e+08 |
5835696 | 900956.XSHG | 东贝B股 | XSHG | 2015-01-07 | 1.158 | 1.159 | 429200 | 4.600200e+05 | 236.0 | 0.0037 | 1.243150e+08 | 2.540350e+08 |
5837045 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-05 | 0.620 | 0.628 | 615555 | 3.823390e+05 | 268.0 | 0.0033 | 1.146320e+08 | 2.174270e+08 |
5837046 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-06 | 0.628 | 0.623 | 459481 | 2.838860e+05 | 194.0 | 0.0025 | 1.137120e+08 | 2.156820e+08 |
5837047 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-07 | 0.623 | 0.631 | 703095 | 4.412970e+05 | 324.0 | 0.0038 | 1.151840e+08 | 2.184740e+08 |
14554 rows × 12 columns
df.groupby('secShortName').nth(-2)
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2021-12-30 | 21.163 | 21.428 | 793779 | 1.689394e+07 | 1009.0 | 0.0113 | 1.500590e+09 | 1.500590e+09 |
*ST上普 | 600680.XSHG | XSHG | 2018-04-26 | 28.800 | 28.879 | 867415 | 2.348474e+07 | 1907.0 | 0.0123 | 1.897224e+09 | 2.817000e+09 |
*ST东海A | 000613.XSHE | XSHE | 2021-12-30 | 5.521 | 5.678 | 2735594 | 1.541656e+07 | 1158.0 | 0.0147 | 1.059513e+09 | 1.445477e+09 |
*ST东海B | 200613.XSHE | XSHE | 2021-12-30 | 1.757 | 1.757 | 261068 | 4.593800e+05 | 10.0 | 0.0033 | 1.408000e+08 | 5.825600e+08 |
*ST东电 | 000585.XSHE | XSHE | 2021-12-30 | 2.604 | 2.617 | 2416167 | 6.230545e+06 | 678.0 | 0.0050 | 1.249313e+09 | 1.790408e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2021-12-30 | 20.167 | 20.824 | 561059 | 1.063724e+07 | NaN | 0.0083 | 1.280519e+09 | 2.188552e+09 |
龙腾光电 | 688055.XSHG | XSHG | 2021-12-30 | 6.768 | 6.808 | 4752067 | 3.232808e+07 | 3395.0 | 0.0027 | 1.214933e+10 | 2.266667e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2021-12-30 | 79.167 | 77.083 | 5338171 | 4.007450e+08 | 31920.0 | 0.0223 | 1.782774e+10 | 1.782774e+10 |
龙软科技 | 688078.XSHG | XSHG | 2021-12-30 | 61.980 | 62.467 | 387495 | 2.392429e+07 | 1454.0 | 0.0136 | 1.752706e+09 | 4.355370e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-12-30 | 23.510 | 23.830 | 1185853 | 2.812384e+07 | 2482.0 | 0.0371 | 7.625600e+08 | 3.050240e+09 |
4852 rows × 11 columns
arrays = [
np.array(["bar", "bar", 'bar', "baz", "baz", "baz", "foo", "foo", "foo", "qux", "qux", "qux"]),
np.array([1,2,3, 1,2,3, 1,2,3, 1,2,3]),
]
temp = pd.DataFrame(np.random.randn(12, 4), index=arrays)
temp
0 | 1 | 2 | 3 | ||
---|---|---|---|---|---|
bar | 1 | -0.006910 | -0.495349 | 1.583263 | 0.679016 |
2 | -0.264934 | 0.034949 | 1.936711 | 0.711140 | |
3 | -1.161140 | -0.364241 | 0.874213 | -0.482764 | |
baz | 1 | -1.672136 | 0.990049 | 0.068838 | -0.474269 |
2 | 0.007340 | 0.476043 | 0.561914 | -0.520332 | |
3 | -1.266026 | 0.144909 | 2.426870 | -0.361181 | |
foo | 1 | 0.272596 | -1.057101 | 1.400982 | 1.160019 |
2 | -0.912277 | 0.609319 | -0.361195 | -0.668084 | |
3 | 0.214207 | -0.023626 | 1.501661 | -0.926852 | |
qux | 1 | -2.123011 | 0.508700 | -1.222445 | -0.634993 |
2 | 0.826994 | 1.076484 | 0.884695 | -0.682403 | |
3 | 0.015557 | -0.419355 | -1.725047 | 0.314541 |
temp.loc[('bar',2):('foo',3)]
0 | 1 | 2 | 3 | ||
---|---|---|---|---|---|
bar | 2 | -0.264934 | 0.034949 | 1.936711 | 0.711140 |
3 | -1.161140 | -0.364241 | 0.874213 | -0.482764 | |
baz | 1 | -1.672136 | 0.990049 | 0.068838 | -0.474269 |
2 | 0.007340 | 0.476043 | 0.561914 | -0.520332 | |
3 | -1.266026 | 0.144909 | 2.426870 | -0.361181 | |
foo | 1 | 0.272596 | -1.057101 | 1.400982 | 1.160019 |
2 | -0.912277 | 0.609319 | -0.361195 | -0.668084 | |
3 | 0.214207 | -0.023626 | 1.501661 | -0.926852 |
df['tradeDate'] = pd.to_datetime(df['tradeDate'], format='%Y-%m-%d')
df.set_index(['secShortName','tradeDate'])
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 | |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 | |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 | |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 | |
2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 | |
2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 | |
2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 10 columns
df.set_index(['secShortName','tradeDate'],inplace=True)
df
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 | |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 | |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 | |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 | |
2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 | |
2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 | |
2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 10 columns
df.groupby(['secShortName','tradeDate']).last()
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
*ST万方 | 2015-01-05 | 000638.XSHE | XSHE | 28.364 | 27.304 | 3103725 | 84083888.60 | 4279.0 | 0.0443 | 1.912092e+09 | 1.912092e+09 |
2015-01-06 | 000638.XSHE | XSHE | 27.304 | 27.039 | 1926608 | 51665933.96 | 3060.0 | 0.0275 | 1.893528e+09 | 1.893528e+09 | |
2015-01-07 | 000638.XSHE | XSHE | 27.039 | 26.995 | 1295798 | 35013368.40 | 1919.0 | 0.0185 | 1.890434e+09 | 1.890434e+09 | |
2015-01-08 | 000638.XSHE | XSHE | 26.995 | 27.702 | 2224507 | 61274527.17 | 3560.0 | 0.0318 | 1.939938e+09 | 1.939938e+09 | |
2015-01-09 | 000638.XSHE | XSHE | 27.702 | 27.348 | 1053730 | 29061439.79 | 1860.0 | 0.0151 | 1.915186e+09 | 1.915186e+09 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙高股份 | 2021-12-27 | 605086.XSHG | XSHG | 23.320 | 23.340 | 711496 | 16590677.00 | 1586.0 | 0.0222 | 7.468800e+08 | 2.987520e+09 |
2021-12-28 | 605086.XSHG | XSHG | 23.340 | 23.540 | 819496 | 19232165.00 | 1664.0 | 0.0256 | 7.532800e+08 | 3.013120e+09 | |
2021-12-29 | 605086.XSHG | XSHG | 23.540 | 23.510 | 656443 | 15410019.00 | 1646.0 | 0.0205 | 7.523200e+08 | 3.009280e+09 | |
2021-12-30 | 605086.XSHG | XSHG | 23.510 | 23.830 | 1185853 | 28123837.00 | 2482.0 | 0.0371 | 7.625600e+08 | 3.050240e+09 | |
2021-12-31 | 605086.XSHG | XSHG | 23.830 | 23.620 | 833004 | 19768062.00 | 1403.0 | 0.0260 | 7.558400e+08 | 3.023360e+09 |
5838680 rows × 10 columns
df.loc['平安银行','2021']
/var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_73497/224712730.py:1: PerformanceWarning: indexing past lexsort depth may impact performance. df.loc['平安银行','2021'] /var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_73497/224712730.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead. df.loc['平安银行','2021']
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2021-01-04 | 000001.XSHE | XSHE | 2466.388 | 2372.018 | 1873816 | 2.891682e+09 | 119328.0 | 0.0080 | 3.609470e+11 | 3.609501e+11 |
2021-01-05 | 000001.XSHE | XSHE | 2372.018 | 2317.181 | 2195884 | 3.284607e+09 | 187364.0 | 0.0094 | 3.526025e+11 | 3.526055e+11 |
2021-01-06 | 000001.XSHE | XSHE | 2317.181 | 2494.444 | 2332835 | 3.648522e+09 | 154714.0 | 0.0100 | 3.795765e+11 | 3.795798e+11 |
2021-01-07 | 000001.XSHE | XSHE | 2494.444 | 2537.804 | 1909948 | 3.111275e+09 | 123304.0 | 0.0082 | 3.861745e+11 | 3.861778e+11 |
2021-01-08 | 000001.XSHE | XSHE | 2537.804 | 2531.427 | 1441303 | 2.348316e+09 | 95828.0 | 0.0062 | 3.852042e+11 | 3.852075e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 000001.XSHE | XSHE | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 000001.XSHE | XSHE | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 000001.XSHE | XSHE | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 000001.XSHE | XSHE | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 000001.XSHE | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
243 rows × 10 columns
df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key
/var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_73497/2348492628.py:1: PerformanceWarning: indexing past lexsort depth may impact performance. df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key /var/folders/gh/26025ywx7w128zfds279s_9r0000gn/T/ipykernel_73497/2348492628.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead. df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2021-01-04 | 000001.XSHE | XSHE | 2466.388 | 2372.018 | 1873816 | 2.891682e+09 | 119328.0 | 0.0080 | 3.609470e+11 | 3.609501e+11 |
2021-01-05 | 000001.XSHE | XSHE | 2372.018 | 2317.181 | 2195884 | 3.284607e+09 | 187364.0 | 0.0094 | 3.526025e+11 | 3.526055e+11 |
2021-01-06 | 000001.XSHE | XSHE | 2317.181 | 2494.444 | 2332835 | 3.648522e+09 | 154714.0 | 0.0100 | 3.795765e+11 | 3.795798e+11 |
2021-01-07 | 000001.XSHE | XSHE | 2494.444 | 2537.804 | 1909948 | 3.111275e+09 | 123304.0 | 0.0082 | 3.861745e+11 | 3.861778e+11 |
2021-01-08 | 000001.XSHE | XSHE | 2537.804 | 2531.427 | 1441303 | 2.348316e+09 | 95828.0 | 0.0062 | 3.852042e+11 | 3.852075e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 000001.XSHE | XSHE | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 000001.XSHE | XSHE | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 000001.XSHE | XSHE | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 000001.XSHE | XSHE | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 000001.XSHE | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
243 rows × 10 columns
df.loc[['平安银行','2016']]# a list is used to specify several keys [on the same level]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Cell In[173], line 1 ----> 1 df.loc[['平安银行','2016']] File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key) 1070 axis = self.axis or 0 1072 maybe_callable = com.apply_if_callable(key, self.obj) -> 1073 return self._getitem_axis(maybe_callable, axis=axis) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1301, in _LocIndexer._getitem_axis(self, key, axis) 1298 if hasattr(key, "ndim") and key.ndim > 1: 1299 raise ValueError("Cannot index with multidimensional key") -> 1301 return self._getitem_iterable(key, axis=axis) 1303 # nested tuple slicing 1304 if is_nested_tuple(key, labels): File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1239, in _LocIndexer._getitem_iterable(self, key, axis) 1236 self._validate_key(key, axis) 1238 # A collection of keys -> 1239 keyarr, indexer = self._get_listlike_indexer(key, axis) 1240 return self.obj._reindex_with_indexers( 1241 {axis: [keyarr, indexer]}, copy=True, allow_dups=True 1242 ) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1432, in _LocIndexer._get_listlike_indexer(self, key, axis) 1429 ax = self.obj._get_axis(axis) 1430 axis_name = self.obj._get_axis_name(axis) -> 1432 keyarr, indexer = ax._get_indexer_strict(key, axis_name) 1434 return keyarr, indexer File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2623, in MultiIndex._get_indexer_strict(self, key, axis_name) 2620 if len(keyarr) and not isinstance(keyarr[0], tuple): 2621 indexer = self._get_indexer_level_0(keyarr) -> 2623 self._raise_if_missing(key, indexer, axis_name) 2624 return self[indexer], indexer 2626 return super()._get_indexer_strict(key, axis_name) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2641, in MultiIndex._raise_if_missing(self, key, indexer, axis_name) 2639 cmask = check == -1 2640 if cmask.any(): -> 2641 raise KeyError(f"{keyarr[cmask]} not in index") 2642 # We get here when levels still contain values which are not 2643 # actually in Index anymore 2644 raise KeyError(f"{keyarr} not in index") KeyError: "['2016'] not in index"
df.loc[('平安银行','2015':'2016')]
Cell In[174], line 1 df.loc[('平安银行','2015':'2016')] ^ SyntaxError: invalid syntax
df.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))]
--------------------------------------------------------------------------- UnsortedIndexError Traceback (most recent call last) Cell In[175], line 1 ----> 1 df.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))] File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key) 1070 axis = self.axis or 0 1072 maybe_callable = com.apply_if_callable(key, self.obj) -> 1073 return self._getitem_axis(maybe_callable, axis=axis) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1290, in _LocIndexer._getitem_axis(self, key, axis) 1288 if isinstance(key, slice): 1289 self._validate_key(key, axis) -> 1290 return self._get_slice_axis(key, axis=axis) 1291 elif com.is_bool_indexer(key): 1292 return self._getbool_axis(key, axis=axis) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py:1324, in _LocIndexer._get_slice_axis(self, slice_obj, axis) 1321 return obj.copy(deep=False) 1323 labels = obj._get_axis(axis) -> 1324 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step) 1326 if isinstance(indexer, slice): 1327 return self.obj._slice(indexer, axis=axis) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:6602, in Index.slice_indexer(self, start, end, step, kind) 6559 """ 6560 Compute the slice indexer for input labels and step. 6561 (...) 6598 slice(1, 3, None) 6599 """ 6600 self._deprecated_arg(kind, "kind", "slice_indexer") -> 6602 start_slice, end_slice = self.slice_locs(start, end, step=step) 6604 # return a slice 6605 if not is_scalar(start_slice): File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2775, in MultiIndex.slice_locs(self, start, end, step, kind) 2772 self._deprecated_arg(kind, "kind", "slice_locs") 2773 # This function adds nothing to its parent implementation (the magic 2774 # happens in get_slice_bound method), but it adds meaningful doc. -> 2775 return super().slice_locs(start, end, step) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:6810, in Index.slice_locs(self, start, end, step, kind) 6808 start_slice = None 6809 if start is not None: -> 6810 start_slice = self.get_slice_bound(start, "left") 6811 if start_slice is None: 6812 start_slice = 0 File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2714, in MultiIndex.get_slice_bound(self, label, side, kind) 2712 if not isinstance(label, tuple): 2713 label = (label,) -> 2714 return self._partial_tup_index(label, side=side) File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2779, in MultiIndex._partial_tup_index(self, tup, side) 2777 def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"): 2778 if len(tup) > self._lexsort_depth: -> 2779 raise UnsortedIndexError( 2780 f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " 2781 f"({self._lexsort_depth})" 2782 ) 2784 n = len(tup) 2785 start, end = 0, len(self) UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (0)'
df2 = df.sort_index() # 必须先排序才可做slicing
df2.loc[('平安银行',pd.Timestamp('2015-01-05')):('平安银行', pd.Timestamp('2015-01-28'))]
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 | |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 | |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 | |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 | |
2015-01-12 | 000001.XSHE | XSHE | 1231.004 | 1205.698 | 2696688 | 2.293105e+09 | 64376.0 | 0.0158 | 1.452882e+11 | 1.687457e+11 | |
2015-01-13 | 000001.XSHE | XSHE | 1205.698 | 1198.351 | 1418187 | 1.204987e+09 | 34924.0 | 0.0083 | 1.444029e+11 | 1.677175e+11 | |
2015-01-14 | 000001.XSHE | XSHE | 1198.351 | 1208.963 | 2192762 | 1.889297e+09 | 50675.0 | 0.0128 | 1.456817e+11 | 1.692027e+11 | |
2015-01-15 | 000001.XSHE | XSHE | 1208.963 | 1253.044 | 2156548 | 1.868796e+09 | 54470.0 | 0.0126 | 1.509935e+11 | 1.753721e+11 | |
2015-01-16 | 000001.XSHE | XSHE | 1253.044 | 1254.677 | 2701125 | 2.403346e+09 | 60637.0 | 0.0158 | 1.511903e+11 | 1.756006e+11 | |
2015-01-19 | 000001.XSHE | XSHE | 1254.677 | 1128.964 | 3710288 | 3.016203e+09 | 103111.0 | 0.0217 | 1.360417e+11 | 1.580063e+11 | |
2015-01-20 | 000001.XSHE | XSHE | 1128.964 | 1128.964 | 2588576 | 2.064281e+09 | 65826.0 | 0.0152 | 1.360417e+11 | 1.580063e+11 | |
2015-01-21 | 000001.XSHE | XSHE | 1128.964 | 1177.127 | 3368980 | 2.758193e+09 | 78709.0 | 0.0197 | 1.418454e+11 | 1.647470e+11 | |
2015-01-22 | 000001.XSHE | XSHE | 1177.127 | 1167.331 | 2178850 | 1.801436e+09 | 53599.0 | 0.0128 | 1.406650e+11 | 1.633760e+11 | |
2015-01-23 | 000001.XSHE | XSHE | 1167.331 | 1175.494 | 2533304 | 2.108747e+09 | 53672.0 | 0.0148 | 1.416487e+11 | 1.645185e+11 | |
2015-01-26 | 000001.XSHE | XSHE | 1175.494 | 1170.596 | 1836123 | 1.508447e+09 | 47025.0 | 0.0108 | 1.410585e+11 | 1.638330e+11 | |
2015-01-27 | 000001.XSHE | XSHE | 1170.596 | 1142.025 | 2325514 | 1.881059e+09 | 61710.0 | 0.0136 | 1.376156e+11 | 1.598343e+11 | |
2015-01-28 | 000001.XSHE | XSHE | 1142.025 | 1147.739 | 2154303 | 1.742176e+09 | 55381.0 | 0.0126 | 1.383042e+11 | 1.606340e+11 |
df2.loc[pd.IndexSlice[('宁德时代','2015-01'):('平安银行','2020-02')]]
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
宁德时代 | 2018-06-11 | 300750.XSHE | XSHE | 25.140 | 36.200 | 78829 | 2.845472e+06 | 154.0 | 0.0004 | 7.864222e+09 | 7.864222e+10 |
2018-06-12 | 300750.XSHE | XSHE | 36.200 | 39.820 | 26579 | 1.058376e+06 | 86.0 | 0.0001 | 8.650644e+09 | 8.650644e+10 | |
2018-06-13 | 300750.XSHE | XSHE | 39.820 | 43.800 | 45030 | 1.972314e+06 | 98.0 | 0.0002 | 9.515274e+09 | 9.515274e+10 | |
2018-06-14 | 300750.XSHE | XSHE | 43.800 | 48.180 | 74267 | 3.578184e+06 | 211.0 | 0.0003 | 1.046680e+10 | 1.046680e+11 | |
2018-06-15 | 300750.XSHE | XSHE | 48.180 | 53.000 | 256510 | 1.359503e+07 | 717.0 | 0.0012 | 1.151392e+10 | 1.151392e+11 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
平安银行 | 2020-01-20 | 000001.XSHE | XSHE | 2055.130 | 2062.654 | 899493 | 1.226465e+09 | 42805.0 | 0.0038 | 3.192246e+11 | 3.192274e+11 |
2020-01-21 | 000001.XSHE | XSHE | 2062.654 | 2006.229 | 1080975 | 1.442171e+09 | 60130.0 | 0.0046 | 3.104920e+11 | 3.104947e+11 | |
2020-01-22 | 000001.XSHE | XSHE | 2006.229 | 2017.514 | 867411 | 1.150933e+09 | 49245.0 | 0.0037 | 3.122386e+11 | 3.122412e+11 | |
2020-01-23 | 000001.XSHE | XSHE | 2017.514 | 1948.549 | 1326911 | 1.723394e+09 | 71683.0 | 0.0057 | 3.015654e+11 | 3.015680e+11 | |
2020-02-03 | 000001.XSHE | XSHE | 1948.549 | 1754.196 | 2723762 | 3.201454e+09 | 105935.0 | 0.0116 | 2.714865e+11 | 2.714888e+11 |
294154 rows × 10 columns
df.loc['平安银行'].loc['2015':'2016']
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2016-12-26 | 000001.XSHE | XSHE | 1092.827 | 1097.641 | 364173 | 2.739343e+08 | 11409.0 | 0.0021 | 1.334364e+11 | 1.565942e+11 |
2016-12-27 | 000001.XSHE | XSHE | 1097.641 | 1092.827 | 324124 | 2.447883e+08 | 9433.0 | 0.0018 | 1.328511e+11 | 1.559073e+11 |
2016-12-28 | 000001.XSHE | XSHE | 1092.827 | 1090.420 | 405159 | 3.048986e+08 | 10133.0 | 0.0023 | 1.325585e+11 | 1.555639e+11 |
2016-12-29 | 000001.XSHE | XSHE | 1090.420 | 1092.827 | 408419 | 3.071835e+08 | 9453.0 | 0.0023 | 1.328511e+11 | 1.559073e+11 |
2016-12-30 | 000001.XSHE | XSHE | 1092.827 | 1095.234 | 364834 | 2.748827e+08 | 10822.0 | 0.0021 | 1.331437e+11 | 1.562507e+11 |
488 rows × 10 columns
df1 = pd.DataFrame({'a':[1,2,3],'b':[4,5,6],'c':[10,20,30]})
df2 = pd.DataFrame({'a':np.array([1,2,6]),
'b':[s for s in 'abc'],
'd':[s for s in 'zxz']})
df1
a | b | c | |
---|---|---|---|
0 | 1 | 4 | 10 |
1 | 2 | 5 | 20 |
2 | 3 | 6 | 30 |
df2
a | b | d | |
---|---|---|---|
0 | 1 | a | z |
1 | 2 | b | x |
2 | 6 | c | z |
pd.merge(df1,df2,on='a')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4 | 10 | a | z |
1 | 2 | 5 | 20 | b | x |
pd.merge(df1,df2,on='a',how='outer')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4.0 | 10.0 | a | z |
1 | 2 | 5.0 | 20.0 | b | x |
2 | 3 | 6.0 | 30.0 | NaN | NaN |
3 | 6 | NaN | NaN | c | z |
pd.merge(df1,df2,on='a',how='left')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4 | 10 | a | z |
1 | 2 | 5 | 20 | b | x |
2 | 3 | 6 | 30 | NaN | NaN |
pd.merge(df1,df2,on='a',how='right')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4.0 | 10.0 | a | z |
1 | 2 | 5.0 | 20.0 | b | x |
2 | 6 | NaN | NaN | c | z |
temp = tm.makeTimeDataFrame(3)
display(temp)
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | 1.314670 | -1.439479 | -0.400497 | -0.759633 |
2000-01-04 | -1.281146 | 0.317622 | 0.562698 | 1.176647 |
2000-01-05 | -0.627046 | 0.507325 | 0.230216 | 1.652183 |
temp.stack().to_frame().reset_index()
level_0 | level_1 | 0 | |
---|---|---|---|
0 | 2000-01-03 | A | 1.314670 |
1 | 2000-01-03 | B | -1.439479 |
2 | 2000-01-03 | C | -0.400497 |
3 | 2000-01-03 | D | -0.759633 |
4 | 2000-01-04 | A | -1.281146 |
5 | 2000-01-04 | B | 0.317622 |
6 | 2000-01-04 | C | 0.562698 |
7 | 2000-01-04 | D | 1.176647 |
8 | 2000-01-05 | A | -0.627046 |
9 | 2000-01-05 | B | 0.507325 |
10 | 2000-01-05 | C | 0.230216 |
11 | 2000-01-05 | D | 1.652183 |
pd.DataFrame(temp.stack()).reset_index()
level_0 | level_1 | 0 | |
---|---|---|---|
0 | 2000-01-03 | A | 1.314670 |
1 | 2000-01-03 | B | -1.439479 |
2 | 2000-01-03 | C | -0.400497 |
3 | 2000-01-03 | D | -0.759633 |
4 | 2000-01-04 | A | -1.281146 |
5 | 2000-01-04 | B | 0.317622 |
6 | 2000-01-04 | C | 0.562698 |
7 | 2000-01-04 | D | 1.176647 |
8 | 2000-01-05 | A | -0.627046 |
9 | 2000-01-05 | B | 0.507325 |
10 | 2000-01-05 | C | 0.230216 |
11 | 2000-01-05 | D | 1.652183 |
temp2 = pd.DataFrame(temp.stack()).reset_index()
temp2.columns=['date','variable','value']
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | 1.314670 |
1 | 2000-01-03 | B | -1.439479 |
2 | 2000-01-03 | C | -0.400497 |
3 | 2000-01-03 | D | -0.759633 |
4 | 2000-01-04 | A | -1.281146 |
5 | 2000-01-04 | B | 0.317622 |
6 | 2000-01-04 | C | 0.562698 |
7 | 2000-01-04 | D | 1.176647 |
8 | 2000-01-05 | A | -0.627046 |
9 | 2000-01-05 | B | 0.507325 |
10 | 2000-01-05 | C | 0.230216 |
11 | 2000-01-05 | D | 1.652183 |
def unpivot(frame):
"""
Essentially the same as 'stack'
"""
N, K = frame.shape
data = {
"value": frame.to_numpy().ravel("F"),
"variable": np.asarray(frame.columns).repeat(N),
"date": np.tile(np.asarray(frame.index), K),
}
return pd.DataFrame(data, columns=["date", "variable", "value"])
temp2 = unpivot(temp)
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | 1.314670 |
1 | 2000-01-04 | A | -1.281146 |
2 | 2000-01-05 | A | -0.627046 |
3 | 2000-01-03 | B | -1.439479 |
4 | 2000-01-04 | B | 0.317622 |
5 | 2000-01-05 | B | 0.507325 |
6 | 2000-01-03 | C | -0.400497 |
7 | 2000-01-04 | C | 0.562698 |
8 | 2000-01-05 | C | 0.230216 |
9 | 2000-01-03 | D | -0.759633 |
10 | 2000-01-04 | D | 1.176647 |
11 | 2000-01-05 | D | 1.652183 |
temp2.set_index(['date','variable'])
value | ||
---|---|---|
date | variable | |
2000-01-03 | A | 1.314670 |
2000-01-04 | A | -1.281146 |
2000-01-05 | A | -0.627046 |
2000-01-03 | B | -1.439479 |
2000-01-04 | B | 0.317622 |
2000-01-05 | B | 0.507325 |
2000-01-03 | C | -0.400497 |
2000-01-04 | C | 0.562698 |
2000-01-05 | C | 0.230216 |
2000-01-03 | D | -0.759633 |
2000-01-04 | D | 1.176647 |
2000-01-05 | D | 1.652183 |
temp2.set_index(['date','variable']).unstack()
value | ||||
---|---|---|---|---|
variable | A | B | C | D |
date | ||||
2000-01-03 | 1.314670 | -1.439479 | -0.400497 | -0.759633 |
2000-01-04 | -1.281146 | 0.317622 | 0.562698 | 1.176647 |
2000-01-05 | -0.627046 | 0.507325 | 0.230216 | 1.652183 |
temp2
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | 1.314670 |
1 | 2000-01-04 | A | -1.281146 |
2 | 2000-01-05 | A | -0.627046 |
3 | 2000-01-03 | B | -1.439479 |
4 | 2000-01-04 | B | 0.317622 |
5 | 2000-01-05 | B | 0.507325 |
6 | 2000-01-03 | C | -0.400497 |
7 | 2000-01-04 | C | 0.562698 |
8 | 2000-01-05 | C | 0.230216 |
9 | 2000-01-03 | D | -0.759633 |
10 | 2000-01-04 | D | 1.176647 |
11 | 2000-01-05 | D | 1.652183 |
temp2.pivot(index='date',columns='variable',values='value')
variable | A | B | C | D |
---|---|---|---|---|
date | ||||
2000-01-03 | 1.314670 | -1.439479 | -0.400497 | -0.759633 |
2000-01-04 | -1.281146 | 0.317622 | 0.562698 | 1.176647 |
2000-01-05 | -0.627046 | 0.507325 | 0.230216 | 1.652183 |
temp2['value2'] = temp2['value']**2
temp2
date | variable | value | value2 | |
---|---|---|---|---|
0 | 2000-01-03 | A | 1.314670 | 1.728358 |
1 | 2000-01-04 | A | -1.281146 | 1.641335 |
2 | 2000-01-05 | A | -0.627046 | 0.393187 |
3 | 2000-01-03 | B | -1.439479 | 2.072100 |
4 | 2000-01-04 | B | 0.317622 | 0.100884 |
5 | 2000-01-05 | B | 0.507325 | 0.257379 |
6 | 2000-01-03 | C | -0.400497 | 0.160398 |
7 | 2000-01-04 | C | 0.562698 | 0.316629 |
8 | 2000-01-05 | C | 0.230216 | 0.053000 |
9 | 2000-01-03 | D | -0.759633 | 0.577043 |
10 | 2000-01-04 | D | 1.176647 | 1.384498 |
11 | 2000-01-05 | D | 1.652183 | 2.729708 |
temp2.pivot(index='date',columns='variable')
value | value2 | |||||||
---|---|---|---|---|---|---|---|---|
variable | A | B | C | D | A | B | C | D |
date | ||||||||
2000-01-03 | 1.314670 | -1.439479 | -0.400497 | -0.759633 | 1.728358 | 2.072100 | 0.160398 | 0.577043 |
2000-01-04 | -1.281146 | 0.317622 | 0.562698 | 1.176647 | 1.641335 | 0.100884 | 0.316629 | 1.384498 |
2000-01-05 | -0.627046 | 0.507325 | 0.230216 | 1.652183 | 0.393187 | 0.257379 | 0.053000 | 2.729708 |
df.reset_index()
secShortName | tradeDate | secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 平安银行 | 2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 平安银行 | 2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 平安银行 | 2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 平安银行 | 2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 凌云B股 | 2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 凌云B股 | 2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 凌云B股 | 2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 凌云B股 | 2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
df.reset_index(inplace=True)
df
secShortName | tradeDate | secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 平安银行 | 2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 平安银行 | 2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 平安银行 | 2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 平安银行 | 2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 凌云B股 | 2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 凌云B股 | 2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 凌云B股 | 2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 凌云B股 | 2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
df.pivot(index='tradeDate',columns='secID',values='closePrice').iloc[:,0:4]
secID | 000001.XSHE | 000002.XSHE | 000004.XSHE | 000005.XSHE |
---|---|---|---|---|
tradeDate | ||||
2015-01-05 | 1307.737 | 1681.312 | 107.225 | NaN |
2015-01-06 | 1288.146 | 1619.292 | 112.487 | NaN |
2015-01-07 | 1263.656 | 1604.632 | 112.145 | NaN |
2015-01-08 | 1221.208 | 1532.463 | 115.630 | NaN |
2015-01-09 | 1231.004 | 1516.676 | 112.282 | NaN |
... | ... | ... | ... | ... |
2021-12-27 | 2213.298 | 2919.261 | 128.342 | 22.207 |
2021-12-28 | 2206.872 | 2928.188 | 133.399 | 22.502 |
2021-12-29 | 2152.889 | 2883.551 | 132.510 | 22.502 |
2021-12-30 | 2161.886 | 2844.866 | 133.945 | 22.502 |
2021-12-31 | 2118.185 | 2940.092 | 132.715 | 22.600 |
1705 rows × 4 columns
df.reset_index().pivot(index='tradeDate',columns='secID',values='closePrice').iloc[:,0:4].plot()
<AxesSubplot: xlabel='tradeDate'>
import matplotlib as mpl
import matplotlib.pyplot as plt
axes: 执行画图的instance,子图
axis: x, y
为什么只画一个图的时候,也经常看到写fig, ax = plt.subplots()
?
因为通常画图时候需要保存。
fig = plt.figure() # an empty figure with no Axes
fig, ax = plt.subplots() # a figure with a single Axes
fig, axs = plt.subplots(2, 2) # a figure with a 2x2 grid of Axes
<Figure size 640x480 with 0 Axes>
fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
[<matplotlib.lines.Line2D at 0x7f8bc80cae20>]
temp2
date | variable | value | value2 | |
---|---|---|---|---|
0 | 2000-01-03 | A | 1.314670 | 1.728358 |
1 | 2000-01-04 | A | -1.281146 | 1.641335 |
2 | 2000-01-05 | A | -0.627046 | 0.393187 |
3 | 2000-01-03 | B | -1.439479 | 2.072100 |
4 | 2000-01-04 | B | 0.317622 | 0.100884 |
5 | 2000-01-05 | B | 0.507325 | 0.257379 |
6 | 2000-01-03 | C | -0.400497 | 0.160398 |
7 | 2000-01-04 | C | 0.562698 | 0.316629 |
8 | 2000-01-05 | C | 0.230216 | 0.053000 |
9 | 2000-01-03 | D | -0.759633 | 0.577043 |
10 | 2000-01-04 | D | 1.176647 | 1.384498 |
11 | 2000-01-05 | D | 1.652183 | 2.729708 |
fig, axs = plt.subplots(1,2)
x = range(temp2.shape[0])
y1 = temp2['value']
y2 = temp2['value']**2
axs[0].plot(x, y1)
axs[1].plot(x, y2)
axs[0].plot(x, y1**3)
axs[1].plot(x, y2-y1)
[<matplotlib.lines.Line2D at 0x7f8bc837ba60>]
x = pd.date_range(start='2021-01-03',periods=temp2.shape[0])
plt.plot(x,temp2['value'], label='random value')
plt.plot(x,temp2['value']**2, label='quadratic of random value')
plt.xlabel('date')
plt.ylabel('value')
plt.title('Example')
plt.legend()
<matplotlib.legend.Legend at 0x7f8bc8111a30>
df_pingan.select_dtypes(np.number).corr()
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
preClosePrice | 1.000000 | 0.997575 | 0.064982 | 0.389881 | 0.601451 | -0.039942 | 0.989830 | 0.994295 |
closePrice | 0.997575 | 1.000000 | 0.075458 | 0.400851 | 0.606522 | -0.030710 | 0.991942 | 0.996452 |
turnoverVol | 0.064982 | 0.075458 | 1.000000 | 0.924077 | 0.752066 | 0.981827 | 0.027619 | 0.055961 |
turnoverValue | 0.389881 | 0.400851 | 0.924077 | 1.000000 | 0.914040 | 0.862471 | 0.355060 | 0.383088 |
dealAmount | 0.601451 | 0.606522 | 0.752066 | 0.914040 | 1.000000 | 0.665757 | 0.573258 | 0.596717 |
turnoverRate | -0.039942 | -0.030710 | 0.981827 | 0.862471 | 0.665757 | 1.000000 | -0.090386 | -0.050985 |
negMarketValue | 0.989830 | 0.991942 | 0.027619 | 0.355060 | 0.573258 | -0.090386 | 1.000000 | 0.993615 |
marketValue | 0.994295 | 0.996452 | 0.055961 | 0.383088 | 0.596717 | -0.050985 | 0.993615 | 1.000000 |
corr = df_pingan.select_dtypes(np.number).corr()
corr
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
preClosePrice | 1.000000 | 0.997575 | 0.064982 | 0.389881 | 0.601451 | -0.039942 | 0.989830 | 0.994295 |
closePrice | 0.997575 | 1.000000 | 0.075458 | 0.400851 | 0.606522 | -0.030710 | 0.991942 | 0.996452 |
turnoverVol | 0.064982 | 0.075458 | 1.000000 | 0.924077 | 0.752066 | 0.981827 | 0.027619 | 0.055961 |
turnoverValue | 0.389881 | 0.400851 | 0.924077 | 1.000000 | 0.914040 | 0.862471 | 0.355060 | 0.383088 |
dealAmount | 0.601451 | 0.606522 | 0.752066 | 0.914040 | 1.000000 | 0.665757 | 0.573258 | 0.596717 |
turnoverRate | -0.039942 | -0.030710 | 0.981827 | 0.862471 | 0.665757 | 1.000000 | -0.090386 | -0.050985 |
negMarketValue | 0.989830 | 0.991942 | 0.027619 | 0.355060 | 0.573258 | -0.090386 | 1.000000 | 0.993615 |
marketValue | 0.994295 | 0.996452 | 0.055961 | 0.383088 | 0.596717 | -0.050985 | 0.993615 | 1.000000 |
np.ones_like?
Signature: np.ones_like(a, dtype=None, order='K', subok=True, shape=None) Docstring: Return an array of ones with the same shape and type as a given array. Parameters ---------- a : array_like The shape and data-type of `a` define these same attributes of the returned array. dtype : data-type, optional Overrides the data type of the result. .. versionadded:: 1.6.0 order : {'C', 'F', 'A', or 'K'}, optional Overrides the memory layout of the result. 'C' means C-order, 'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous, 'C' otherwise. 'K' means match the layout of `a` as closely as possible. .. versionadded:: 1.6.0 subok : bool, optional. If True, then the newly created array will use the sub-class type of `a`, otherwise it will be a base-class array. Defaults to True. shape : int or sequence of ints, optional. Overrides the shape of the result. If order='K' and the number of dimensions is unchanged, will try to keep order, otherwise, order='C' is implied. .. versionadded:: 1.17.0 Returns ------- out : ndarray Array of ones with the same shape and type as `a`. See Also -------- empty_like : Return an empty array with shape and type of input. zeros_like : Return an array of zeros with shape and type of input. full_like : Return a new array with shape of input filled with value. ones : Return a new array setting values to one. Examples -------- >>> x = np.arange(6) >>> x = x.reshape((2, 3)) >>> x array([[0, 1, 2], [3, 4, 5]]) >>> np.ones_like(x) array([[1, 1, 1], [1, 1, 1]]) >>> y = np.arange(3, dtype=float) >>> y array([0., 1., 2.]) >>> np.ones_like(y) array([1., 1., 1.]) File: ~/opt/anaconda3/lib/python3.9/site-packages/numpy/core/numeric.py Type: function
np.ones_like(corr, dtype=bool)
array([[ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True]])
np.triu?
Signature: np.triu(m, k=0) Docstring: Upper triangle of an array. Return a copy of an array with the elements below the `k`-th diagonal zeroed. For arrays with ``ndim`` exceeding 2, `triu` will apply to the final two axes. Please refer to the documentation for `tril` for further details. See Also -------- tril : lower triangle of an array Examples -------- >>> np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], -1) array([[ 1, 2, 3], [ 4, 5, 6], [ 0, 8, 9], [ 0, 0, 12]]) >>> np.triu(np.arange(3*4*5).reshape(3, 4, 5)) array([[[ 0, 1, 2, 3, 4], [ 0, 6, 7, 8, 9], [ 0, 0, 12, 13, 14], [ 0, 0, 0, 18, 19]], [[20, 21, 22, 23, 24], [ 0, 26, 27, 28, 29], [ 0, 0, 32, 33, 34], [ 0, 0, 0, 38, 39]], [[40, 41, 42, 43, 44], [ 0, 46, 47, 48, 49], [ 0, 0, 52, 53, 54], [ 0, 0, 0, 58, 59]]]) File: ~/opt/anaconda3/lib/python3.9/site-packages/numpy/lib/twodim_base.py Type: function
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))
mask
array([[ True, True, True, True, True, True, True, True], [False, True, True, True, True, True, True, True], [False, False, True, True, True, True, True, True], [False, False, False, True, True, True, True, True], [False, False, False, False, True, True, True, True], [False, False, False, False, False, True, True, True], [False, False, False, False, False, False, True, True], [False, False, False, False, False, False, False, True]])
import seaborn as sns
# Set up the matplotlib figure
fig, ax = plt.subplots(figsize=(10, 10))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0, annot=True,
square=True, linewidths=.5, annot_kws={"size": 10},cbar_kws={"shrink": .5})
<AxesSubplot: >
df_pingan_num = df_pingan.select_dtypes(np.number)
df_pingan_num
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||
2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 8 columns
df_pingan_num.plot()
<AxesSubplot: xlabel='tradeDate'>
plt.rcParams['figure.figsize'] = (16.0, 9.0)
df_pingan_num.drop(['negMarketValue','marketValue'],axis=1).plot()
<AxesSubplot: xlabel='tradeDate'>
def standardization(col):
return (col - col.mean()) / col.std()
df_pingan_num.apply(standardization).plot()
<AxesSubplot: xlabel='tradeDate'>
df_pingan_num.apply(standardization)[['preClosePrice','turnoverValue','marketValue']].plot(style=['bs-', 'ro-', 'y^-'])
<AxesSubplot: xlabel='tradeDate'>
df_pingan_num.plot(subplots=True)
array([<AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>, <AxesSubplot: xlabel='tradeDate'>], dtype=object)