import numpy as np
import pandas as pd
np.__version__
'1.20.3'
numpy中的数据存储是array, array要求数据类型是一样的,这和python中的list不同
np.array([1,1.0,'2']) # 强制转换成同一种类型
array(['1', '1.0', '2'], dtype='<U32')
np.array([1,1.0,'2'],dtype=int)
array([1, 1, 2])
np.ones((3,5))
array([[1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.]])
np.full((3,5),1.23)
array([[1.23, 1.23, 1.23, 1.23, 1.23], [1.23, 1.23, 1.23, 1.23, 1.23], [1.23, 1.23, 1.23, 1.23, 1.23]])
np.arange(0, 20, 2)
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
np.random.normal(0, 1, (3,3))
array([[-1.24232068, -1.24041865, 0.06074647], [-0.67727192, 1.07540834, 1.17870305], [-0.355751 , 2.13127248, 1.26080945]])
np.eye(3)
array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
np.tile(10,3)
array([10, 10, 10])
np.tile([1,2,3],3)
array([1, 2, 3, 1, 2, 3, 1, 2, 3])
np.asarray([1,2,3]).repeat(3)
array([1, 1, 1, 2, 2, 2, 3, 3, 3])
a = np.random.normal(0, 1, (3,3))
a
array([[ 0.2746094 , -0.7857175 , 0.66115414], [-0.27875693, -2.33796535, -0.80378465], [ 1.14076118, 1.63070818, -0.23000699]])
a[0,1]
-0.7857175015174842
a[0:2]
array([[ 0.2746094 , -0.7857175 , 0.66115414], [-0.27875693, -2.33796535, -0.80378465]])
a[0:2,1]
array([-0.7857175 , -2.33796535])
a[0,1] = 1000
a
array([[ 2.74609395e-01, 1.00000000e+03, 6.61154144e-01], [-2.78756925e-01, -2.33796535e+00, -8.03784645e-01], [ 1.14076118e+00, 1.63070818e+00, -2.30006988e-01]])
a[0:3:2]
array([[ 2.74609395e-01, 1.00000000e+03, 6.61154144e-01], [ 1.14076118e+00, 1.63070818e+00, -2.30006988e-01]])
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
z = [21,21,21]
np.concatenate([x,y,z])
array([ 1, 2, 3, 3, 2, 1, 21, 21, 21])
a1 = np.ones((3,3))
a2 = np.random.normal(0, 1, (3,3))
a1
array([[1., 1., 1.], [1., 1., 1.], [1., 1., 1.]])
a2
array([[ 0.17443441, 0.57968823, -0.28802156], [ 1.81857187, -1.44289989, -0.01735056], [ 1.28819789, -0.31296372, 1.14075806]])
np.concatenate([a1,a2])
array([[ 1. , 1. , 1. ], [ 1. , 1. , 1. ], [ 1. , 1. , 1. ], [ 0.17443441, 0.57968823, -0.28802156], [ 1.81857187, -1.44289989, -0.01735056], [ 1.28819789, -0.31296372, 1.14075806]])
np.concatenate([a1,a2], axis=1)
array([[ 1. , 1. , 1. , 0.17443441, 0.57968823, -0.28802156], [ 1. , 1. , 1. , 1.81857187, -1.44289989, -0.01735056], [ 1. , 1. , 1. , 1.28819789, -0.31296372, 1.14075806]])
a1 = np.array(range(10))
a1[0:3]
array([0, 1, 2])
a1
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
a1.shape
(10,)
a1 = a1.reshape((10,1))
a1.shape
(10, 1)
a1
array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]])
a1 = a1.reshape((1,10))
a1.shape
(1, 10)
a1.reshape((2,6))
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-34-af5cb9e215e2> in <module> ----> 1 a1.reshape((2,6)) ValueError: cannot reshape array of size 10 into shape (2,6)
a1 = a1.reshape((2,5))
a2 = np.array(range(10,20)).reshape((2,5))
a2
array([[10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
np.vstack([a1,a2])
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
np.hstack([a1,a2])
array([[ 0, 1, 2, 3, 4, 10, 11, 12, 13, 14], [ 5, 6, 7, 8, 9, 15, 16, 17, 18, 19]])
做numpy和pandas操作时的一个原则:能用向量操作尽量用向量操作,速度最快。尽量避免写元素的循环
a1 + 1000
array([[1000, 1001, 1002, 1003, 1004], [1005, 1006, 1007, 1008, 1009]])
a1 * a2
array([[ 0, 11, 24, 39, 56], [ 75, 96, 119, 144, 171]])
list1 = [1,2,3] # some manipulations is not possible with raw python
list2 = [4,5,6]
list1 * list2
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-44-7c0157aad319> in <module> ----> 1 list1 * list2 TypeError: can't multiply sequence by non-int of type 'list'
list1 + list2
[1, 2, 3, 4, 5, 6]
x = np.array([[1, 2, 3], [4, 5, 6]])
x.ravel()
array([1, 2, 3, 4, 5, 6])
x.ravel(order='F')
array([1, 4, 2, 5, 3, 6])
a1
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
a1.max(axis=1)
array([4, 9])
a1.max(axis=0)
array([5, 6, 7, 8, 9])
a1.sum()
45
a1.sum(axis=0)
array([ 5, 7, 9, 11, 13])
a1.sum(axis=1)
array([10, 35])
Mask 把 array 的一部分“挡住”,来阻止其他的函数对其进行操作。这在有的时候挺有用
import numpy.ma as ma
x = np.array([1,2,3,-1,4])
mx = ma.masked_array(x, mask=[0,0,0,1,0])
mx
masked_array(data=[1, 2, 3, --, 4], mask=[False, False, False, True, False], fill_value=999999)
mx.mean()
2.5
# -999 as an invalid value, as sometimes seen in some databases
a = np.array([0,1,2,-999])
ma.masked_values(a, -999)
masked_array(data=[0, 1, 2, --], mask=[False, False, False, True], fill_value=-999)
df = pd.DataFrame({'name':[chr(i) for i in range(ord('a'),ord('z')+1)],
'value':range(26)}) # string.ascii_lowercase provides the alphabet too.
df
name | value | |
---|---|---|
0 | a | 0 |
1 | b | 1 |
2 | c | 2 |
3 | d | 3 |
4 | e | 4 |
5 | f | 5 |
6 | g | 6 |
7 | h | 7 |
8 | i | 8 |
9 | j | 9 |
10 | k | 10 |
11 | l | 11 |
12 | m | 12 |
13 | n | 13 |
14 | o | 14 |
15 | p | 15 |
16 | q | 16 |
17 | r | 17 |
18 | s | 18 |
19 | t | 19 |
20 | u | 20 |
21 | v | 21 |
22 | w | 22 |
23 | x | 23 |
24 | y | 24 |
25 | z | 25 |
data = [['tom', 10], ['nick', 15], ['juli', 14]]
df = pd.DataFrame(data, columns = ['Name', 'Age'])
df
Name | Age | |
---|---|---|
0 | tom | 10 |
1 | nick | 15 |
2 | juli | 14 |
pandas._testing
¶import pandas._testing as tm
dir(tm)
['ALL_EA_INT_DTYPES', 'ALL_INT_DTYPES', 'ALL_NUMPY_DTYPES', 'ALL_REAL_DTYPES', 'Any', 'BOOL_DTYPES', 'BYTES_DTYPES', 'COMPLEX_DTYPES', 'Callable', 'Categorical', 'CategoricalIndex', 'ContextManager', 'Counter', 'DATETIME64_DTYPES', 'DataFrame', 'DatetimeArray', 'DatetimeIndex', 'DatetimeLikeArrayMixin', 'Dtype', 'ExtensionArray', 'FLOAT_DTYPES', 'FLOAT_EA_DTYPES', 'FilePathOrBuffer', 'FrameOrSeries', 'IO', 'Index', 'IntervalArray', 'IntervalIndex', 'List', 'MultiIndex', 'NULL_OBJECTS', 'OBJECT_DTYPES', 'Optional', 'Path', 'PeriodArray', 'RANDS_CHARS', 'RANDU_CHARS', 'RNGContext', 'RangeIndex', 'SIGNED_EA_INT_DTYPES', 'SIGNED_INT_DTYPES', 'STRING_DTYPES', 'Series', 'SubclassedCategorical', 'SubclassedDataFrame', 'SubclassedSeries', 'TIMEDELTA64_DTYPES', 'TimedeltaArray', 'Type', 'UNSIGNED_EA_INT_DTYPES', 'UNSIGNED_INT_DTYPES', 'Union', '_K', '_N', '_RAISE_NETWORK_ERROR_DEFAULT', '__annotations__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '_assert_raised_with_correct_stacklevel', '_check_isinstance', '_create_missing_idx', '_get_default_network_errors', '_get_tol_from_less_precise', '_make_skipna_wrapper', '_make_timeseries', '_names', '_network_errno_vals', '_network_error_messages', '_testing', '_testing_mode_warnings', 'all_timeseries_index_generator', 'array_equivalent', 'assert_almost_equal', 'assert_attr_equal', 'assert_categorical_equal', 'assert_class_equal', 'assert_contains_all', 'assert_copy', 'assert_datetime_array_equal', 'assert_dict_equal', 'assert_equal', 'assert_extension_array_equal', 'assert_frame_equal', 'assert_index_equal', 'assert_interval_array_equal', 'assert_is_sorted', 'assert_is_valid_plot_return_object', 'assert_numpy_array_equal', 'assert_period_array_equal', 'assert_produces_warning', 'assert_series_equal', 'assert_sp_array_equal', 'assert_timedelta_array_equal', 'bdate_range', 'box_expected', 'bz2', 'can_connect', 'can_set_locale', 'cast', 'close', 'contextmanager', 'convert_rows_list_to_csv_str', 'cython_table', 'datetime', 'decompress_file', 'ensure_clean', 'ensure_clean_dir', 'ensure_safe_environment_variables', 'equalContents', 'external_error_raised', 'getCols', 'getMixedTypeDict', 'getPeriodData', 'getSeriesData', 'getTimeSeriesData', 'get_cython_table_params', 'get_locales', 'get_lzma_file', 'get_op_from_name', 'gzip', 'import_lzma', 'index_subclass_makers_generator', 'is_bool', 'is_categorical_dtype', 'is_datetime64_dtype', 'is_datetime64tz_dtype', 'is_extension_array_dtype', 'is_extension_array_dtype_and_needs_i8_conversion', 'is_interval_dtype', 'is_number', 'is_numeric_dtype', 'is_period_dtype', 'is_sequence', 'is_timedelta64_dtype', 'lzma', 'makeBoolIndex', 'makeCategoricalIndex', 'makeCustomDataframe', 'makeCustomIndex', 'makeDataFrame', 'makeDateIndex', 'makeFloatIndex', 'makeFloatSeries', 'makeIntIndex', 'makeIntervalIndex', 'makeMissingDataframe', 'makeMixedDataFrame', 'makeMultiIndex', 'makeObjectSeries', 'makePeriodFrame', 'makePeriodIndex', 'makePeriodSeries', 'makeRangeIndex', 'makeStringIndex', 'makeStringSeries', 'makeTimeDataFrame', 'makeTimeSeries', 'makeTimedeltaIndex', 'makeUIntIndex', 'makeUnicodeIndex', 'needs_i8_conversion', 'network', 'no_default', 'np', 'operator', 'optional_args', 'os', 'pd', 'period_array', 'pprint_thing', 'raise_assert_detail', 'rand', 'randbool', 'randn', 'random', 'rands', 'rands_array', 'randu_array', 're', 'reset_display_options', 'reset_testing_mode', 'rmtree', 'round_trip_localpath', 'round_trip_pathlib', 'round_trip_pickle', 'safe_sort', 'set_locale', 'set_testing_mode', 'set_timezone', 'string', 'take_1d', 'tempfile', 'test_parallel', 'to_array', 'urlopen', 'use_numexpr', 'warnings', 'with_connectivity_check', 'with_csv_dialect', 'wraps', 'write_to_compressed', 'zipfile']
tm.makeDataFrame()
A | B | C | D | |
---|---|---|---|---|
7JxSfR86M9 | -1.843940 | 0.543290 | 1.079228 | -0.177037 |
PPSQLFy3RK | -0.820203 | -0.083865 | -0.688721 | 0.559957 |
8Npy8589aY | -0.120590 | 1.073697 | -0.861034 | -3.342645 |
sPRITzuMLh | 0.597026 | 1.061574 | 0.657195 | 2.329681 |
CmQo9G8q6k | -0.042982 | 0.398946 | -0.219825 | -0.882198 |
q8NNmCxsdj | 0.789726 | -0.539442 | 0.099637 | 0.966168 |
VAazSsGeB1 | -0.924305 | -1.251944 | 0.501412 | 0.793516 |
dX6zxMYKHj | 0.464321 | -0.468189 | 0.587540 | -1.068757 |
RG3qpsDXUX | -0.149476 | -1.042036 | 0.583024 | 0.021728 |
mfoRofJ9Oj | -0.684109 | -1.600323 | 0.715670 | 0.110116 |
GP2i5UPUZf | -0.333201 | 1.018445 | -0.468717 | 0.613148 |
jOL1jxY0k4 | -0.259439 | 1.836241 | -0.798402 | 0.192824 |
0V0BAmeBlW | -0.871275 | -0.721622 | 0.271185 | -0.908018 |
g7ihTBrDGZ | 0.408134 | -0.928721 | 2.301100 | 1.149190 |
UkGTSUqfui | -0.078497 | 0.326344 | 1.076145 | -1.617714 |
aMzjl9b74G | 1.416828 | -0.413055 | 0.796244 | 1.747399 |
NmeqBwmuVg | -0.222019 | 0.659880 | 0.090262 | 1.577326 |
vAGzMhXih0 | 0.035049 | -0.271643 | -0.281687 | -1.609761 |
l2FhLKcMoZ | 0.390223 | 0.856266 | -0.841127 | -0.075180 |
bq7PurFgEM | -0.102330 | -0.659740 | -2.805520 | -1.979935 |
8JofZurfug | -1.113862 | -0.576826 | 1.550617 | -1.385767 |
KqqRUTmWuF | -0.647063 | -0.501838 | -1.369632 | -0.980598 |
lpPXiUfBcM | 0.345687 | -0.820138 | 0.959547 | 1.455158 |
PB8w8Hji2y | -0.479926 | -1.193370 | 0.845829 | -2.402051 |
EJdkTGOErt | 0.394405 | -0.911448 | 0.196851 | -0.991442 |
NFlGGtVaRh | 1.205762 | 0.124166 | 0.561849 | 0.128919 |
Lqqhzmrs5Y | -0.433859 | -1.933109 | 1.250324 | 1.084954 |
xzswONrbAx | -1.655500 | -0.342692 | 2.046556 | 0.479981 |
VtFebGDlns | 0.232555 | 0.321643 | -1.376244 | -0.003383 |
EV64UpCY0Q | 0.981508 | 0.295653 | 0.296618 | -0.735820 |
tm.makeMixedDataFrame()
A | B | C | D | |
---|---|---|---|---|
0 | 0.0 | 0.0 | foo1 | 2009-01-01 |
1 | 1.0 | 1.0 | foo2 | 2009-01-02 |
2 | 2.0 | 0.0 | foo3 | 2009-01-05 |
3 | 3.0 | 1.0 | foo4 | 2009-01-06 |
4 | 4.0 | 0.0 | foo5 | 2009-01-07 |
df1 = tm.makeTimeDataFrame()
df1
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -0.566082 | -0.412164 | -0.917675 | -1.769825 |
2000-01-04 | 1.069662 | 1.623267 | 0.955894 | 2.412347 |
2000-01-05 | 0.256845 | -0.803780 | -1.483637 | 0.398254 |
2000-01-06 | -1.337242 | -0.435310 | -1.354506 | 1.030539 |
2000-01-07 | 0.646727 | 0.791267 | -0.047742 | -1.272576 |
2000-01-10 | -1.659626 | 0.663999 | -2.046059 | -0.668533 |
2000-01-11 | 0.521708 | -0.388952 | 0.488157 | -0.441488 |
2000-01-12 | -0.922287 | 1.544229 | 1.479404 | -2.732198 |
2000-01-13 | -0.097344 | 0.399350 | 0.466873 | 0.275587 |
2000-01-14 | -0.260744 | -1.364674 | 0.714695 | 0.062380 |
2000-01-17 | 1.492977 | -1.993494 | -0.040974 | -0.916235 |
2000-01-18 | 0.202559 | -0.511473 | -1.529170 | 0.497761 |
2000-01-19 | 0.122860 | 0.700209 | 0.928946 | 0.960754 |
2000-01-20 | -1.466905 | -0.544893 | 0.339169 | 1.454759 |
2000-01-21 | -0.769595 | -0.555812 | 0.257154 | -0.886732 |
2000-01-24 | -1.529573 | -0.651623 | 0.044087 | -0.346215 |
2000-01-25 | 0.574232 | 1.179446 | 0.715726 | -0.603236 |
2000-01-26 | -0.385725 | -0.482581 | 0.398204 | 0.494938 |
2000-01-27 | -0.216592 | -0.338154 | 0.725532 | 0.019968 |
2000-01-28 | -0.240490 | -0.227791 | -0.417933 | -0.773071 |
2000-01-31 | 0.239180 | -0.070095 | -0.129478 | -0.719103 |
2000-02-01 | -0.043066 | 0.217660 | -1.299125 | -0.063600 |
2000-02-02 | -0.901398 | -0.504534 | -0.101683 | 1.549783 |
2000-02-03 | -0.551984 | -0.107380 | -0.961869 | -0.298226 |
2000-02-04 | 1.110204 | 1.161737 | 1.403993 | 0.808260 |
2000-02-07 | -0.406732 | 0.726209 | -0.084555 | 0.834588 |
2000-02-08 | 0.507903 | -0.746195 | 1.181583 | -0.374171 |
2000-02-09 | 0.396453 | -0.923951 | 0.408617 | 2.155353 |
2000-02-10 | -1.018897 | 0.766517 | -1.644348 | -0.638495 |
2000-02-11 | -0.076245 | -0.629544 | 0.428748 | 0.358818 |
df2 = tm.makePeriodFrame()
df2
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | 1.840484 | -1.226925 | 1.964919 | -0.188584 |
2000-01-04 | 1.499116 | 0.673563 | -0.323133 | -0.065926 |
2000-01-05 | -0.647275 | 0.493405 | 1.188180 | 0.352904 |
2000-01-06 | 1.427781 | 1.274658 | 0.368720 | -0.343280 |
2000-01-07 | -0.268886 | -0.678313 | -0.006957 | -1.067330 |
2000-01-10 | 1.080200 | -0.710526 | -0.094616 | -0.361054 |
2000-01-11 | 0.450176 | -1.085334 | -0.072673 | 0.620855 |
2000-01-12 | 0.917429 | -0.357414 | 1.440753 | 1.335020 |
2000-01-13 | 1.810843 | 0.188287 | -0.475263 | 0.208246 |
2000-01-14 | 0.482903 | -0.093183 | -0.724464 | -0.773552 |
2000-01-17 | 0.430824 | -0.486270 | -0.366017 | -0.878588 |
2000-01-18 | -0.511784 | 0.662841 | 0.852406 | -1.086859 |
2000-01-19 | -0.503979 | -2.281452 | 0.575089 | 1.524696 |
2000-01-20 | 0.228814 | -0.163978 | 0.722109 | 0.297597 |
2000-01-21 | 0.489627 | -1.223027 | 1.606544 | 0.135421 |
2000-01-24 | 1.529602 | 0.378340 | 1.173306 | 0.542911 |
2000-01-25 | -0.055705 | 1.850102 | 1.906428 | 0.043289 |
2000-01-26 | -0.229832 | -0.522804 | 1.206814 | 1.185826 |
2000-01-27 | 0.969602 | 1.483484 | -0.190419 | -1.035671 |
2000-01-28 | 0.444839 | -0.078305 | -0.958177 | 1.860602 |
2000-01-31 | 0.715301 | 0.964492 | -1.400700 | 0.717159 |
2000-02-01 | -0.459312 | -0.502704 | -1.368180 | -0.092712 |
2000-02-02 | 0.300175 | -1.450606 | 0.664581 | 0.445322 |
2000-02-03 | -0.099412 | 0.132136 | 1.396530 | -3.129603 |
2000-02-04 | -0.547018 | 0.802831 | 1.591881 | -0.216185 |
2000-02-07 | 0.168827 | -1.235460 | 0.399899 | 0.052753 |
2000-02-08 | -0.409770 | -0.937690 | -0.706249 | -0.728566 |
2000-02-09 | -0.734653 | 1.494331 | -1.752827 | 0.248729 |
2000-02-10 | 1.900928 | 0.048877 | -1.065192 | -0.398108 |
2000-02-11 | -0.326944 | 1.575319 | 1.136568 | 0.505951 |
df1.index
DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='datetime64[ns]', freq='B')
df2.index
PeriodIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14', '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20', '2000-01-21', '2000-01-24', '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28', '2000-01-31', '2000-02-01', '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-07', '2000-02-08', '2000-02-09', '2000-02-10', '2000-02-11'], dtype='period[B]', freq='B')
df = pd.read_pickle('../../../data/stk_df_2015_2021.pkl')
df
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
display(df)
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
# 如果读取csv或其他类型的文件, 可能会遇到一些格式转换的问题
pd.read_csv('../../../data/idx_df_csv.csv')
ticker | secShortName | tradeDate | preCloseIndex | openIndex | highestIndex | lowestIndex | closeIndex | turnoverVol | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 上证综指 | 2021-01-04 | 3473.0693 | 3474.6793 | 3511.6554 | 3457.2061 | 3502.9584 | 38079080000 |
1 | 1 | 上证综指 | 2021-01-05 | 3502.9584 | 3492.1912 | 3528.6767 | 3484.7151 | 3528.6767 | 40799593400 |
2 | 1 | 上证综指 | 2021-01-06 | 3528.6767 | 3530.9072 | 3556.8022 | 3513.1262 | 3550.8767 | 37023092600 |
3 | 1 | 上证综指 | 2021-01-07 | 3550.8767 | 3552.9087 | 3576.2046 | 3526.6174 | 3576.2046 | 40534822600 |
4 | 1 | 上证综指 | 2021-01-08 | 3576.2046 | 3577.6923 | 3588.0625 | 3544.8912 | 3570.1082 | 34555789600 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1360 | 399106 | 深证综指 | 2022-02-15 | 2253.1310 | 2256.8670 | 2283.6760 | 2251.3000 | 2283.6300 | 37139160803 |
1361 | 399106 | 深证综指 | 2022-02-16 | 2283.6300 | 2293.0800 | 2304.2800 | 2285.6750 | 2296.9930 | 37530877510 |
1362 | 399106 | 深证综指 | 2022-02-17 | 2296.9930 | 2293.4480 | 2314.8270 | 2287.8960 | 2302.1620 | 42928561256 |
1363 | 399106 | 深证综指 | 2022-02-18 | 2302.1620 | 2287.2640 | 2311.8220 | 2284.1220 | 2311.7900 | 38837049683 |
1364 | 399106 | 深证综指 | 2022-02-21 | 2311.7900 | 2313.5060 | 2325.9670 | 2309.7340 | 2325.7960 | 44801214227 |
1365 rows × 9 columns
df.head()
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
df.tail()
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 308154.0 | 226.0 | 0.0027 | 116472000.0 | 220917000.0 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 111881.0 | 64.0 | 0.0010 | 116288000.0 | 220568000.0 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 77333.0 | 58.0 | 0.0007 | 115000000.0 | 218125000.0 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 71308.0 | 41.0 | 0.0006 | 115920000.0 | 219870000.0 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 105996.0 | 74.0 | 0.0009 | 116104000.0 | 220219000.0 |
df.describe()
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
count | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 | 5.820845e+06 | 5.838750e+06 | 5.838750e+06 | 5.838750e+06 |
mean | 1.368568e+02 | 1.368763e+02 | 5.525219e+06 | 2.008992e+08 | 1.078089e+04 | 2.986831e-02 | 1.324705e+10 | 1.835989e+10 |
std | 2.086103e+03 | 2.085805e+03 | 2.405725e+07 | 4.866879e+08 | 1.874182e+04 | 4.649860e-02 | 5.686519e+10 | 7.522905e+10 |
min | 3.800000e-02 | 3.800000e-02 | 2.000000e+00 | 1.800000e+01 | 1.000000e+00 | 0.000000e+00 | 3.800000e+06 | 1.045000e+07 |
25% | 2.167700e+01 | 2.168600e+01 | 8.454962e+05 | 2.916335e+07 | 2.565000e+03 | 6.900000e-03 | 2.203200e+09 | 3.527868e+09 |
50% | 3.972300e+01 | 3.973600e+01 | 1.864330e+06 | 7.414118e+07 | 5.410000e+03 | 1.510000e-02 | 4.304763e+09 | 6.153206e+09 |
75% | 7.390000e+01 | 7.392400e+01 | 4.317285e+06 | 1.915373e+08 | 1.179500e+04 | 3.380000e-02 | 9.130280e+09 | 1.286893e+10 |
max | 4.024782e+05 | 4.024782e+05 | 5.109897e+09 | 6.794128e+10 | 3.274456e+06 | 9.896000e-01 | 3.267370e+12 | 3.267370e+12 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5838750 entries, 0 to 5838749 Data columns (total 12 columns): # Column Dtype --- ------ ----- 0 secID object 1 secShortName object 2 exchangeCD object 3 tradeDate object 4 preClosePrice float64 5 closePrice float64 6 turnoverVol int64 7 turnoverValue float64 8 dealAmount float64 9 turnoverRate float64 10 negMarketValue float64 11 marketValue float64 dtypes: float64(7), int64(1), object(4) memory usage: 534.6+ MB
df['secShortName'].nunique() # 多少只股票
4853
df['turnoverValue'].mean()
200899200.96656865
df.select_dtypes(include=np.number).mean()
preClosePrice 1.368568e+02 closePrice 1.368763e+02 turnoverVol 5.525219e+06 turnoverValue 2.008992e+08 dealAmount 1.078089e+04 turnoverRate 2.986831e-02 negMarketValue 1.324705e+10 marketValue 1.835989e+10 dtype: float64
df.select_dtypes(include=np.number).mean(axis=1)
0 4.314767e+10 1 4.237068e+10 2 4.147094e+10 3 4.002552e+10 4 4.055811e+10 ... 5838745 4.227324e+07 5838746 4.214321e+07 5838747 4.166574e+07 5838748 4.199687e+07 5838749 4.207461e+07 Length: 5838750, dtype: float64
df_pingan = df[df['secShortName']=='平安银行'].copy()
df_pingan
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1700 | 000001.XSHE | 平安银行 | XSHE | 2021-12-27 | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
1701 | 000001.XSHE | 平安银行 | XSHE | 2021-12-28 | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
1702 | 000001.XSHE | 平安银行 | XSHE | 2021-12-29 | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
1703 | 000001.XSHE | 平安银行 | XSHE | 2021-12-30 | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
1704 | 000001.XSHE | 平安银行 | XSHE | 2021-12-31 | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 12 columns
object
和str
有什么区别:
一些细节:numpy 中允许 object 和 str 两种字符串的格式。但pandas没有采用numpy的这种格式,而是用原生python的字符串格式。numpy的格式更为复杂。例:(来自https://stackoverflow.com/questions/34881079/pandas-distinction-between-str-and-object-types)
x = np.array(['Testing', 'a', 'string'], dtype='|S7') # 长度为7的字符串。|是byteorder指示符
y = np.array(['Testing', 'a', 'string'], dtype=object)
x[1] = 'a really really really long string'
x
array([b'Testing', b'a reall', b'string'], dtype='|S7')
y[1] = 'a really really really long string'
y
array(['Testing', 'a really really really long string', 'string'], dtype=object)
pandas中不允许用固定长度的str
temp = pd.DataFrame({'a':range(5)})
temp['a'].dtype
dtype('int64')
temp['a'].astype(object).dtype
dtype('O')
temp['a'].astype(str).dtype
dtype('O')
def unpivot(frame):
N, K = frame.shape
data = {
"value": frame.to_numpy().ravel("F"),
"variable": np.asarray(frame.columns).repeat(N),
"date": np.tile(np.asarray(frame.index), K),
}
return pd.DataFrame(data, columns=["date", "variable", "value"])
temp = tm.makeTimeDataFrame(3)
temp = unpivot(temp)
temp
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | 0.473516 |
1 | 2000-01-04 | A | 0.254679 |
2 | 2000-01-05 | A | -0.723395 |
3 | 2000-01-03 | B | -0.768497 |
4 | 2000-01-04 | B | -0.184978 |
5 | 2000-01-05 | B | 0.168523 |
6 | 2000-01-03 | C | -1.028204 |
7 | 2000-01-04 | C | 1.172930 |
8 | 2000-01-05 | C | 1.149807 |
9 | 2000-01-03 | D | -0.237852 |
10 | 2000-01-04 | D | 1.539805 |
11 | 2000-01-05 | D | 1.984937 |
temp.sort_values(by='value')
date | variable | value | |
---|---|---|---|
6 | 2000-01-03 | C | -1.028204 |
3 | 2000-01-03 | B | -0.768497 |
2 | 2000-01-05 | A | -0.723395 |
9 | 2000-01-03 | D | -0.237852 |
4 | 2000-01-04 | B | -0.184978 |
5 | 2000-01-05 | B | 0.168523 |
1 | 2000-01-04 | A | 0.254679 |
0 | 2000-01-03 | A | 0.473516 |
8 | 2000-01-05 | C | 1.149807 |
7 | 2000-01-04 | C | 1.172930 |
10 | 2000-01-04 | D | 1.539805 |
11 | 2000-01-05 | D | 1.984937 |
temp.duplicated('variable')
0 False 1 True 2 True 3 False 4 True 5 True 6 False 7 True 8 True 9 False 10 True 11 True dtype: bool
temp.drop_duplicates('variable')
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | 0.473516 |
3 | 2000-01-03 | B | -0.768497 |
6 | 2000-01-03 | C | -1.028204 |
9 | 2000-01-03 | D | -0.237852 |
df.drop_duplicates('variable',keep='last')
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-98-5840adb02707> in <module> ----> 1 df.drop_duplicates('variable',keep='last') ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py in drop_duplicates(self, subset, keep, inplace, ignore_index) 5269 inplace = validate_bool_kwarg(inplace, "inplace") 5270 ignore_index = validate_bool_kwarg(ignore_index, "ignore_index") -> 5271 duplicated = self.duplicated(subset, keep=keep) 5272 5273 result = self[-duplicated] ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py in duplicated(self, subset, keep) 5403 diff = Index(subset).difference(self.columns) 5404 if not diff.empty: -> 5405 raise KeyError(diff) 5406 5407 vals = (col.values for name, col in self.items() if name in subset) KeyError: Index(['variable'], dtype='object')
temp = tm.makeDataFrame()
temp['A'] = np.random.randint(0, 100, size=temp.shape[0])
temp.drop(['B','C','D'],axis=1, inplace=True)
temp.columns = ['score']
temp
score | |
---|---|
0cR0rl1CAD | 35 |
YbKjeeIWIu | 17 |
bLnqdDcXH1 | 18 |
MDiwJ7an05 | 71 |
w34x90xMGQ | 47 |
MeWCPkx0iF | 70 |
3QyJtCBsf3 | 63 |
C4Q3D0B3Yj | 17 |
QdwOptJcqh | 73 |
f0WFEFdWNI | 96 |
6bqE83b8MK | 94 |
fCpBnhKyNq | 69 |
lUti9xg4sf | 4 |
fy5jOsiTlD | 45 |
Dvwk3nu0tm | 4 |
383UZYrtOH | 42 |
TrskjhQpCr | 64 |
eeVNEMe2Fp | 53 |
HPomNZ1bH0 | 98 |
hfj12q0GUu | 26 |
4qge0pmzZR | 58 |
701bf2uTxG | 89 |
iTOr3WUGVf | 66 |
yrZCsWgVRA | 70 |
EgzC5Bu3cS | 29 |
1as0JZ80sJ | 1 |
ZaglxpXiTc | 94 |
qFmLtwvmeh | 72 |
Nf2VhMlS8U | 48 |
60VnKOk44h | 99 |
bins = [0, 25, 50, 75, 100]
pd.cut(temp['score'], bins=bins, labels=['D','C','B','A'])
0cR0rl1CAD C YbKjeeIWIu D bLnqdDcXH1 D MDiwJ7an05 B w34x90xMGQ C MeWCPkx0iF B 3QyJtCBsf3 B C4Q3D0B3Yj D QdwOptJcqh B f0WFEFdWNI A 6bqE83b8MK A fCpBnhKyNq B lUti9xg4sf D fy5jOsiTlD C Dvwk3nu0tm D 383UZYrtOH C TrskjhQpCr B eeVNEMe2Fp B HPomNZ1bH0 A hfj12q0GUu C 4qge0pmzZR B 701bf2uTxG A iTOr3WUGVf B yrZCsWgVRA B EgzC5Bu3cS C 1as0JZ80sJ D ZaglxpXiTc A qFmLtwvmeh B Nf2VhMlS8U C 60VnKOk44h A Name: score, dtype: category Categories (4, object): ['D' < 'C' < 'B' < 'A']
bins = [0, 25, 50, 75, 100]
pd.cut(temp['score'], bins=bins, labels=['D','C','B','A'],include_lowest=True)
0cR0rl1CAD C YbKjeeIWIu D bLnqdDcXH1 D MDiwJ7an05 B w34x90xMGQ C MeWCPkx0iF B 3QyJtCBsf3 B C4Q3D0B3Yj D QdwOptJcqh B f0WFEFdWNI A 6bqE83b8MK A fCpBnhKyNq B lUti9xg4sf D fy5jOsiTlD C Dvwk3nu0tm D 383UZYrtOH C TrskjhQpCr B eeVNEMe2Fp B HPomNZ1bH0 A hfj12q0GUu C 4qge0pmzZR B 701bf2uTxG A iTOr3WUGVf B yrZCsWgVRA B EgzC5Bu3cS C 1as0JZ80sJ D ZaglxpXiTc A qFmLtwvmeh B Nf2VhMlS8U C 60VnKOk44h A Name: score, dtype: category Categories (4, object): ['D' < 'C' < 'B' < 'A']
Useful resources: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
df_pingan['tradeDate'] = pd.to_datetime(df_pingan['tradeDate'], format='%Y-%m-%d')
df_pingan.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1705 entries, 0 to 1704 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 secID 1705 non-null object 1 secShortName 1705 non-null object 2 exchangeCD 1705 non-null object 3 tradeDate 1705 non-null datetime64[ns] 4 preClosePrice 1705 non-null float64 5 closePrice 1705 non-null float64 6 turnoverVol 1705 non-null int64 7 turnoverValue 1705 non-null float64 8 dealAmount 1705 non-null float64 9 turnoverRate 1705 non-null float64 10 negMarketValue 1705 non-null float64 11 marketValue 1705 non-null float64 dtypes: datetime64[ns](1), float64(7), int64(1), object(3) memory usage: 173.2+ KB
df_pingan.set_index('tradeDate', inplace=True)
df_pingan
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 000001.XSHE | 平安银行 | XSHE | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 000001.XSHE | 平安银行 | XSHE | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 000001.XSHE | 平安银行 | XSHE | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 000001.XSHE | 平安银行 | XSHE | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 000001.XSHE | 平安银行 | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
1705 rows × 11 columns
BM: Business Month
df_pingan.resample('BM').last()
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-30 | 000001.XSHE | 平安银行 | XSHE | 1134.678 | 1137.127 | 1614788 | 1.298736e+09 | 42061.0 | 0.0095 | 1.370254e+11 | 1.591488e+11 |
2015-02-27 | 000001.XSHE | 平安银行 | XSHE | 1148.556 | 1142.025 | 1377727 | 1.115146e+09 | 36250.0 | 0.0081 | 1.376156e+11 | 1.598343e+11 |
2015-03-31 | 000001.XSHE | 平安银行 | XSHE | 1278.350 | 1285.697 | 4745114 | 4.393135e+09 | 114032.0 | 0.0278 | 1.549282e+11 | 1.799421e+11 |
2015-04-30 | 000001.XSHE | 平安银行 | XSHE | 1645.458 | 1650.399 | 4182058 | 4.899380e+09 | 120845.0 | 0.0245 | 1.971277e+11 | 2.289549e+11 |
2015-05-29 | 000001.XSHE | 平安银行 | XSHE | 1531.808 | 1514.019 | 2915126 | 3.103544e+09 | 99260.0 | 0.0171 | 1.808381e+11 | 2.192089e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-08-31 | 000001.XSHE | 平安银行 | XSHE | 2277.563 | 2287.846 | 1416461 | 2.072007e+09 | 119791.0 | 0.0061 | 3.454224e+11 | 3.454253e+11 |
2021-09-30 | 000001.XSHE | 平安银行 | XSHE | 2332.832 | 2304.555 | 959216 | 1.424676e+09 | 70140.0 | 0.0041 | 3.479452e+11 | 3.479481e+11 |
2021-10-29 | 000001.XSHE | 平安银行 | XSHE | 2534.625 | 2506.348 | 650958 | 1.052760e+09 | 76331.0 | 0.0028 | 3.784122e+11 | 3.784154e+11 |
2021-11-30 | 000001.XSHE | 平安银行 | XSHE | 2250.572 | 2241.575 | 884472 | 1.280385e+09 | 65260.0 | 0.0038 | 3.384364e+11 | 3.384392e+11 |
2021-12-31 | 000001.XSHE | 平安银行 | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
84 rows × 11 columns
df_pingan.resample('M').mean()
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||
2015-01-31 | 1204.922300 | 1197.126450 | 2.685951e+06 | 2.293232e+09 | 64189.250000 | 0.015725 | 1.442554e+11 | 1.675461e+11 |
2015-02-28 | 1127.222600 | 1127.549133 | 1.506542e+06 | 1.202904e+09 | 37079.066667 | 0.008827 | 1.358712e+11 | 1.578083e+11 |
2015-03-31 | 1193.861227 | 1200.391773 | 2.966580e+06 | 2.552488e+09 | 75256.318182 | 0.017386 | 1.446489e+11 | 1.680031e+11 |
2015-04-30 | 1543.791762 | 1561.158524 | 4.287529e+06 | 4.712147e+09 | 138826.190476 | 0.025114 | 1.869631e+11 | 2.171492e+11 |
2015-05-31 | 1577.762100 | 1570.943100 | 2.897439e+06 | 3.198760e+09 | 96879.850000 | 0.016965 | 1.876373e+11 | 2.213028e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-08-31 | 2440.388364 | 2441.031045 | 1.143970e+06 | 1.800585e+09 | 96686.318182 | 0.004891 | 3.685506e+11 | 3.685537e+11 |
2021-09-30 | 2390.798950 | 2391.634400 | 1.659547e+06 | 2.550127e+09 | 126351.950000 | 0.007095 | 3.610926e+11 | 3.610956e+11 |
2021-10-31 | 2504.098750 | 2516.710813 | 1.081450e+06 | 1.752654e+09 | 97972.312500 | 0.004625 | 3.799768e+11 | 3.799800e+11 |
2021-11-30 | 2318.985409 | 2306.950273 | 1.188170e+06 | 1.768122e+09 | 94558.681818 | 0.005073 | 3.483068e+11 | 3.483098e+11 |
2021-12-31 | 2270.019130 | 2264.654348 | 1.342157e+06 | 1.968544e+09 | 96840.304348 | 0.005735 | 3.419210e+11 | 3.419238e+11 |
84 rows × 8 columns
df_pingan.loc['2015']
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2015-12-25 | 000001.XSHE | 平安银行 | XSHE | 1219.517 | 1226.435 | 578480 | 4.956381e+08 | 19587.0 | 0.0034 | 1.464883e+11 | 1.775707e+11 |
2015-12-28 | 000001.XSHE | 平安银行 | XSHE | 1226.435 | 1183.939 | 1189828 | 1.003010e+09 | 33043.0 | 0.0070 | 1.414126e+11 | 1.714179e+11 |
2015-12-29 | 000001.XSHE | 平安银行 | XSHE | 1183.939 | 1194.810 | 896705 | 7.447184e+08 | 20170.0 | 0.0053 | 1.427110e+11 | 1.729919e+11 |
2015-12-30 | 000001.XSHE | 平安银行 | XSHE | 1194.810 | 1195.798 | 770642 | 6.412773e+08 | 18064.0 | 0.0045 | 1.428291e+11 | 1.731350e+11 |
2015-12-31 | 000001.XSHE | 平安银行 | XSHE | 1195.798 | 1184.928 | 710734 | 5.916436e+08 | 14765.0 | 0.0042 | 1.415306e+11 | 1.715610e+11 |
244 rows × 11 columns
df_pingan.loc['2015-01':'2018-03']
secID | secShortName | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||
2015-01-05 | 000001.XSHE | 平安银行 | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | 平安银行 | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | 平安银行 | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | 平安银行 | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | 平安银行 | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2018-03-26 | 000001.XSHE | 平安银行 | XSHE | 1384.775 | 1334.708 | 1668113 | 1.519435e+09 | 63646.0 | 0.0082 | 1.849136e+11 | 1.876726e+11 |
2018-03-27 | 000001.XSHE | 平安银行 | XSHE | 1334.708 | 1335.929 | 1330940 | 1.213990e+09 | 49311.0 | 0.0065 | 1.850827e+11 | 1.878443e+11 |
2018-03-28 | 000001.XSHE | 平安银行 | XSHE | 1335.929 | 1329.824 | 1325020 | 1.201023e+09 | 45201.0 | 0.0065 | 1.842368e+11 | 1.869858e+11 |
2018-03-29 | 000001.XSHE | 平安银行 | XSHE | 1329.824 | 1349.362 | 1604219 | 1.445282e+09 | 61870.0 | 0.0079 | 1.869437e+11 | 1.897330e+11 |
2018-03-30 | 000001.XSHE | 平安银行 | XSHE | 1349.362 | 1331.045 | 906846 | 8.234382e+08 | 31869.0 | 0.0044 | 1.844060e+11 | 1.871575e+11 |
791 rows × 11 columns
注意:pandas中的apply操作有时会很慢(包括groupby apply)
df.select_dtypes(np.number).apply(lambda x: x**2+3)
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
0 | 1.671966e+06 | 1.710179e+06 | 24661553281603 | 2.084277e+19 | 8.552180e+09 | 3.000847 | 2.483276e+22 | 3.349881e+22 |
1 | 1.710179e+06 | 1.659323e+06 | 14146264367107 | 1.192629e+19 | 6.452106e+09 | 3.000484 | 2.409428e+22 | 3.250262e+22 |
2 | 1.659323e+06 | 1.596829e+06 | 8711948463204 | 6.942152e+18 | 5.284854e+09 | 3.000299 | 2.318685e+22 | 3.127853e+22 |
3 | 1.596829e+06 | 1.491352e+06 | 5972896490404 | 4.528399e+18 | 4.724363e+09 | 3.000204 | 2.165525e+22 | 2.921242e+22 |
4 | 1.491352e+06 | 1.515374e+06 | 18966364691524 | 1.471013e+19 | 9.976414e+09 | 3.000650 | 2.200405e+22 | 2.968295e+22 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 3.390625e+00 | 3.407044e+00 | 238642997124 | 9.495889e+10 | 5.107900e+04 | 3.000007 | 1.356573e+16 | 4.880432e+16 |
5838746 | 3.407044e+00 | 3.405769e+00 | 31578000807 | 1.251736e+10 | 4.099000e+03 | 3.000001 | 1.352290e+16 | 4.865024e+16 |
5838747 | 3.405769e+00 | 3.396900e+00 | 15264602503 | 5.980393e+09 | 3.367000e+03 | 3.000000 | 1.322500e+16 | 4.757852e+16 |
5838748 | 3.396900e+00 | 3.403225e+00 | 12904960003 | 5.084831e+09 | 1.684000e+03 | 3.000000 | 1.343745e+16 | 4.834282e+16 |
5838749 | 3.403225e+00 | 3.404496e+00 | 28156840003 | 1.123515e+10 | 5.479000e+03 | 3.000001 | 1.348014e+16 | 4.849641e+16 |
5838750 rows × 8 columns
df.groupby('secShortName')
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fc71d4d3df0>
df.groupby('secShortName').last()
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2021-12-31 | 21.428 | 21.560 | 550679 | 1.185508e+07 | 794.0 | 0.0079 | 1.509872e+09 | 1.509872e+09 |
*ST上普 | 600680.XSHG | XSHG | 2018-04-27 | 28.879 | 30.133 | 1272828 | 3.498398e+07 | 2843.0 | 0.0180 | 1.979601e+09 | 2.939313e+09 |
*ST东海A | 000613.XSHE | XSHE | 2021-12-31 | 5.678 | 5.836 | 3854756 | 2.219116e+07 | 1384.0 | 0.0207 | 1.088870e+09 | 1.485528e+09 |
*ST东海B | 200613.XSHE | XSHE | 2021-12-31 | 1.757 | 1.757 | 33909 | 6.009500e+04 | 13.0 | 0.0004 | 1.408000e+08 | 5.825600e+08 |
*ST东电 | 000585.XSHE | XSHE | 2021-12-31 | 2.617 | 2.617 | 5993090 | 1.543082e+07 | 1268.0 | 0.0123 | 1.249313e+09 | 1.790408e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2021-12-31 | 20.824 | 21.098 | 456508 | 8.807597e+06 | NaN | 0.0068 | 1.297350e+09 | 2.217318e+09 |
龙腾光电 | 688055.XSHG | XSHG | 2021-12-31 | 6.808 | 6.858 | 4664701 | 3.186770e+07 | 3830.0 | 0.0026 | 1.223867e+10 | 2.283333e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2021-12-31 | 77.083 | 77.208 | 3700591 | 2.766969e+08 | 20421.0 | 0.0155 | 1.785667e+10 | 1.785667e+10 |
龙软科技 | 688078.XSHG | XSHG | 2021-12-31 | 62.467 | 59.057 | 963837 | 5.722752e+07 | 2401.0 | 0.0339 | 1.657041e+09 | 4.117650e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-12-31 | 23.830 | 23.620 | 833004 | 1.976806e+07 | 1403.0 | 0.0260 | 7.558400e+08 | 3.023360e+09 |
4853 rows × 11 columns
df.groupby('secShortName').first()
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2015-01-05 | 28.364 | 27.304 | 3103725 | 8.408389e+07 | 4279.0 | 0.0443 | 1.912092e+09 | 1.912092e+09 |
*ST上普 | 600680.XSHG | XSHG | 2015-01-05 | 57.366 | 57.562 | 483414 | 2.566049e+07 | 1787.0 | 0.0069 | 3.781578e+09 | 5.614890e+09 |
*ST东海A | 000613.XSHE | XSHE | 2015-01-05 | 9.769 | 9.783 | 2242078 | 2.191941e+07 | 1159.0 | 0.0123 | 1.782263e+09 | 2.490444e+09 |
*ST东海B | 200613.XSHE | XSHE | 2015-01-05 | 6.642 | 6.576 | 164636 | 1.085171e+06 | 50.0 | 0.0021 | 5.271200e+08 | 2.180959e+09 |
*ST东电 | 000585.XSHE | XSHE | 2015-01-05 | 5.183 | 5.272 | 9358955 | 4.800403e+07 | 4519.0 | 0.0192 | 2.516909e+09 | 3.607018e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2020-07-27 | 9.180 | 7.840 | 4804739 | 4.053933e+07 | NaN | 0.0730 | 5.157141e+08 | 8.976079e+08 |
龙腾光电 | 688055.XSHG | XSHG | 2020-08-17 | 1.220 | 9.850 | 215585877 | 1.892487e+09 | 211520.0 | 0.8003 | 2.653273e+09 | 3.283333e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2017-04-10 | 9.520 | 13.710 | 11500 | 1.576650e+05 | 15.0 | 0.0002 | 7.129200e+08 | 2.851680e+09 |
龙软科技 | 688078.XSHG | XSHG | 2019-12-30 | 21.590 | 48.900 | 12454266 | 6.069590e+08 | 28417.0 | 0.7736 | 7.872460e+08 | 3.459675e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-04-16 | 12.860 | 18.520 | 226150 | 4.186135e+06 | 345.0 | 0.0071 | 5.926400e+08 | 2.370560e+09 |
4853 rows × 11 columns
df.groupby('secShortName').head(3)
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
1705 | 000002.XSHE | 万科A | XSHE | 2015-01-05 | 1567.420 | 1681.312 | 11835160 | 9.700712e+09 | 185938.0 | 0.0676 | 1.447266e+11 | 1.645692e+11 |
1706 | 000002.XSHE | 万科A | XSHE | 2015-01-06 | 1681.312 | 1619.292 | 6036510 | 4.839616e+09 | 123274.0 | 0.0345 | 1.393879e+11 | 1.584986e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5835695 | 900956.XSHG | 东贝B股 | XSHG | 2015-01-06 | 1.159 | 1.158 | 438800 | 4.708660e+05 | 265.0 | 0.0038 | 1.242000e+08 | 2.538000e+08 |
5835696 | 900956.XSHG | 东贝B股 | XSHG | 2015-01-07 | 1.158 | 1.159 | 429200 | 4.600200e+05 | 236.0 | 0.0037 | 1.243150e+08 | 2.540350e+08 |
5837045 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-05 | 0.620 | 0.628 | 615555 | 3.823390e+05 | 268.0 | 0.0033 | 1.146320e+08 | 2.174270e+08 |
5837046 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-06 | 0.628 | 0.623 | 459481 | 2.838860e+05 | 194.0 | 0.0025 | 1.137120e+08 | 2.156820e+08 |
5837047 | 900957.XSHG | 凌云B股 | XSHG | 2015-01-07 | 0.623 | 0.631 | 703095 | 4.412970e+05 | 324.0 | 0.0038 | 1.151840e+08 | 2.184740e+08 |
14554 rows × 12 columns
df.groupby('secShortName').nth(-2)
secID | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | |||||||||||
*ST万方 | 000638.XSHE | XSHE | 2021-12-30 | 21.163 | 21.428 | 793779 | 1.689394e+07 | 1009.0 | 0.0113 | 1.500590e+09 | 1.500590e+09 |
*ST上普 | 600680.XSHG | XSHG | 2018-04-26 | 28.800 | 28.879 | 867415 | 2.348474e+07 | 1907.0 | 0.0123 | 1.897224e+09 | 2.817000e+09 |
*ST东海A | 000613.XSHE | XSHE | 2021-12-30 | 5.521 | 5.678 | 2735594 | 1.541656e+07 | 1158.0 | 0.0147 | 1.059513e+09 | 1.445477e+09 |
*ST东海B | 200613.XSHE | XSHE | 2021-12-30 | 1.757 | 1.757 | 261068 | 4.593800e+05 | 10.0 | 0.0033 | 1.408000e+08 | 5.825600e+08 |
*ST东电 | 000585.XSHE | XSHE | 2021-12-30 | 2.604 | 2.617 | 2416167 | 6.230545e+06 | 678.0 | 0.0050 | 1.249313e+09 | 1.790408e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 831445.XBEI | XBEI | 2021-12-30 | 20.167 | 20.824 | 561059 | 1.063724e+07 | NaN | 0.0083 | 1.280519e+09 | 2.188552e+09 |
龙腾光电 | 688055.XSHG | XSHG | 2021-12-30 | 6.768 | 6.808 | 4752067 | 3.232808e+07 | 3395.0 | 0.0027 | 1.214933e+10 | 2.266667e+10 |
龙蟠科技 | 603906.XSHG | XSHG | 2021-12-30 | 79.167 | 77.083 | 5338171 | 4.007450e+08 | 31920.0 | 0.0223 | 1.782774e+10 | 1.782774e+10 |
龙软科技 | 688078.XSHG | XSHG | 2021-12-30 | 61.980 | 62.467 | 387495 | 2.392429e+07 | 1454.0 | 0.0136 | 1.752706e+09 | 4.355370e+09 |
龙高股份 | 605086.XSHG | XSHG | 2021-12-30 | 23.510 | 23.830 | 1185853 | 2.812384e+07 | 2482.0 | 0.0371 | 7.625600e+08 | 3.050240e+09 |
4852 rows × 11 columns
df.groupby('secShortName').mean()
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
secShortName | ||||||||
*ST万方 | 39.618968 | 39.614163 | 2.316634e+06 | 1.102542e+08 | 7475.640537 | 0.033094 | 2.774162e+09 | 2.774162e+09 |
*ST上普 | 106.362365 | 106.328535 | 1.877760e+06 | 2.342776e+08 | 10843.311801 | 0.026628 | 6.985375e+09 | 1.037189e+10 |
*ST东海A | 12.435056 | 12.432537 | 7.520607e+06 | 9.535778e+07 | 6657.424087 | 0.040626 | 2.296483e+09 | 3.164773e+09 |
*ST东海B | 4.751169 | 4.748037 | 2.556820e+05 | 1.502798e+06 | 117.267949 | 0.003196 | 3.805944e+08 | 1.574709e+09 |
*ST东电 | 4.580043 | 4.578312 | 1.199534e+07 | 8.335420e+07 | 6491.688259 | 0.024605 | 2.185586e+09 | 3.132240e+09 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
龙竹科技 | 14.015801 | 14.049755 | 3.165277e+05 | 4.924385e+06 | NaN | 0.004721 | 8.849954e+08 | 1.512647e+09 |
龙腾光电 | 7.765244 | 7.782024 | 1.878519e+07 | 1.680616e+08 | 9361.264881 | 0.063032 | 5.000334e+09 | 2.593125e+10 |
龙蟠科技 | 29.892603 | 29.951208 | 4.275701e+06 | 1.522334e+08 | 10397.070996 | 0.043078 | 5.225983e+09 | 6.620047e+09 |
龙软科技 | 43.000576 | 43.077352 | 1.276160e+06 | 5.953778e+07 | 2374.625000 | 0.063196 | 9.502590e+08 | 3.024019e+09 |
龙高股份 | 25.374743 | 25.436229 | 3.622853e+06 | 1.005611e+08 | 7862.857143 | 0.113213 | 8.139593e+08 | 3.255837e+09 |
4853 rows × 8 columns
df
secID | secShortName | exchangeCD | tradeDate | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000001.XSHE | 平安银行 | XSHE | 2015-01-05 | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 000001.XSHE | 平安银行 | XSHE | 2015-01-06 | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 000001.XSHE | 平安银行 | XSHE | 2015-01-07 | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 000001.XSHE | 平安银行 | XSHE | 2015-01-08 | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 000001.XSHE | 平安银行 | XSHE | 2015-01-09 | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-27 | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-28 | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-29 | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-30 | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 900957.XSHG | 凌云B股 | XSHG | 2021-12-31 | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
df['tradeDate'] = pd.to_datetime(df['tradeDate'], format='%Y-%m-%d')
df.set_index(['secShortName','tradeDate'])
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 | |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 | |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 | |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 | |
2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 | |
2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 | |
2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 10 columns
df.set_index(['secShortName','tradeDate'],inplace=True)
df.groupby(['secShortName','tradeDate']).last()
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | ||
---|---|---|---|---|---|---|---|---|---|---|---|
secShortName | tradeDate | ||||||||||
*ST万方 | 2015-01-05 | 000638.XSHE | XSHE | 28.364 | 27.304 | 3103725 | 84083888.60 | 4279.0 | 0.0443 | 1.912092e+09 | 1.912092e+09 |
2015-01-06 | 000638.XSHE | XSHE | 27.304 | 27.039 | 1926608 | 51665933.96 | 3060.0 | 0.0275 | 1.893528e+09 | 1.893528e+09 | |
2015-01-07 | 000638.XSHE | XSHE | 27.039 | 26.995 | 1295798 | 35013368.40 | 1919.0 | 0.0185 | 1.890434e+09 | 1.890434e+09 | |
2015-01-08 | 000638.XSHE | XSHE | 26.995 | 27.702 | 2224507 | 61274527.17 | 3560.0 | 0.0318 | 1.939938e+09 | 1.939938e+09 | |
2015-01-09 | 000638.XSHE | XSHE | 27.702 | 27.348 | 1053730 | 29061439.79 | 1860.0 | 0.0151 | 1.915186e+09 | 1.915186e+09 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
龙高股份 | 2021-12-27 | 605086.XSHG | XSHG | 23.320 | 23.340 | 711496 | 16590677.00 | 1586.0 | 0.0222 | 7.468800e+08 | 2.987520e+09 |
2021-12-28 | 605086.XSHG | XSHG | 23.340 | 23.540 | 819496 | 19232165.00 | 1664.0 | 0.0256 | 7.532800e+08 | 3.013120e+09 | |
2021-12-29 | 605086.XSHG | XSHG | 23.540 | 23.510 | 656443 | 15410019.00 | 1646.0 | 0.0205 | 7.523200e+08 | 3.009280e+09 | |
2021-12-30 | 605086.XSHG | XSHG | 23.510 | 23.830 | 1185853 | 28123837.00 | 2482.0 | 0.0371 | 7.625600e+08 | 3.050240e+09 | |
2021-12-31 | 605086.XSHG | XSHG | 23.830 | 23.620 | 833004 | 19768062.00 | 1403.0 | 0.0260 | 7.558400e+08 | 3.023360e+09 |
5838680 rows × 10 columns
df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key
<ipython-input-125-640a6717305b>:1: PerformanceWarning: indexing past lexsort depth may impact performance. df.loc[('平安银行','2021')] # a tuple is interpreted as one multi-level key /Users/Bert/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py:3724: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead. return self[key]
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2021-01-04 | 000001.XSHE | XSHE | 2466.388 | 2372.018 | 1873816 | 2.891682e+09 | 119328.0 | 0.0080 | 3.609470e+11 | 3.609501e+11 |
2021-01-05 | 000001.XSHE | XSHE | 2372.018 | 2317.181 | 2195884 | 3.284607e+09 | 187364.0 | 0.0094 | 3.526025e+11 | 3.526055e+11 |
2021-01-06 | 000001.XSHE | XSHE | 2317.181 | 2494.444 | 2332835 | 3.648522e+09 | 154714.0 | 0.0100 | 3.795765e+11 | 3.795798e+11 |
2021-01-07 | 000001.XSHE | XSHE | 2494.444 | 2537.804 | 1909948 | 3.111275e+09 | 123304.0 | 0.0082 | 3.861745e+11 | 3.861778e+11 |
2021-01-08 | 000001.XSHE | XSHE | 2537.804 | 2531.427 | 1441303 | 2.348316e+09 | 95828.0 | 0.0062 | 3.852042e+11 | 3.852075e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 000001.XSHE | XSHE | 2224.866 | 2213.298 | 881462 | 1.260455e+09 | 66911.0 | 0.0038 | 3.341671e+11 | 3.341699e+11 |
2021-12-28 | 000001.XSHE | XSHE | 2213.298 | 2206.872 | 1358314 | 1.934461e+09 | 89671.0 | 0.0058 | 3.331968e+11 | 3.331996e+11 |
2021-12-29 | 000001.XSHE | XSHE | 2206.872 | 2152.889 | 1771527 | 2.480535e+09 | 147243.0 | 0.0076 | 3.250464e+11 | 3.250491e+11 |
2021-12-30 | 000001.XSHE | XSHE | 2152.889 | 2161.886 | 960485 | 1.342374e+09 | 66792.0 | 0.0041 | 3.264048e+11 | 3.264075e+11 |
2021-12-31 | 000001.XSHE | XSHE | 2161.886 | 2118.185 | 2110777 | 2.899617e+09 | 152423.0 | 0.0090 | 3.198068e+11 | 3.198095e+11 |
243 rows × 10 columns
df.loc[['平安银行','2021']]# a list is used to specify several keys [on the same level]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-126-6b97d705d991> in <module> ----> 1 df.loc[['平安银行','2021']]# a list is used to specify several keys [on the same level] ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in __getitem__(self, key) 893 894 maybe_callable = com.apply_if_callable(key, self.obj) --> 895 return self._getitem_axis(maybe_callable, axis=axis) 896 897 def _is_scalar_access(self, key: Tuple): ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis) 1111 raise ValueError("Cannot index with multidimensional key") 1112 -> 1113 return self._getitem_iterable(key, axis=axis) 1114 1115 # nested tuple slicing ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_iterable(self, key, axis) 1051 1052 # A collection of keys -> 1053 keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False) 1054 return self.obj._reindex_with_indexers( 1055 {axis: [keyarr, indexer]}, copy=True, allow_dups=True ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing) 1252 # Have the index compute an indexer or return None 1253 # if it cannot handle: -> 1254 indexer, keyarr = ax._convert_listlike_indexer(key) 1255 # We only act on all found values: 1256 if indexer is not None and (indexer != -1).all(): ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/multi.py in _convert_listlike_indexer(self, keyarr) 2566 mask = check == -1 2567 if mask.any(): -> 2568 raise KeyError(f"{keyarr[mask]} not in index") 2569 2570 return indexer, keyarr KeyError: "['2021'] not in index"
df.loc[('平安银行','2015':'2016')]
File "<ipython-input-127-ecaebecf1e5f>", line 1 df.loc[('平安银行','2015':'2016')] ^ SyntaxError: invalid syntax
idx = pd.IndexSlice
df.loc[idx['平安银行','2015']]
<ipython-input-129-ac95cdb9f109>:1: PerformanceWarning: indexing past lexsort depth may impact performance. df.loc[idx['平安银行','2015']] /Users/Bert/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py:3724: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead. return self[key]
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2015-12-25 | 000001.XSHE | XSHE | 1219.517 | 1226.435 | 578480 | 4.956381e+08 | 19587.0 | 0.0034 | 1.464883e+11 | 1.775707e+11 |
2015-12-28 | 000001.XSHE | XSHE | 1226.435 | 1183.939 | 1189828 | 1.003010e+09 | 33043.0 | 0.0070 | 1.414126e+11 | 1.714179e+11 |
2015-12-29 | 000001.XSHE | XSHE | 1183.939 | 1194.810 | 896705 | 7.447184e+08 | 20170.0 | 0.0053 | 1.427110e+11 | 1.729919e+11 |
2015-12-30 | 000001.XSHE | XSHE | 1194.810 | 1195.798 | 770642 | 6.412773e+08 | 18064.0 | 0.0045 | 1.428291e+11 | 1.731350e+11 |
2015-12-31 | 000001.XSHE | XSHE | 1195.798 | 1184.928 | 710734 | 5.916436e+08 | 14765.0 | 0.0042 | 1.415306e+11 | 1.715610e+11 |
244 rows × 10 columns
df.loc[pd.IndexSlice['平安银行','2015-01']]
<ipython-input-130-e8c90482e808>:1: PerformanceWarning: indexing past lexsort depth may impact performance. df.loc[pd.IndexSlice['平安银行','2015-01']] /Users/Bert/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py:3724: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead. return self[key]
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
2015-01-12 | 000001.XSHE | XSHE | 1231.004 | 1205.698 | 2696688 | 2.293105e+09 | 64376.0 | 0.0158 | 1.452882e+11 | 1.687457e+11 |
2015-01-13 | 000001.XSHE | XSHE | 1205.698 | 1198.351 | 1418187 | 1.204987e+09 | 34924.0 | 0.0083 | 1.444029e+11 | 1.677175e+11 |
2015-01-14 | 000001.XSHE | XSHE | 1198.351 | 1208.963 | 2192762 | 1.889297e+09 | 50675.0 | 0.0128 | 1.456817e+11 | 1.692027e+11 |
2015-01-15 | 000001.XSHE | XSHE | 1208.963 | 1253.044 | 2156548 | 1.868796e+09 | 54470.0 | 0.0126 | 1.509935e+11 | 1.753721e+11 |
2015-01-16 | 000001.XSHE | XSHE | 1253.044 | 1254.677 | 2701125 | 2.403346e+09 | 60637.0 | 0.0158 | 1.511903e+11 | 1.756006e+11 |
2015-01-19 | 000001.XSHE | XSHE | 1254.677 | 1128.964 | 3710288 | 3.016203e+09 | 103111.0 | 0.0217 | 1.360417e+11 | 1.580063e+11 |
2015-01-20 | 000001.XSHE | XSHE | 1128.964 | 1128.964 | 2588576 | 2.064281e+09 | 65826.0 | 0.0152 | 1.360417e+11 | 1.580063e+11 |
2015-01-21 | 000001.XSHE | XSHE | 1128.964 | 1177.127 | 3368980 | 2.758193e+09 | 78709.0 | 0.0197 | 1.418454e+11 | 1.647470e+11 |
2015-01-22 | 000001.XSHE | XSHE | 1177.127 | 1167.331 | 2178850 | 1.801436e+09 | 53599.0 | 0.0128 | 1.406650e+11 | 1.633760e+11 |
2015-01-23 | 000001.XSHE | XSHE | 1167.331 | 1175.494 | 2533304 | 2.108747e+09 | 53672.0 | 0.0148 | 1.416487e+11 | 1.645185e+11 |
2015-01-26 | 000001.XSHE | XSHE | 1175.494 | 1170.596 | 1836123 | 1.508447e+09 | 47025.0 | 0.0108 | 1.410585e+11 | 1.638330e+11 |
2015-01-27 | 000001.XSHE | XSHE | 1170.596 | 1142.025 | 2325514 | 1.881059e+09 | 61710.0 | 0.0136 | 1.376156e+11 | 1.598343e+11 |
2015-01-28 | 000001.XSHE | XSHE | 1142.025 | 1147.739 | 2154303 | 1.742176e+09 | 55381.0 | 0.0126 | 1.383042e+11 | 1.606340e+11 |
2015-01-29 | 000001.XSHE | XSHE | 1147.739 | 1134.678 | 1765198 | 1.408825e+09 | 43493.0 | 0.0103 | 1.367303e+11 | 1.588060e+11 |
2015-01-30 | 000001.XSHE | XSHE | 1134.678 | 1137.127 | 1614788 | 1.298736e+09 | 42061.0 | 0.0095 | 1.370254e+11 | 1.591488e+11 |
df.loc[idx['平安银行','2015':'2016']] # multiindex is difficult involving time
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 3079 try: -> 3080 return self._engine.get_loc(casted_key) 3081 except KeyError as err: pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: '2015' The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) <ipython-input-131-6885d4316869> in <module> ----> 1 df.loc[idx['平安银行','2015':'2016']] # multiindex is difficult involving time ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in __getitem__(self, key) 887 # AttributeError for IntervalTree get_value 888 return self.obj._get_value(*key, takeable=self._takeable) --> 889 return self._getitem_tuple(key) 890 else: 891 # we by definition only have the 0th axis ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup) 1058 def _getitem_tuple(self, tup: Tuple): 1059 with suppress(IndexingError): -> 1060 return self._getitem_lowerdim(tup) 1061 1062 # no multi-index, so validate all of the indexers ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_lowerdim(self, tup) 789 # we may have a nested tuples indexer here 790 if self._is_nested_tuple_indexer(tup): --> 791 return self._getitem_nested_tuple(tup) 792 793 # we maybe be using a tuple to represent multiple dimensions here ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_nested_tuple(self, tup) 863 864 current_ndim = obj.ndim --> 865 obj = getattr(obj, self.name)._getitem_axis(key, axis=axis) 866 axis += 1 867 ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis) 1100 if isinstance(key, slice): 1101 self._validate_key(key, axis) -> 1102 return self._get_slice_axis(key, axis=axis) 1103 elif com.is_bool_indexer(key): 1104 return self._getbool_axis(key, axis=axis) ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis) 1134 1135 labels = obj._get_axis(axis) -> 1136 indexer = labels.slice_indexer( 1137 slice_obj.start, slice_obj.stop, slice_obj.step, kind="loc" 1138 ) ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in slice_indexer(self, start, end, step, kind) 5275 slice(1, 3, None) 5276 """ -> 5277 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) 5278 5279 # return a slice ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in slice_locs(self, start, end, step, kind) 5474 start_slice = None 5475 if start is not None: -> 5476 start_slice = self.get_slice_bound(start, "left", kind) 5477 if start_slice is None: 5478 start_slice = 0 ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind) 5394 except ValueError: 5395 # raise the original KeyError -> 5396 raise err 5397 5398 if isinstance(slc, np.ndarray): ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind) 5388 # we need to look up the label 5389 try: -> 5390 slc = self.get_loc(label) 5391 except KeyError as err: 5392 try: ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 3080 return self._engine.get_loc(casted_key) 3081 except KeyError as err: -> 3082 raise KeyError(key) from err 3083 3084 if tolerance is not None: KeyError: '2015'
df.loc['平安银行'].loc['2015':'2016']
secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|
tradeDate | ||||||||||
2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2016-12-26 | 000001.XSHE | XSHE | 1092.827 | 1097.641 | 364173 | 2.739343e+08 | 11409.0 | 0.0021 | 1.334364e+11 | 1.565942e+11 |
2016-12-27 | 000001.XSHE | XSHE | 1097.641 | 1092.827 | 324124 | 2.447883e+08 | 9433.0 | 0.0018 | 1.328511e+11 | 1.559073e+11 |
2016-12-28 | 000001.XSHE | XSHE | 1092.827 | 1090.420 | 405159 | 3.048986e+08 | 10133.0 | 0.0023 | 1.325585e+11 | 1.555639e+11 |
2016-12-29 | 000001.XSHE | XSHE | 1090.420 | 1092.827 | 408419 | 3.071835e+08 | 9453.0 | 0.0023 | 1.328511e+11 | 1.559073e+11 |
2016-12-30 | 000001.XSHE | XSHE | 1092.827 | 1095.234 | 364834 | 2.748827e+08 | 10822.0 | 0.0021 | 1.331437e+11 | 1.562507e+11 |
488 rows × 10 columns
df1 = pd.DataFrame({'a':[1,2,3],'b':[4,5,6],'c':[10,20,30]})
df2 = pd.DataFrame({'a':np.array([1,2,6]),
'b':[s for s in 'abc'],
'd':[s for s in 'zxz']})
df1
a | b | c | |
---|---|---|---|
0 | 1 | 4 | 10 |
1 | 2 | 5 | 20 |
2 | 3 | 6 | 30 |
df2
a | b | d | |
---|---|---|---|
0 | 1 | a | z |
1 | 2 | b | x |
2 | 6 | c | z |
pd.merge(df1,df2,on='a')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4 | 10 | a | z |
1 | 2 | 5 | 20 | b | x |
pd.merge(df1,df2,on='a',how='outer')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4.0 | 10.0 | a | z |
1 | 2 | 5.0 | 20.0 | b | x |
2 | 3 | 6.0 | 30.0 | NaN | NaN |
3 | 6 | NaN | NaN | c | z |
pd.merge(df1,df2,on='a',how='left')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4 | 10 | a | z |
1 | 2 | 5 | 20 | b | x |
2 | 3 | 6 | 30 | NaN | NaN |
pd.merge(df1,df2,on='a',how='right')
a | b_x | c | b_y | d | |
---|---|---|---|---|---|
0 | 1 | 4.0 | 10.0 | a | z |
1 | 2 | 5.0 | 20.0 | b | x |
2 | 6 | NaN | NaN | c | z |
def unpivot(frame):
N, K = frame.shape
data = {
"value": frame.to_numpy().ravel("F"),
"variable": np.asarray(frame.columns).repeat(N),
"date": np.tile(np.asarray(frame.index), K),
}
return pd.DataFrame(data, columns=["date", "variable", "value"])
tm.makeTimeDataFrame(3)
A | B | C | D | |
---|---|---|---|---|
2000-01-03 | -0.923514 | 1.096446 | 0.141411 | -0.001163 |
2000-01-04 | 0.227181 | 0.446211 | 0.401810 | 1.658210 |
2000-01-05 | 0.143355 | -0.599731 | 2.453257 | 0.124079 |
temp = unpivot(tm.makeTimeDataFrame(3))
temp
date | variable | value | |
---|---|---|---|
0 | 2000-01-03 | A | 0.838609 |
1 | 2000-01-04 | A | -0.973108 |
2 | 2000-01-05 | A | 0.318187 |
3 | 2000-01-03 | B | 0.412418 |
4 | 2000-01-04 | B | 0.110005 |
5 | 2000-01-05 | B | 0.129594 |
6 | 2000-01-03 | C | 0.381854 |
7 | 2000-01-04 | C | 0.282011 |
8 | 2000-01-05 | C | 1.519140 |
9 | 2000-01-03 | D | 0.060368 |
10 | 2000-01-04 | D | -0.117170 |
11 | 2000-01-05 | D | -0.894176 |
temp.pivot(index='date',columns='variable',values='value')
variable | A | B | C | D |
---|---|---|---|---|
date | ||||
2000-01-03 | 0.838609 | 0.412418 | 0.381854 | 0.060368 |
2000-01-04 | -0.973108 | 0.110005 | 0.282011 | -0.117170 |
2000-01-05 | 0.318187 | 0.129594 | 1.519140 | -0.894176 |
temp['value2'] = temp['value']**2
temp.pivot(index='date',columns='variable')
value | value2 | |||||||
---|---|---|---|---|---|---|---|---|
variable | A | B | C | D | A | B | C | D |
date | ||||||||
2000-01-03 | 0.838609 | 0.412418 | 0.381854 | 0.060368 | 0.703265 | 0.170089 | 0.145813 | 0.003644 |
2000-01-04 | -0.973108 | 0.110005 | 0.282011 | -0.117170 | 0.946938 | 0.012101 | 0.079530 | 0.013729 |
2000-01-05 | 0.318187 | 0.129594 | 1.519140 | -0.894176 | 0.101243 | 0.016795 | 2.307786 | 0.799551 |
df.reset_index()
secShortName | tradeDate | secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 平安银行 | 2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 平安银行 | 2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 平安银行 | 2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 平安银行 | 2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 凌云B股 | 2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 凌云B股 | 2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 凌云B股 | 2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 凌云B股 | 2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 12 columns
df.reset_index(inplace=True)
df.reset_index()
index | secShortName | tradeDate | secID | exchangeCD | preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 平安银行 | 2015-01-05 | 000001.XSHE | XSHE | 1293.044 | 1307.737 | 4966040 | 4.565388e+09 | 92478.0 | 0.0291 | 1.575841e+11 | 1.830268e+11 |
1 | 1 | 平安银行 | 2015-01-06 | 000001.XSHE | XSHE | 1307.737 | 1288.146 | 3761152 | 3.453446e+09 | 80325.0 | 0.0220 | 1.552233e+11 | 1.802848e+11 |
2 | 2 | 平安银行 | 2015-01-07 | 000001.XSHE | XSHE | 1288.146 | 1263.656 | 2951601 | 2.634796e+09 | 72697.0 | 0.0173 | 1.522723e+11 | 1.768574e+11 |
3 | 3 | 平安银行 | 2015-01-08 | 000001.XSHE | XSHE | 1263.656 | 1221.208 | 2443951 | 2.128003e+09 | 68734.0 | 0.0143 | 1.471572e+11 | 1.709164e+11 |
4 | 4 | 平安银行 | 2015-01-09 | 000001.XSHE | XSHE | 1221.208 | 1231.004 | 4355039 | 3.835378e+09 | 99882.0 | 0.0255 | 1.483376e+11 | 1.722874e+11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5838745 | 5838745 | 凌云B股 | 2021-12-27 | 900957.XSHG | XSHG | 0.625 | 0.638 | 488511 | 3.081540e+05 | 226.0 | 0.0027 | 1.164720e+08 | 2.209170e+08 |
5838746 | 5838746 | 凌云B股 | 2021-12-28 | 900957.XSHG | XSHG | 0.638 | 0.637 | 177702 | 1.118810e+05 | 64.0 | 0.0010 | 1.162880e+08 | 2.205680e+08 |
5838747 | 5838747 | 凌云B股 | 2021-12-29 | 900957.XSHG | XSHG | 0.637 | 0.630 | 123550 | 7.733300e+04 | 58.0 | 0.0007 | 1.150000e+08 | 2.181250e+08 |
5838748 | 5838748 | 凌云B股 | 2021-12-30 | 900957.XSHG | XSHG | 0.630 | 0.635 | 113600 | 7.130800e+04 | 41.0 | 0.0006 | 1.159200e+08 | 2.198700e+08 |
5838749 | 5838749 | 凌云B股 | 2021-12-31 | 900957.XSHG | XSHG | 0.635 | 0.636 | 167800 | 1.059960e+05 | 74.0 | 0.0009 | 1.161040e+08 | 2.202190e+08 |
5838750 rows × 13 columns
df.reset_index().pivot(index='tradeDate',columns='secID',values='closePrice')
secID | 000001.XSHE | 000002.XSHE | 000004.XSHE | 000005.XSHE | 000006.XSHE | 000007.XSHE | 000008.XSHE | 000009.XSHE | 000010.XSHE | 000011.XSHE | ... | 900946.XSHG | 900947.XSHG | 900948.XSHG | 900950.XSHG | 900951.XSHG | 900952.XSHG | 900953.XSHG | 900955.XSHG | 900956.XSHG | 900957.XSHG |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
tradeDate | |||||||||||||||||||||
2015-01-05 | 1307.737 | 1681.312 | 107.225 | NaN | 410.529 | 116.458 | 131.850 | NaN | NaN | 35.839 | ... | 0.426 | 6.184 | 20.059 | NaN | 0.450 | 1.040 | 0.561 | 1.647 | 1.159 | 0.628 |
2015-01-06 | 1288.146 | 1619.292 | 112.487 | NaN | 397.193 | 114.453 | 132.640 | NaN | NaN | 34.597 | ... | 0.430 | 6.129 | 19.781 | NaN | 0.446 | 1.046 | 0.561 | 1.629 | 1.158 | 0.623 |
2015-01-07 | 1263.656 | 1604.632 | 112.145 | NaN | 397.773 | 117.019 | 135.678 | 81.375 | NaN | 35.094 | ... | 0.427 | 6.129 | 19.675 | NaN | 0.448 | 1.048 | 0.556 | 1.616 | 1.159 | 0.631 |
2015-01-08 | 1221.208 | 1532.463 | 115.630 | NaN | 393.134 | 116.939 | 139.992 | 81.497 | NaN | 34.207 | ... | 0.430 | 5.932 | 19.767 | NaN | 0.448 | 1.059 | 0.552 | 1.609 | 1.140 | 0.624 |
2015-01-09 | 1231.004 | 1516.676 | 112.282 | NaN | 388.495 | 116.297 | 140.782 | 79.238 | NaN | 33.532 | ... | 0.430 | 5.987 | 19.781 | NaN | 0.453 | 1.059 | 0.553 | 1.609 | 1.139 | 0.625 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-27 | 2213.298 | 2919.261 | 128.342 | 22.207 | 306.584 | 69.899 | 77.998 | 140.247 | 40.728 | 48.496 | ... | 0.252 | 4.078 | 15.932 | NaN | NaN | 0.554 | 0.348 | 0.413 | NaN | 0.638 |
2021-12-28 | 2206.872 | 2928.188 | 133.399 | 22.502 | 303.124 | 68.816 | 73.156 | 143.778 | 40.836 | 47.957 | ... | 0.244 | 4.078 | 15.682 | NaN | NaN | 0.559 | 0.345 | 0.407 | NaN | 0.637 |
2021-12-29 | 2152.889 | 2883.551 | 132.510 | 22.502 | 303.816 | 69.417 | 70.736 | 143.980 | 40.512 | 47.584 | ... | 0.246 | 4.092 | 15.682 | NaN | NaN | 0.559 | 0.342 | 0.403 | NaN | 0.630 |
2021-12-30 | 2161.886 | 2844.866 | 133.945 | 22.502 | 303.124 | 71.102 | 73.963 | 143.072 | 40.512 | 47.501 | ... | 0.251 | 4.121 | 15.700 | NaN | NaN | 0.561 | 0.344 | 0.407 | NaN | 0.635 |
2021-12-31 | 2118.185 | 2940.092 | 132.715 | 22.600 | 308.660 | 73.267 | 79.074 | 145.594 | 40.620 | 48.330 | ... | 0.253 | 4.107 | 15.575 | NaN | NaN | 0.561 | 0.350 | 0.413 | NaN | 0.636 |
1705 rows × 4854 columns
import matplotlib as mpl
import matplotlib.pyplot as plt
fig = plt.figure() # an empty figure with no Axes
fig, ax = plt.subplots() # a figure with a single Axes
fig, axs = plt.subplots(2, 2) # a figure with a 2x2 grid of Axes
<Figure size 432x288 with 0 Axes>
fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
[<matplotlib.lines.Line2D at 0x7fc69e3dcb80>]
temp
date | variable | value | value2 | |
---|---|---|---|---|
0 | 2000-01-03 | A | 0.838609 | 0.703265 |
1 | 2000-01-04 | A | -0.973108 | 0.946938 |
2 | 2000-01-05 | A | 0.318187 | 0.101243 |
3 | 2000-01-03 | B | 0.412418 | 0.170089 |
4 | 2000-01-04 | B | 0.110005 | 0.012101 |
5 | 2000-01-05 | B | 0.129594 | 0.016795 |
6 | 2000-01-03 | C | 0.381854 | 0.145813 |
7 | 2000-01-04 | C | 0.282011 | 0.079530 |
8 | 2000-01-05 | C | 1.519140 | 2.307786 |
9 | 2000-01-03 | D | 0.060368 | 0.003644 |
10 | 2000-01-04 | D | -0.117170 | 0.013729 |
11 | 2000-01-05 | D | -0.894176 | 0.799551 |
fig, axs = plt.subplots(1,2)
x = range(temp.shape[0])
y1 = temp['value']
y2 = temp['value']**2
axs[0].plot(x, y1)
axs[1].plot(x, y2)
axs[0].plot(x, y1**3)
[<matplotlib.lines.Line2D at 0x7fc69e069ee0>]
x = pd.date_range(start='2021-01-03',periods=temp.shape[0])
plt.plot(x,temp['value'], label='random value')
plt.plot(x,temp['value']**2, label='quadratic of random value')
plt.xlabel('date')
plt.ylabel('value')
plt.title('Example')
plt.legend()
<matplotlib.legend.Legend at 0x7fc69e15cdf0>
df_pingan.select_dtypes(np.number).corr()
preClosePrice | closePrice | turnoverVol | turnoverValue | dealAmount | turnoverRate | negMarketValue | marketValue | |
---|---|---|---|---|---|---|---|---|
preClosePrice | 1.000000 | 0.997575 | 0.064982 | 0.389881 | 0.601451 | -0.039942 | 0.989830 | 0.994295 |
closePrice | 0.997575 | 1.000000 | 0.075458 | 0.400851 | 0.606522 | -0.030710 | 0.991942 | 0.996452 |
turnoverVol | 0.064982 | 0.075458 | 1.000000 | 0.924077 | 0.752066 | 0.981827 | 0.027619 | 0.055961 |
turnoverValue | 0.389881 | 0.400851 | 0.924077 | 1.000000 | 0.914040 | 0.862471 | 0.355060 | 0.383088 |
dealAmount | 0.601451 | 0.606522 | 0.752066 | 0.914040 | 1.000000 | 0.665757 | 0.573258 | 0.596717 |
turnoverRate | -0.039942 | -0.030710 | 0.981827 | 0.862471 | 0.665757 | 1.000000 | -0.090386 | -0.050985 |
negMarketValue | 0.989830 | 0.991942 | 0.027619 | 0.355060 | 0.573258 | -0.090386 | 1.000000 | 0.993615 |
marketValue | 0.994295 | 0.996452 | 0.055961 | 0.383088 | 0.596717 | -0.050985 | 0.993615 | 1.000000 |
corr = df_pingan.select_dtypes(np.number).corr()
np.ones_like(corr, dtype=bool)
array([[ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True], [ True, True, True, True, True, True, True, True]])
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))
import seaborn as sns
# Set up the matplotlib figure
fig, ax = plt.subplots(figsize=(18, 18))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0, annot=True,
square=True, linewidths=.5, annot_kws={"size": 10},cbar_kws={"shrink": .5})
<AxesSubplot:>
df_pingan_num = df_pingan.select_dtypes(np.number)
df_pingan_num.plot()
<AxesSubplot:xlabel='tradeDate'>
plt.rcParams['figure.figsize'] = (16.0, 9.0)
df_pingan_num.drop(['negMarketValue','marketValue'],axis=1).plot()
<AxesSubplot:xlabel='tradeDate'>
def standardization(col):
return (col - col.mean()) / col.std()
df_pingan_num.apply(standardization).plot()
<AxesSubplot:xlabel='tradeDate'>
df_pingan_num.apply(standardization)[['preClosePrice','turnoverValue','marketValue']].plot(style=['bs-', 'ro-', 'y^-'])
<AxesSubplot:xlabel='tradeDate'>