파이썬 판다스(Pandas) 데이터프레임 데이터 추출
데이터프레임 데이터 추출
import pandas as pd
df = pd.read_csv("https://raw.githubusercontent.com/jin0choi1216/dataset/main/AAPL.csv", index_col = 0)
df.head(5)
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2012-01-03 | 14.621429 | 14.732143 | 14.607143 | 14.686786 | 12.466090 | 302220800 |
2012-01-04 | 14.642857 | 14.810000 | 14.617143 | 14.765714 | 12.533089 | 260022000 |
2012-01-05 | 14.819643 | 14.948214 | 14.738214 | 14.929643 | 12.672229 | 271269600 |
2012-01-06 | 14.991786 | 15.098214 | 14.972143 | 15.085714 | 12.804703 | 318292800 |
2012-01-09 | 15.196429 | 15.276786 | 15.048214 | 15.061786 | 12.784389 | 394024400 |
1. 데이터 추출하기
1) 비교 연산
df[(df["Close"] >= 150) & (df["Open"] >= 160) ]
df.query("(Close >= 150) and (Open >= 160)") # 같은 쿼리
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2021-11-22 | 161.679993 | 165.699997 | 161.000000 | 161.020004 | 159.410767 | 117467900 |
2021-11-23 | 161.119995 | 161.800003 | 159.059998 | 161.410004 | 159.796844 | 96041900 |
2021-11-24 | 160.750000 | 162.139999 | 159.639999 | 161.940002 | 160.321533 | 69463600 |
2021-12-01 | 167.479996 | 170.300003 | 164.529999 | 164.770004 | 163.123276 | 152052500 |
2021-12-03 | 164.020004 | 164.960007 | 159.720001 | 161.839996 | 160.222549 | 118023100 |
... | ... | ... | ... | ... | ... | ... |
2023-08-21 | 175.070007 | 176.130005 | 173.740005 | 175.839996 | 175.839996 | 46311900 |
2023-08-22 | 177.059998 | 177.679993 | 176.250000 | 177.229996 | 177.229996 | 42084200 |
2023-08-23 | 178.520004 | 181.550003 | 178.330002 | 181.119995 | 181.119995 | 52722800 |
2023-08-24 | 180.669998 | 181.100006 | 176.009995 | 176.380005 | 176.380005 | 54945800 |
2023-08-25 | 177.380005 | 179.149994 | 175.820007 | 178.610001 | 178.610001 | 51418700 |
224 rows × 6 columns
2) 질의
# 문자열 query
df.query("Volume > 100000000")
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2012-01-03 | 14.621429 | 14.732143 | 14.607143 | 14.686786 | 12.466090 | 302220800 |
2012-01-04 | 14.642857 | 14.810000 | 14.617143 | 14.765714 | 12.533089 | 260022000 |
2012-01-05 | 14.819643 | 14.948214 | 14.738214 | 14.929643 | 12.672229 | 271269600 |
2012-01-06 | 14.991786 | 15.098214 | 14.972143 | 15.085714 | 12.804703 | 318292800 |
2012-01-09 | 15.196429 | 15.276786 | 15.048214 | 15.061786 | 12.784389 | 394024400 |
... | ... | ... | ... | ... | ... | ... |
2023-02-03 | 148.029999 | 157.380005 | 147.830002 | 154.500000 | 153.843628 | 154357300 |
2023-05-05 | 170.979996 | 174.300003 | 170.759995 | 173.570007 | 173.096512 | 113316400 |
2023-06-05 | 182.630005 | 184.949997 | 178.039993 | 179.580002 | 179.337830 | 121946500 |
2023-06-16 | 186.729996 | 186.990005 | 184.270004 | 184.919998 | 184.670624 | 101235600 |
2023-08-04 | 185.520004 | 187.380005 | 181.919998 | 181.990005 | 181.744583 | 115799700 |
1997 rows × 6 columns
# 인덱스 컬럼 기준 query
df.query("Date > '2020-01-01'")
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2020-01-02 | 74.059998 | 75.150002 | 73.797501 | 75.087502 | 73.249023 | 135480400 |
2020-01-03 | 74.287498 | 75.144997 | 74.125000 | 74.357498 | 72.536888 | 146322800 |
2020-01-06 | 73.447502 | 74.989998 | 73.187500 | 74.949997 | 73.114883 | 118387200 |
2020-01-07 | 74.959999 | 75.224998 | 74.370003 | 74.597504 | 72.771019 | 108872000 |
2020-01-08 | 74.290001 | 76.110001 | 74.290001 | 75.797501 | 73.941643 | 132079200 |
... | ... | ... | ... | ... | ... | ... |
2023-08-21 | 175.070007 | 176.130005 | 173.740005 | 175.839996 | 175.839996 | 46311900 |
2023-08-22 | 177.059998 | 177.679993 | 176.250000 | 177.229996 | 177.229996 | 42084200 |
2023-08-23 | 178.520004 | 181.550003 | 178.330002 | 181.119995 | 181.119995 | 52722800 |
2023-08-24 | 180.669998 | 181.100006 | 176.009995 | 176.380005 | 176.380005 | 54945800 |
2023-08-25 | 177.380005 | 179.149994 | 175.820007 | 178.610001 | 178.610001 | 51418700 |
919 rows × 6 columns
# 컬럼 값 비교 쿼리
df.query("Open > Close")
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2012-01-09 | 15.196429 | 15.276786 | 15.048214 | 15.061786 | 12.784389 | 394024400 |
2012-01-10 | 15.211071 | 15.214286 | 15.053571 | 15.115714 | 12.830169 | 258196400 |
2012-01-11 | 15.095714 | 15.101786 | 14.975357 | 15.091071 | 12.809250 | 215084800 |
2012-01-12 | 15.081429 | 15.103571 | 14.955357 | 15.049643 | 12.774082 | 212587200 |
2012-01-19 | 15.362500 | 15.406071 | 15.232500 | 15.276786 | 12.966883 | 261738400 |
... | ... | ... | ... | ... | ... | ... |
2023-08-10 | 179.479996 | 180.750000 | 177.600006 | 177.970001 | 177.729996 | 54686900 |
2023-08-15 | 178.880005 | 179.479996 | 177.050003 | 177.449997 | 177.449997 | 43622600 |
2023-08-16 | 177.130005 | 178.539993 | 176.500000 | 176.570007 | 176.570007 | 46964900 |
2023-08-17 | 177.139999 | 177.509995 | 173.479996 | 174.000000 | 174.000000 | 66062900 |
2023-08-24 | 180.669998 | 181.100006 | 176.009995 | 176.380005 | 176.380005 | 54945800 |
1388 rows × 6 columns
# isin 값에 해당하는 인덱스 목록 반환
isin = df["Volume"].isin([394024400, 258196400])
df.query("Volume in [394024400, 258196400]") # 동일한 쿼리
print(f"isin :\n{isin}")
df[isin]
isin :
Date
2012-01-03 False
2012-01-04 False
2012-01-05 False
2012-01-06 False
2012-01-09 True
...
2023-08-21 False
2023-08-22 False
2023-08-23 False
2023-08-24 False
2023-08-25 False
Name: Volume, Length: 2931, dtype: bool
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2012-01-09 | 15.196429 | 15.276786 | 15.048214 | 15.061786 | 12.784389 | 394024400 |
2012-01-10 | 15.211071 | 15.214286 | 15.053571 | 15.115714 | 12.830169 | 258196400 |
# 일치하는 시리즈 반환
df.query("Volume == 1000")
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date |
# 변수 치환
value = 1000
df.query("Volume == @value")
df.query(f"Volume == {value}")
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date |
2. 인덱스 컬럼 변환
df.reset_index()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2012-01-03 | 14.621429 | 14.732143 | 14.607143 | 14.686786 | 12.466090 | 302220800 |
1 | 2012-01-04 | 14.642857 | 14.810000 | 14.617143 | 14.765714 | 12.533089 | 260022000 |
2 | 2012-01-05 | 14.819643 | 14.948214 | 14.738214 | 14.929643 | 12.672229 | 271269600 |
3 | 2012-01-06 | 14.991786 | 15.098214 | 14.972143 | 15.085714 | 12.804703 | 318292800 |
4 | 2012-01-09 | 15.196429 | 15.276786 | 15.048214 | 15.061786 | 12.784389 | 394024400 |
... | ... | ... | ... | ... | ... | ... | ... |
2926 | 2023-08-21 | 175.070007 | 176.130005 | 173.740005 | 175.839996 | 175.839996 | 46311900 |
2927 | 2023-08-22 | 177.059998 | 177.679993 | 176.250000 | 177.229996 | 177.229996 | 42084200 |
2928 | 2023-08-23 | 178.520004 | 181.550003 | 178.330002 | 181.119995 | 181.119995 | 52722800 |
2929 | 2023-08-24 | 180.669998 | 181.100006 | 176.009995 | 176.380005 | 176.380005 | 54945800 |
2930 | 2023-08-25 | 177.380005 | 179.149994 | 175.820007 | 178.610001 | 178.610001 | 51418700 |
2931 rows × 7 columns
3. Apply
df_kospi = pd.read_csv('https://raw.githubusercontent.com/jin0choi1216/dataset/main/KOSPI_stocks.csv', index_col = 0)
df_kospi.head()
Code | ISU_CD | Name | Market | Dept | Close | ChangeCode | Changes | ChagesRatio | Open | High | Low | Volume | Amount | Marcap | Stocks | MarketId | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 005930 | KR7005930003 | 삼성전자 | KOSPI | NaN | 67100 | 2 | -1100 | -1.61 | 67100 | 67400 | 66900 | 7032462 | 471934306900 | 400572409105000 | 5969782550 | STK |
1 | 373220 | KR7373220003 | LG에너지솔루션 | KOSPI | NaN | 546000 | 1 | 11000 | 2.06 | 527000 | 549000 | 525000 | 249493 | 135513119000 | 127764000000000 | 234000000 | STK |
2 | 000660 | KR7000660001 | SK하이닉스 | KOSPI | NaN | 116500 | 2 | -4400 | -3.64 | 117400 | 118300 | 115300 | 3533647 | 412064064200 | 84812275522500 | 728002365 | STK |
3 | 207940 | KR7207940008 | 삼성바이오로직스 | KOSPI | NaN | 757000 | 2 | -6000 | -0.79 | 755000 | 764000 | 754000 | 23435 | 17763884000 | 53878718000000 | 71174000 | STK |
4 | 005490 | KR7005490008 | POSCO홀딩스 | KOSPI | NaN | 577000 | 2 | -2000 | -0.35 | 567000 | 583000 | 561000 | 904736 | 519579225000 | 48797599710000 | 84571230 | STK |
# 함수를 매개변수로 받아 호출한다
def checkChangesRatio(val: int) -> str:
if val > 0:
return "오른 주식"
else:
return "내린 주식"
df_kospi.ChagesRatio.apply(checkChangesRatio)
0 내린 주식
1 오른 주식
2 내린 주식
3 내린 주식
4 내린 주식
...
946 오른 주식
947 오른 주식
948 내린 주식
949 내린 주식
950 내린 주식
Name: ChagesRatio, Length: 951, dtype: object
# 람다 표현식
df_kospi.ChagesRatio.apply(lambda val: "오른 주식" if val > 0 else "내린 주식")
0 내린 주식
1 오른 주식
2 내린 주식
3 내린 주식
4 내린 주식
...
946 오른 주식
947 오른 주식
948 내린 주식
949 내린 주식
950 내린 주식
Name: ChagesRatio, Length: 951, dtype: object
4. With Numpy
import numpy as np
df_kospi["ADD"] = np.where(df_kospi.ChagesRatio > 0, "오른 주식", "내린 주식")
df_kospi
Code | ISU_CD | Name | Market | Dept | Close | ChangeCode | Changes | ChagesRatio | Open | High | Low | Volume | Amount | Marcap | Stocks | MarketId | TT | ADD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 005930 | KR7005930003 | 삼성전자 | KOSPI | NaN | 67100 | 2 | -1100 | -1.61 | 67100 | 67400 | 66900 | 7032462 | 471934306900 | 400572409105000 | 5969782550 | STK | 내린 주식 | 내린 주식 |
1 | 373220 | KR7373220003 | LG에너지솔루션 | KOSPI | NaN | 546000 | 1 | 11000 | 2.06 | 527000 | 549000 | 525000 | 249493 | 135513119000 | 127764000000000 | 234000000 | STK | 오른 주식 | 오른 주식 |
2 | 000660 | KR7000660001 | SK하이닉스 | KOSPI | NaN | 116500 | 2 | -4400 | -3.64 | 117400 | 118300 | 115300 | 3533647 | 412064064200 | 84812275522500 | 728002365 | STK | 내린 주식 | 내린 주식 |
3 | 207940 | KR7207940008 | 삼성바이오로직스 | KOSPI | NaN | 757000 | 2 | -6000 | -0.79 | 755000 | 764000 | 754000 | 23435 | 17763884000 | 53878718000000 | 71174000 | STK | 내린 주식 | 내린 주식 |
4 | 005490 | KR7005490008 | POSCO홀딩스 | KOSPI | NaN | 577000 | 2 | -2000 | -0.35 | 567000 | 583000 | 561000 | 904736 | 519579225000 | 48797599710000 | 84571230 | STK | 내린 주식 | 내린 주식 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
946 | 014915 | KR7014911002 | 성문전자우 | KOSPI | NaN | 5830 | 1 | 50 | 0.87 | 5780 | 5830 | 5780 | 123 | 711790 | 3498000000 | 600000 | STK | 오른 주식 | 오른 주식 |
947 | 002787 | KR7002782001 | 진흥기업2우B | KOSPI | NaN | 11170 | 1 | 150 | 1.36 | 11020 | 11170 | 10910 | 148 | 1623110 | 3293005360 | 294808 | STK | 오른 주식 | 오른 주식 |
948 | 000227 | KR7000222000 | 유유제약2우B | KOSPI | NaN | 10080 | 2 | -240 | -2.33 | 10090 | 10090 | 10080 | 75 | 756060 | 3281644800 | 325560 | STK | 내린 주식 | 내린 주식 |
949 | 001525 | KR7001521004 | 동양우 | KOSPI | NaN | 5210 | 2 | -10 | -0.19 | 5220 | 5220 | 5210 | 37 | 192810 | 3218378510 | 617731 | STK | 내린 주식 | 내린 주식 |
950 | 001527 | KR7001522002 | 동양2우B | KOSPI | NaN | 10150 | 2 | -10 | -0.10 | 10040 | 10220 | 10040 | 53 | 534860 | 3131975350 | 308569 | STK | 내린 주식 | 내린 주식 |
951 rows × 19 columns
5. 값 집계
df_value_count = df_kospi.ADD.value_counts()
print(type(df_value_count))
df_value_count
<class 'pandas.core.series.Series'>
ADD
내린 주식 583
오른 주식 368
Name: count, dtype: int64
6. 순위
df_kospi.Marcap.rank(ascending = False)
0 1.0
1 2.0
2 3.0
3 4.0
4 5.0
...
946 947.0
947 948.0
948 949.0
949 950.0
950 951.0
Name: Marcap, Length: 951, dtype: float64
7. Group by
df_krx = pd.read_csv("https://raw.githubusercontent.com/jin0choi1216/dataset/main/KRX_stocks.csv", index_col=0)
print(df_krx.head())
df_krx[["Market", "ChagesRatio"]].groupby("Market").mean()
Code ISU_CD Name Market Dept Close ChangeCode Changes \
0 005930 KR7005930003 삼성전자 KOSPI NaN 67100 2 -1100
1 373220 KR7373220003 LG에너지솔루션 KOSPI NaN 546000 1 11000
2 000660 KR7000660001 SK하이닉스 KOSPI NaN 116500 2 -4400
3 207940 KR7207940008 삼성바이오로직스 KOSPI NaN 757000 2 -6000
4 005490 KR7005490008 POSCO홀딩스 KOSPI NaN 577000 2 -2000
ChagesRatio Open High Low Volume Amount \
0 -1.61 67100 67400 66900 7032462 471934306900
1 2.06 527000 549000 525000 249493 135513119000
2 -3.64 117400 118300 115300 3533647 412064064200
3 -0.79 755000 764000 754000 23435 17763884000
4 -0.35 567000 583000 561000 904736 519579225000
Marcap Stocks MarketId
0 400572409105000 5969782550 STK
1 127764000000000 234000000 STK
2 84812275522500 728002365 STK
3 53878718000000 71174000 STK
4 48797599710000 84571230 STK
ChagesRatio | |
---|---|
Market | |
KONEX | -0.168281 |
KOSDAQ | -0.391093 |
KOSDAQ GLOBAL | -0.813600 |
KOSPI | 0.107560 |
df_krx[["Market", "ChagesRatio", "Open"]].groupby("Market").agg({"ChagesRatio": "mean", "Open": "sum"})
ChagesRatio | Open | |
---|---|---|
Market | ||
KONEX | -0.168281 | 624219 |
KOSDAQ | -0.391093 | 17308486 |
KOSDAQ GLOBAL | -0.813600 | 2906500 |
KOSPI | 0.107560 | 34526986 |
df_krx[["Market", "ChagesRatio", "Open"]].groupby("Market").agg({"ChagesRatio": ["max", "sum"], "Open": ["min", "mean"]})
ChagesRatio | Open | |||
---|---|---|---|---|
max | sum | min | mean | |
Market | ||||
KONEX | 14.89 | -21.54 | 0 | 4876.710938 |
KOSDAQ | 29.99 | -633.18 | 0 | 10690.849907 |
KOSDAQ GLOBAL | 13.70 | -40.68 | 7170 | 58130.000000 |
KOSPI | 30.00 | 102.29 | 0 | 36305.978970 |
8. Merge
market_price_change = pd.read_csv('https://raw.githubusercontent.com/jin0choi1216/dataset/main/market_price_change_2022.csv')
print(market_price_change.head(2))
print(market_price_change.shape)
purchases_of_equities = pd.read_csv('https://raw.githubusercontent.com/jin0choi1216/dataset/main/purchases_of_equities_2022.csv')
print(purchases_of_equities.head(2))
print(purchases_of_equities.shape)
# 컬럼 이름이 같을 경우
# inner
merged_inner = pd.merge(left = market_price_change, right = purchases_of_equities, how = "inner", on = ["티커", "종목명"])
print(f"inner shape : {merged_inner.shape}")
# left
merged_left = pd.merge(left = market_price_change, right = purchases_of_equities, how = "left", on = ["티커", "종목명"])
print(f"left shape : {merged_left.shape}")
# right
merged_right = pd.merge(left = market_price_change, right = purchases_of_equities, how = "right", on = ["티커", "종목명"])
print(f"right shape : {merged_right.shape}")
# outer
merged_outer = pd.merge(left = market_price_change, right = purchases_of_equities, how = "outer", on = ["티커", "종목명"])
print(f"outer shape : {merged_outer.shape}")
티커 종목명 시가 종가 변동폭 등락률 거래량 거래대금
0 095570 AJ네트웍스 6110 5720 -390 -6.38 2947645 17162545200
1 006840 AK홀딩스 15150 17200 2050 13.53 455492 7706148650
(942, 8)
티커 종목명 매도거래량 매수거래량 순매수거래량 매도거래대금 매수거래대금 순매수거래대금
0 060310 3S 3318765 3318765 0 7721430565 7721430565 0
1 095570 AJ네트웍스 2947645 2947645 0 17162545200 17162545200 0
(2625, 8)
inner shape : (916, 14)
left shape : (942, 14)
right shape : (2625, 14)
outer shape : (2651, 14)
# 컬럼 이름이 다를 경우
pur2 = purchases_of_equities.rename(columns = { "티커": "티커2", "종목명": "종목명2" })
diff_column = pd.merge(
left = market_price_change,
right = pur2,
how = "inner",
left_on = ["티커", "종목명"],
right_on = ["티커2", "종목명2"],
suffixes = ("_L", "_R")
)
diff_column
티커 | 종목명 | 시가 | 종가 | 변동폭 | 등락률 | 거래량 | 거래대금 | 티커2 | 종목명2 | 매도거래량 | 매수거래량 | 순매수거래량 | 매도거래대금 | 매수거래대금 | 순매수거래대금 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 095570 | AJ네트웍스 | 6110 | 5720 | -390 | -6.38 | 2947645 | 17162545200 | 095570 | AJ네트웍스 | 2947645 | 2947645 | 0 | 17162545200 | 17162545200 | 0 |
1 | 006840 | AK홀딩스 | 15150 | 17200 | 2050 | 13.53 | 455492 | 7706148650 | 006840 | AK홀딩스 | 455492 | 455492 | 0 | 7706148650 | 7706148650 | 0 |
2 | 027410 | BGF | 4365 | 4305 | -60 | -1.37 | 14388855 | 62685829240 | 027410 | BGF | 14388855 | 14388855 | 0 | 62685829240 | 62685829240 | 0 |
3 | 282330 | BGF리테일 | 205500 | 210500 | 5000 | 2.43 | 712994 | 146603104000 | 282330 | BGF리테일 | 712994 | 712994 | 0 | 146603104000 | 146603104000 | 0 |
4 | 138930 | BNK금융지주 | 7390 | 6500 | -890 | -12.04 | 37130723 | 264104001940 | 138930 | BNK금융지주 | 37130723 | 37130723 | 0 | 264104001940 | 264104001940 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
911 | 005010 | 휴스틸 | 5420 | 5070 | -350 | -6.46 | 9608972 | 49226534830 | 005010 | 휴스틸 | 9608972 | 9608972 | 0 | 49226534830 | 49226534830 | 0 |
912 | 000540 | 흥국화재 | 3165 | 3370 | 205 | 6.48 | 15972406 | 59888402133 | 000540 | 흥국화재 | 15972406 | 15972406 | 0 | 59888402133 | 59888402133 | 0 |
913 | 000547 | 흥국화재2우B | 19300 | 16200 | -3100 | -16.06 | 18918 | 364549550 | 000547 | 흥국화재2우B | 18918 | 18918 | 0 | 364549550 | 364549550 | 0 |
914 | 000545 | 흥국화재우 | 6020 | 6150 | 130 | 2.16 | 68957 | 432345040 | 000545 | 흥국화재우 | 68957 | 68957 | 0 | 432345040 | 432345040 | 0 |
915 | 003280 | 흥아해운 | 1625 | 1355 | -270 | -16.62 | 4552374 | 6915159460 | 003280 | 흥아해운 | 4552374 | 4552374 | 0 | 6915159460 | 6915159460 | 0 |
916 rows × 16 columns
9. Pivot Table
# 2012 ~ 2023
GOOGL = pd.read_csv('https://raw.githubusercontent.com/jin0choi1216/dataset/main/GOOGL.csv')
AAPL = pd.read_csv('https://raw.githubusercontent.com/jin0choi1216/dataset/main/AAPL.csv')
GOOGL["Symbol"] = "Google"
AAPL["Symbol"] = "Apple"
df = pd.concat([GOOGL, AAPL])
# 날짜를 기준으로 구글과 애플별로 피벗
pivot = pd.pivot_table(data = df, index = "Date", columns = "Symbol", values = "Close")
print(pivot)
# 인덱스 여러개 지정
pivots = pd.pivot_table(data = df, index = ["Date", "Symbol"], values = "Close", aggfunc = "mean")
print(pivots)
Symbol Apple Google
Date
2012-01-03 14.686786 16.651901
2012-01-04 14.765714 16.723724
2012-01-05 14.929643 16.491741
2012-01-06 15.085714 16.266768
2012-01-09 15.061786 15.577077
... ... ...
2023-08-21 175.839996 128.369995
2023-08-22 177.229996 129.080002
2023-08-23 181.119995 132.369995
2023-08-24 176.380005 129.779999
2023-08-25 178.610001 129.880005
[2931 rows x 2 columns]
Close
Date Symbol
2012-01-03 Apple 14.686786
Google 16.651901
2012-01-04 Apple 14.765714
Google 16.723724
2012-01-05 Apple 14.929643
... ...
2023-08-23 Google 132.369995
2023-08-24 Apple 176.380005
Google 129.779999
2023-08-25 Apple 178.610001
Google 129.880005
[5862 rows x 1 columns]
9. 이전 값 비교
# 이전 값 과의 차이
GOOGL.Low.diff()
# 이전 값 과의 차이 퍼센트 수치
GOOGL.Low.pct_change()
0 NaN
1 0.012646
2 -0.006645
3 -0.009814
4 -0.043953
...
2926 0.001424
2927 0.013907
2928 0.012079
2929 -0.002310
2930 -0.017905
Name: Low, Length: 2931, dtype: float64
10. 결측치 수치 확인
is_null = merged_outer.isnull()
print(is_null.head(5))
print(is_null.sum())
티커 종목명 시가 종가 변동폭 등락률 거래량 거래대금 매도거래량 매수거래량 \
0 False False False False False False False False False False
1 False False False False False False False False False False
2 False False False False False False False False False False
3 False False False False False False False False False False
4 False False False False False False False False False False
순매수거래량 매도거래대금 매수거래대금 순매수거래대금
0 False False False False
1 False False False False
2 False False False False
3 False False False False
4 False False False False
티커 0
종목명 0
시가 1709
종가 1709
변동폭 1709
등락률 1709
거래량 1709
거래대금 1709
매도거래량 26
매수거래량 26
순매수거래량 26
매도거래대금 26
매수거래대금 26
순매수거래대금 26
dtype: int64
11. 데이터 밀기
GOOGL["Low_2"] = GOOGL.Low.shift(1, fill_value = 9999)
GOOGL
Date | Open | High | Low | Close | Adj Close | Volume | Symbol | Low_2 | |
---|---|---|---|---|---|---|---|---|---|
0 | 2012-01-03 | 16.339840 | 16.720470 | 16.325577 | 16.651901 | 16.651901 | 146912940 | 9999.000000 | |
1 | 2012-01-04 | 16.642391 | 16.773024 | 16.532032 | 16.723724 | 16.723724 | 114445440 | 16.325577 | |
2 | 2012-01-05 | 16.569820 | 16.615866 | 16.422173 | 16.491741 | 16.491741 | 131184684 | 16.532032 | |
3 | 2012-01-06 | 16.495245 | 16.516518 | 16.261011 | 16.266768 | 16.266768 | 107608284 | 16.422173 | |
4 | 2012-01-09 | 16.178679 | 16.191191 | 15.546296 | 15.577077 | 15.577077 | 232671096 | 16.261011 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2926 | 2023-08-21 | 127.180000 | 128.729996 | 126.559998 | 128.369995 | 128.369995 | 25248700 | 126.379997 | |
2927 | 2023-08-22 | 128.509995 | 130.279999 | 128.320007 | 129.080002 | 129.080002 | 22067500 | 126.559998 | |
2928 | 2023-08-23 | 130.179993 | 133.410004 | 129.869995 | 132.369995 | 132.369995 | 27819700 | 128.320007 | |
2929 | 2023-08-24 | 133.949997 | 134.250000 | 129.570007 | 129.779999 | 129.779999 | 28500700 | 129.869995 | |
2930 | 2023-08-25 | 129.539993 | 130.759995 | 127.250000 | 129.880005 | 129.880005 | 26744800 | 129.570007 |
2931 rows × 9 columns
12. 이동 평균, 누적 평균
expanding = GOOGL.Low.expanding() # 누적 평균
print(expanding.mean())
rolling = GOOGL.Low.rolling(window = 5) # 이동평균
print(rolling.mean())
0 16.325577
1 16.428804
2 16.426594
3 16.385198
4 16.217418
...
2926 57.929351
2927 57.953392
2928 57.977945
2929 58.002379
2930 58.026005
Name: Low, Length: 2931, dtype: float64
0 NaN
1 NaN
2 NaN
3 NaN
4 16.217418
...
2926 127.875998
2927 127.684000
2928 128.083998
2929 128.140001
2930 128.314001
Name: Low, Length: 2931, dtype: float64
Comments