2022/01/11/TUE
• 19 min read
import numpy as np
import pandas as pd
df=pd.read_csv('https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/flights.csv')
df.head(2)
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 4 | WN | LAX | SLC | 1625 | 58.0 | 94.0 | 590 | 1905 | 65.0 | 0 | 0 |
| 1 | 1 | 1 | 4 | UA | DEN | IAD | 823 | 7.0 | 154.0 | 1452 | 1333 | -13.0 | 0 | 0 |
grouped_df = df.groupby(by='AIRLINE') # 데이터프레임을 각 항공사 별로 나눔
grouped_df.get_group('AA') # groupby로 나눈 표에서 AA만 얻을 때
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 1 | 1 | 4 | AA | DFW | DCA | 1555 | 7.0 | 126.0 | 1192 | 1935 | -7.0 | 0 | 0 |
| 6 | 1 | 1 | 4 | AA | DFW | MSY | 1250 | 84.0 | 64.0 | 447 | 1410 | 83.0 | 0 | 0 |
| 8 | 1 | 1 | 4 | AA | ORD | STL | 1845 | -5.0 | 44.0 | 258 | 1950 | -5.0 | 0 | 0 |
| 15 | 1 | 1 | 4 | AA | DEN | DFW | 1445 | -6.0 | 93.0 | 641 | 1745 | 4.0 | 0 | 0 |
| 26 | 1 | 1 | 4 | AA | LAX | AUS | 1430 | 33.0 | 157.0 | 1242 | 1925 | 41.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58470 | 12 | 31 | 4 | AA | DFW | FAT | 1020 | -3.0 | 196.0 | 1313 | 1156 | -2.0 | 0 | 0 |
| 58475 | 12 | 31 | 4 | AA | IAH | CLT | 710 | 1.0 | 113.0 | 912 | 1037 | -12.0 | 0 | 0 |
| 58476 | 12 | 31 | 4 | AA | DFW | TPA | 1020 | -3.0 | 121.0 | 929 | 1340 | -6.0 | 0 | 0 |
| 58479 | 12 | 31 | 4 | AA | DFW | ELP | 1200 | 3.0 | 94.0 | 551 | 1250 | 13.0 | 0 | 0 |
| 58487 | 12 | 31 | 4 | AA | SFO | DFW | 515 | 5.0 | 166.0 | 1464 | 1045 | -19.0 | 0 | 0 |
8900 rows × 14 columns
grouped_df.groups
- 각 항공사별 data가 어떤 행에 들어가 있는지 dict형태로 들어가있음
for a in grouped_df.groups:
print(a) # 각 표마다 위에 항공사 이름 표기
display(grouped_df.get_group(a)) # 항공사 별 df를 얻을 수 있음
AA
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 1 | 1 | 4 | AA | DFW | DCA | 1555 | 7.0 | 126.0 | 1192 | 1935 | -7.0 | 0 | 0 |
| 6 | 1 | 1 | 4 | AA | DFW | MSY | 1250 | 84.0 | 64.0 | 447 | 1410 | 83.0 | 0 | 0 |
| 8 | 1 | 1 | 4 | AA | ORD | STL | 1845 | -5.0 | 44.0 | 258 | 1950 | -5.0 | 0 | 0 |
| 15 | 1 | 1 | 4 | AA | DEN | DFW | 1445 | -6.0 | 93.0 | 641 | 1745 | 4.0 | 0 | 0 |
| 26 | 1 | 1 | 4 | AA | LAX | AUS | 1430 | 33.0 | 157.0 | 1242 | 1925 | 41.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58470 | 12 | 31 | 4 | AA | DFW | FAT | 1020 | -3.0 | 196.0 | 1313 | 1156 | -2.0 | 0 | 0 |
| 58475 | 12 | 31 | 4 | AA | IAH | CLT | 710 | 1.0 | 113.0 | 912 | 1037 | -12.0 | 0 | 0 |
| 58476 | 12 | 31 | 4 | AA | DFW | TPA | 1020 | -3.0 | 121.0 | 929 | 1340 | -6.0 | 0 | 0 |
| 58479 | 12 | 31 | 4 | AA | DFW | ELP | 1200 | 3.0 | 94.0 | 551 | 1250 | 13.0 | 0 | 0 |
| 58487 | 12 | 31 | 4 | AA | SFO | DFW | 515 | 5.0 | 166.0 | 1464 | 1045 | -19.0 | 0 | 0 |
8900 rows × 14 columns
AS
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 38 | 1 | 1 | 4 | AS | PHX | SEA | 1505 | -2.0 | 155.0 | 1107 | 1702 | -3.0 | 0 | 0 |
| 198 | 1 | 2 | 5 | AS | LAX | SEA | 2110 | 5.0 | 145.0 | 954 | 2352 | 8.0 | 0 | 0 |
| 241 | 1 | 2 | 5 | AS | LAS | PDX | 650 | -5.0 | 117.0 | 763 | 906 | -3.0 | 0 | 0 |
| 277 | 1 | 2 | 5 | AS | ORD | ANC | 935 | -1.0 | 402.0 | 2846 | 1339 | -6.0 | 0 | 0 |
| 397 | 1 | 3 | 6 | AS | LAS | SEA | 1300 | 48.0 | 137.0 | 867 | 1535 | 47.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58305 | 12 | 30 | 3 | AS | LAX | SEA | 1325 | -2.0 | 134.0 | 954 | 1608 | -7.0 | 0 | 0 |
| 58355 | 12 | 31 | 4 | AS | PHX | SEA | 1200 | -5.0 | 145.0 | 1107 | 1407 | -24.0 | 0 | 0 |
| 58404 | 12 | 31 | 4 | AS | SFO | SLC | 2110 | -2.0 | 80.0 | 599 | 2358 | -4.0 | 0 | 0 |
| 58407 | 12 | 31 | 4 | AS | SFO | PDX | 645 | -2.0 | 81.0 | 550 | 832 | -3.0 | 0 | 0 |
| 58428 | 12 | 31 | 4 | AS | LAX | SEA | 1420 | -8.0 | 127.0 | 954 | 1709 | -25.0 | 0 | 0 |
768 rows × 14 columns
B6
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 123 | 1 | 1 | 4 | B6 | LAS | BOS | 1230 | 0.0 | 246.0 | 2381 | 2026 | -27.0 | 0 | 0 |
| 127 | 1 | 1 | 4 | B6 | LAS | BOS | 2359 | 68.0 | 247.0 | 2381 | 749 | 46.0 | 0 | 0 |
| 239 | 1 | 2 | 5 | B6 | ORD | BOS | 540 | -8.0 | 96.0 | 867 | 856 | -22.0 | 0 | 0 |
| 333 | 1 | 3 | 6 | B6 | LAX | FLL | 2237 | 32.0 | 270.0 | 2342 | 619 | 42.0 | 0 | 0 |
| 548 | 1 | 4 | 7 | B6 | SFO | FLL | 2307 | -4.0 | 298.0 | 2583 | 724 | -1.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58262 | 12 | 30 | 3 | B6 | SFO | LGB | 1921 | -6.0 | 57.0 | 354 | 2038 | -14.0 | 0 | 0 |
| 58301 | 12 | 30 | 3 | B6 | LAX | JFK | 630 | 4.0 | 285.0 | 2475 | 1445 | -6.0 | 0 | 0 |
| 58425 | 12 | 31 | 4 | B6 | ORD | SJU | 700 | 239.0 | 250.0 | 2072 | 1335 | 239.0 | 0 | 0 |
| 58477 | 12 | 31 | 4 | B6 | DFW | BOS | 1145 | 12.0 | 161.0 | 1562 | 1608 | -14.0 | 0 | 0 |
| 58483 | 12 | 31 | 4 | B6 | PHX | BOS | 2236 | -12.0 | 231.0 | 2300 | 515 | -45.0 | 0 | 0 |
543 rows × 14 columns
DL
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 53 | 1 | 1 | 4 | DL | LAS | MSP | 713 | -5.0 | 156.0 | 1299 | 1220 | -18.0 | 0 | 0 |
| 57 | 1 | 1 | 4 | DL | MSP | RSW | 700 | -1.0 | 169.0 | 1416 | 1130 | -20.0 | 0 | 0 |
| 77 | 1 | 1 | 4 | DL | LAX | ATL | 1130 | 24.0 | 217.0 | 1947 | 1840 | 16.0 | 0 | 0 |
| 79 | 1 | 1 | 4 | DL | LAX | CMH | 2146 | -3.0 | 223.0 | 1995 | 459 | -13.0 | 0 | 0 |
| 85 | 1 | 1 | 4 | DL | ATL | OKC | 2059 | -4.0 | 116.0 | 761 | 2227 | -12.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58440 | 12 | 31 | 4 | DL | ATL | CVG | 1611 | -4.0 | 61.0 | 373 | 1736 | -6.0 | 0 | 0 |
| 58448 | 12 | 31 | 4 | DL | ATL | SRQ | 1610 | 0.0 | 61.0 | 444 | 1740 | -13.0 | 0 | 0 |
| 58464 | 12 | 31 | 4 | DL | LAX | SFO | 700 | 108.0 | 54.0 | 337 | 825 | 105.0 | 0 | 0 |
| 58467 | 12 | 31 | 4 | DL | ATL | IND | 1235 | -3.0 | 63.0 | 432 | 1407 | -13.0 | 0 | 0 |
| 58485 | 12 | 31 | 4 | DL | ATL | CMH | 2206 | 2.0 | 64.0 | 447 | 2338 | -8.0 | 0 | 0 |
10601 rows × 14 columns
EV
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11 | 1 | 1 | 4 | EV | ORD | JAN | 1155 | 6.0 | 113.0 | 677 | 1403 | 5.0 | 0 | 0 |
| 13 | 1 | 1 | 4 | EV | ORD | CMH | 1010 | -2.0 | 46.0 | 296 | 1228 | -9.0 | 0 | 0 |
| 29 | 1 | 1 | 4 | EV | ORD | IND | 1025 | -6.0 | 29.0 | 177 | 1228 | -19.0 | 0 | 0 |
| 40 | 1 | 1 | 4 | EV | IAH | CLE | 1038 | -3.0 | 126.0 | 1091 | 1425 | -18.0 | 0 | 0 |
| 69 | 1 | 1 | 4 | EV | ATL | RAP | 1930 | -5.0 | 181.0 | 1230 | 2104 | -15.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58445 | 12 | 31 | 4 | EV | DFW | TXK | 850 | -5.0 | 30.0 | 181 | 948 | -17.0 | 0 | 0 |
| 58452 | 12 | 31 | 4 | EV | DFW | SHV | 1650 | -4.0 | 32.0 | 190 | 1746 | -12.0 | 0 | 0 |
| 58459 | 12 | 31 | 4 | EV | MSP | ORD | 1435 | 18.0 | 61.0 | 334 | 1609 | 3.0 | 0 | 0 |
| 58463 | 12 | 31 | 4 | EV | ORD | MSN | 1220 | 18.0 | 32.0 | 108 | 1319 | 27.0 | 0 | 0 |
| 58486 | 12 | 31 | 4 | EV | DFW | LFT | 850 | 21.0 | 52.0 | 351 | 1012 | 14.0 | 0 | 0 |
5858 rows × 14 columns
F9
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 7 | 1 | 1 | 4 | F9 | SFO | PHX | 1020 | -7.0 | 91.0 | 651 | 1315 | -6.0 | 0 | 0 |
| 93 | 1 | 1 | 4 | F9 | ATL | DEN | 859 | 16.0 | 181.0 | 1199 | 1026 | 10.0 | 0 | 0 |
| 209 | 1 | 2 | 5 | F9 | MSP | DEN | 1025 | -6.0 | 97.0 | 680 | 1134 | -13.0 | 0 | 0 |
| 232 | 1 | 2 | 5 | F9 | DEN | PHX | 2040 | -7.0 | 83.0 | 602 | 2228 | -18.0 | 0 | 0 |
| 247 | 1 | 2 | 5 | F9 | ORD | ATL | 730 | 10.0 | 86.0 | 606 | 1020 | 23.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58288 | 12 | 30 | 3 | F9 | DEN | ORD | 625 | -4.0 | 136.0 | 888 | 1000 | 14.0 | 0 | 0 |
| 58331 | 12 | 30 | 3 | F9 | ORD | PHX | 825 | 18.0 | 207.0 | 1440 | 1127 | 14.0 | 0 | 0 |
| 58447 | 12 | 31 | 4 | F9 | DEN | LAS | 1245 | 13.0 | 94.0 | 628 | 1340 | 13.0 | 0 | 0 |
| 58449 | 12 | 31 | 4 | F9 | DEN | MCO | 645 | 11.0 | 169.0 | 1546 | 1224 | -11.0 | 0 | 0 |
| 58488 | 12 | 31 | 4 | F9 | LAS | SFO | 1910 | 13.0 | 71.0 | 414 | 2050 | 4.0 | 0 | 0 |
1317 rows × 14 columns
HA
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 582 | 1 | 4 | 7 | HA | LAX | OGG | 1115 | -11.0 | 310.0 | 2486 | 1500 | -27.0 | 0 | 0 |
| 712 | 1 | 5 | 1 | HA | LAS | HNL | 900 | -5.0 | 357.0 | 2762 | 1315 | 5.0 | 0 | 0 |
| 878 | 1 | 6 | 2 | HA | PHX | HNL | 800 | 1.0 | 374.0 | 2917 | 1140 | 3.0 | 0 | 0 |
| 1053 | 1 | 7 | 3 | HA | LAX | HNL | 1705 | 0.0 | 332.0 | 2556 | 2055 | -2.0 | 0 | 0 |
| 1269 | 1 | 8 | 4 | HA | LAX | HNL | 1000 | -1.0 | 335.0 | 2556 | 1350 | 0.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 55883 | 12 | 16 | 3 | HA | LAX | HNL | 835 | 1.0 | 314.0 | 2556 | 1235 | -18.0 | 0 | 0 |
| 56174 | 12 | 18 | 5 | HA | LAX | HNL | 835 | -5.0 | 342.0 | 2556 | 1235 | -4.0 | 0 | 0 |
| 56350 | 12 | 19 | 6 | HA | PHX | HNL | 800 | -5.0 | 363.0 | 2917 | 1155 | -34.0 | 0 | 0 |
| 56816 | 12 | 21 | 1 | HA | LAX | LIH | 740 | 20.0 | 303.0 | 2615 | 1145 | -11.0 | 0 | 0 |
| 58391 | 12 | 31 | 4 | HA | LAX | HNL | 1000 | 0.0 | 324.0 | 2556 | 1350 | -9.0 | 0 | 0 |
112 rows × 14 columns
MQ
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 1 | 1 | 4 | MQ | DFW | VPS | 1305 | 36.0 | 85.0 | 641 | 1453 | 35.0 | 0 | 0 |
| 10 | 1 | 1 | 4 | MQ | DFW | DRO | 1335 | 28.0 | 104.0 | 674 | 1438 | 28.0 | 0 | 0 |
| 18 | 1 | 1 | 4 | MQ | ORD | DAY | 2220 | 19.0 | 37.0 | 240 | 23 | 20.0 | 0 | 0 |
| 24 | 1 | 1 | 4 | MQ | DFW | BTR | 730 | NaN | NaN | 383 | 853 | NaN | 0 | 1 |
| 50 | 1 | 1 | 4 | MQ | ORD | CID | 1135 | -7.0 | 37.0 | 196 | 1238 | -15.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58415 | 12 | 31 | 4 | MQ | ORD | FWA | 845 | -2.0 | 37.0 | 157 | 1045 | -4.0 | 0 | 0 |
| 58426 | 12 | 31 | 4 | MQ | DFW | FAR | 1154 | 4.0 | 124.0 | 968 | 1437 | -13.0 | 0 | 0 |
| 58468 | 12 | 31 | 4 | MQ | DFW | OKC | 1720 | -3.0 | 31.0 | 175 | 1819 | -10.0 | 0 | 0 |
| 58474 | 12 | 31 | 4 | MQ | ORD | FNT | 829 | 4.0 | 40.0 | 223 | 1034 | -4.0 | 0 | 0 |
| 58484 | 12 | 31 | 4 | MQ | ORD | DSM | 1333 | 1.0 | 57.0 | 299 | 1455 | -7.0 | 0 | 0 |
3471 rows × 14 columns
NK
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 17 | 1 | 1 | 4 | NK | DEN | DTW | 1952 | 37.0 | 124.0 | 1123 | 31 | 54.0 | 0 | 0 |
| 74 | 1 | 1 | 4 | NK | PHX | DFW | 159 | -1.0 | 103.0 | 868 | 502 | 1.0 | 0 | 0 |
| 95 | 1 | 1 | 4 | NK | LAS | OAK | 1115 | 22.0 | 62.0 | 407 | 1246 | 10.0 | 0 | 0 |
| 109 | 1 | 1 | 4 | NK | MSP | ORD | 616 | 2.0 | 49.0 | 334 | 745 | -19.0 | 0 | 0 |
| 166 | 1 | 2 | 5 | NK | LAS | PDX | 1535 | -8.0 | 123.0 | 763 | 1754 | -4.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58160 | 12 | 29 | 2 | NK | MSP | MCO | 740 | 0.0 | 171.0 | 1310 | 1158 | 33.0 | 0 | 0 |
| 58197 | 12 | 30 | 3 | NK | IAH | ORD | 755 | -8.0 | 136.0 | 925 | 1030 | -2.0 | 0 | 0 |
| 58437 | 12 | 31 | 4 | NK | ORD | DFW | 1952 | 15.0 | 135.0 | 802 | 2225 | 23.0 | 0 | 0 |
| 58461 | 12 | 31 | 4 | NK | ORD | LGA | 1801 | -5.0 | 84.0 | 733 | 2109 | -26.0 | 0 | 0 |
| 58469 | 12 | 31 | 4 | NK | LAS | MSY | 1950 | 124.0 | 163.0 | 1500 | 112 | 101.0 | 0 | 0 |
1516 rows × 14 columns
OO
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 12 | 1 | 1 | 4 | OO | ORD | MSP | 1510 | 2.0 | 65.0 | 334 | 1646 | 4.0 | 0 | 0 |
| 16 | 1 | 1 | 4 | OO | DEN | SGU | 1105 | 21.0 | 66.0 | 517 | 1249 | 20.0 | 0 | 0 |
| 22 | 1 | 1 | 4 | OO | LAS | LAX | 1544 | -4.0 | 39.0 | 236 | 1655 | -12.0 | 0 | 0 |
| 25 | 1 | 1 | 4 | OO | ORD | SPI | 2110 | -4.0 | 31.0 | 174 | 2205 | 5.0 | 0 | 0 |
| 27 | 1 | 1 | 4 | OO | IAH | JAC | 1104 | -1.0 | 161.0 | 1265 | 1316 | -1.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58451 | 12 | 31 | 4 | OO | ATL | FWA | 1905 | -3.0 | 72.0 | 508 | 2051 | -14.0 | 0 | 0 |
| 58480 | 12 | 31 | 4 | OO | MSP | BIS | 1310 | -2.0 | 65.0 | 386 | 1449 | -9.0 | 0 | 0 |
| 58482 | 12 | 31 | 4 | OO | DEN | CPR | 1850 | -2.0 | 38.0 | 230 | 1956 | 1.0 | 0 | 0 |
| 58489 | 12 | 31 | 4 | OO | SFO | SBA | 1846 | -6.0 | 46.0 | 262 | 1956 | -5.0 | 0 | 0 |
| 58491 | 12 | 31 | 4 | OO | SFO | BOI | 859 | 5.0 | 73.0 | 522 | 1146 | -1.0 | 0 | 0 |
6588 rows × 14 columns
UA
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | 1 | 4 | UA | DEN | IAD | 823 | 7.0 | 154.0 | 1452 | 1333 | -13.0 | 0 | 0 |
| 5 | 1 | 1 | 4 | UA | IAH | SAN | 1450 | 1.0 | 178.0 | 1303 | 1620 | -14.0 | 0 | 0 |
| 9 | 1 | 1 | 4 | UA | IAH | SJC | 925 | 3.0 | 215.0 | 1608 | 1136 | -14.0 | 0 | 0 |
| 14 | 1 | 1 | 4 | UA | IAH | IND | 1426 | -1.0 | 102.0 | 844 | 1742 | -20.0 | 0 | 0 |
| 21 | 1 | 1 | 4 | UA | ORD | CLE | 2102 | 48.0 | 47.0 | 315 | 2320 | 41.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58422 | 12 | 31 | 4 | UA | DEN | SAN | 1535 | 0.0 | 124.0 | 853 | 1704 | -13.0 | 0 | 0 |
| 58432 | 12 | 31 | 4 | UA | ORD | SAN | 1915 | 7.0 | 238.0 | 1723 | 2143 | -3.0 | 0 | 0 |
| 58457 | 12 | 31 | 4 | UA | ORD | LAX | 659 | -1.0 | 241.0 | 1744 | 946 | 0.0 | 0 | 0 |
| 58460 | 12 | 31 | 4 | UA | SFO | PHL | 2235 | -6.0 | 265.0 | 2521 | 700 | -42.0 | 0 | 0 |
| 58481 | 12 | 31 | 4 | UA | IAH | LAX | 1433 | 1.0 | 197.0 | 1379 | 1625 | -13.0 | 0 | 0 |
7792 rows × 14 columns
US
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 31 | 1 | 1 | 4 | US | PHX | DEN | 1810 | 29.0 | 94.0 | 602 | 1954 | 49.0 | 0 | 0 |
| 35 | 1 | 1 | 4 | US | ORD | PHL | 1600 | -2.0 | 80.0 | 678 | 1857 | -9.0 | 0 | 0 |
| 49 | 1 | 1 | 4 | US | IAH | PHX | 1445 | -1.0 | 147.0 | 1009 | 1638 | -7.0 | 0 | 0 |
| 96 | 1 | 1 | 4 | US | ATL | PHL | 1445 | -4.0 | 90.0 | 666 | 1644 | -11.0 | 0 | 0 |
| 104 | 1 | 1 | 4 | US | MSP | PHX | 730 | -3.0 | 174.0 | 1276 | 1010 | -20.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 31514 | 6 | 30 | 2 | US | DEN | PHL | 705 | -4.0 | 188.0 | 1558 | 1240 | 1.0 | 0 | 0 |
| 31523 | 6 | 30 | 2 | US | PHX | DEN | 1451 | 6.0 | 85.0 | 602 | 1738 | 7.0 | 0 | 0 |
| 31535 | 6 | 30 | 2 | US | PHX | AUS | 840 | -3.0 | 116.0 | 872 | 1304 | -11.0 | 0 | 0 |
| 31561 | 6 | 30 | 2 | US | ORD | PHX | 710 | -5.0 | 170.0 | 1440 | 901 | -50.0 | 0 | 0 |
| 31582 | 6 | 30 | 2 | US | PHX | OGG | 800 | -4.0 | 356.0 | 2845 | 1127 | -13.0 | 0 | 0 |
1615 rows × 14 columns
VX
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 56 | 1 | 1 | 4 | VX | LAS | SFO | 900 | 23.0 | 65.0 | 414 | 1035 | 11.0 | 0 | 0 |
| 227 | 1 | 2 | 5 | VX | SFO | LAS | 1220 | -5.0 | 68.0 | 414 | 1350 | -5.0 | 0 | 0 |
| 243 | 1 | 2 | 5 | VX | SFO | SEA | 700 | -4.0 | 104.0 | 679 | 905 | -1.0 | 0 | 0 |
| 417 | 1 | 3 | 6 | VX | SFO | LAS | 900 | -2.0 | 62.0 | 414 | 1030 | -11.0 | 0 | 0 |
| 432 | 1 | 3 | 6 | VX | SFO | SEA | 2035 | -2.0 | 106.0 | 679 | 2240 | -2.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58332 | 12 | 30 | 3 | VX | SFO | LAS | 1950 | -3.0 | 58.0 | 414 | 2120 | -4.0 | 0 | 0 |
| 58383 | 12 | 31 | 4 | VX | SFO | PSP | 1630 | -7.0 | 65.0 | 421 | 1755 | -12.0 | 0 | 0 |
| 58400 | 12 | 31 | 4 | VX | SFO | LAX | 1125 | -4.0 | 54.0 | 337 | 1245 | -10.0 | 0 | 0 |
| 58471 | 12 | 31 | 4 | VX | SFO | LAX | 700 | 6.0 | 51.0 | 337 | 820 | 3.0 | 0 | 0 |
| 58478 | 12 | 31 | 4 | VX | SFO | LAX | 1530 | 29.0 | 52.0 | 337 | 1650 | 22.0 | 0 | 0 |
993 rows × 14 columns
WN
| MONTH | DAY | WEEKDAY | AIRLINE | ORG_AIR | DEST_AIR | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 4 | WN | LAX | SLC | 1625 | 58.0 | 94.0 | 590 | 1905 | 65.0 | 0 | 0 |
| 4 | 1 | 1 | 4 | WN | LAX | MCI | 1720 | 48.0 | 166.0 | 1363 | 2225 | 39.0 | 0 | 0 |
| 19 | 1 | 1 | 4 | WN | PHX | LAX | 1640 | 51.0 | 58.0 | 370 | 1700 | 59.0 | 0 | 0 |
| 20 | 1 | 1 | 4 | WN | ATL | BWI | 1115 | 1.0 | 76.0 | 577 | 1305 | -15.0 | 0 | 0 |
| 23 | 1 | 1 | 4 | WN | ATL | HOU | 1555 | 30.0 | 113.0 | 696 | 1720 | 18.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58455 | 12 | 31 | 4 | WN | LAX | SMF | 1420 | -2.0 | 64.0 | 373 | 1540 | -7.0 | 0 | 0 |
| 58458 | 12 | 31 | 4 | WN | LAS | SFO | 1825 | 25.0 | 67.0 | 414 | 1955 | 17.0 | 0 | 0 |
| 58472 | 12 | 31 | 4 | WN | PHX | HOU | 845 | 5.0 | 119.0 | 1020 | 1210 | 7.0 | 0 | 0 |
| 58473 | 12 | 31 | 4 | WN | DEN | PDX | 1205 | 4.0 | 130.0 | 991 | 1400 | -13.0 | 0 | 0 |
| 58490 | 12 | 31 | 4 | WN | MSP | ATL | 525 | 39.0 | 124.0 | 907 | 855 | 34.0 | 0 | 0 |
8418 rows × 14 columns
df.groupby(by='AIRLINE').agg({'ARR_DELAY':np.mean})
df.groupby(by='AIRLINE')['ARR_DELAY'].agg(np.mean)
- 아래 셀과 동일하게 사용 가능
df.groupby(by='AIRLINE').agg({'ARR_DELAY':'mean'})
| ARR_DELAY | |
|---|---|
| AIRLINE | |
| AA | 5.542661 |
| AS | -0.833333 |
| B6 | 8.692593 |
| DL | 0.339691 |
| EV | 7.034580 |
| F9 | 13.630651 |
| HA | 4.972973 |
| MQ | 6.860591 |
| NK | 18.436070 |
| OO | 7.593463 |
| UA | 7.765755 |
| US | 1.681105 |
| VX | 5.348884 |
| WN | 6.397353 |
def f(x): return -np.mean(x)
df.groupby(by='AIRLINE').agg({'ARR_DELAY':f})
| ARR_DELAY | |
|---|---|
| AIRLINE | |
| AA | -5.542661 |
| AS | 0.833333 |
| B6 | -8.692593 |
| DL | -0.339691 |
| EV | -7.034580 |
| F9 | -13.630651 |
| HA | -4.972973 |
| MQ | -6.860591 |
| NK | -18.436070 |
| OO | -7.593463 |
| UA | -7.765755 |
| US | -1.681105 |
| VX | -5.348884 |
| WN | -6.397353 |
df.groupby(by='AIRLINE').agg({'ARR_DELAY':lambda x: -np.mean(x)})
| ARR_DELAY | |
|---|---|
| AIRLINE | |
| AA | -5.542661 |
| AS | 0.833333 |
| B6 | -8.692593 |
| DL | -0.339691 |
| EV | -7.034580 |
| F9 | -13.630651 |
| HA | -4.972973 |
| MQ | -6.860591 |
| NK | -18.436070 |
| OO | -7.593463 |
| UA | -7.765755 |
| US | -1.681105 |
| VX | -5.348884 |
| WN | -6.397353 |
df.groupby(by='AIRLINE')['ARR_DELAY'].agg(lambda x: -np.mean(x))
AIRLINE AA -5.542661 AS 0.833333 B6 -8.692593 DL -0.339691 EV -7.034580 F9 -13.630651 HA -4.972973 MQ -6.860591 NK -18.436070 OO -7.593463 UA -7.765755 US -1.681105 VX -5.348884 WN -6.397353 Name: ARR_DELAY, dtype: float64
def f(x,y): return np.mean(x)**y
df.groupby(by='AIRLINE')['ARR_DELAY'].agg(f,2) #2는 y값임
AIRLINE AA 30.721086 AS 0.694444 B6 75.561166 DL 0.115390 EV 49.485310 F9 185.794656 HA 24.730460 MQ 47.067715 NK 339.888677 OO 57.660681 UA 60.306954 US 2.826113 VX 28.610564 WN 40.926120 Name: ARR_DELAY, dtype: float64
df.groupby(by='AIRLINE').agg({'ARR_DELAY': lambda x: f(x,2)})
df.groupby(by=['AIRLINE','WEEKDAY']).agg({'CANCELLED':'sum'})
| CANCELLED | ||
|---|---|---|
| AIRLINE | WEEKDAY | |
| AA | 1 | 41 |
| 2 | 9 | |
| 3 | 16 | |
| 4 | 20 | |
| 5 | 18 | |
| ... | ... | ... |
| WN | 3 | 18 |
| 4 | 10 | |
| 5 | 7 | |
| 6 | 10 | |
| 7 | 7 |
98 rows × 1 columns
df.groupby(by=['AIRLINE','WEEKDAY']).agg({'CANCELLED':np.sum})
df.groupby(by=['AIRLINE','WEEKDAY'])['CANCELLED'].agg('sum')
df.groupby(by=['AIRLINE','WEEKDAY'])['CANCELLED'].agg(np.sum)
df.groupby(by=['AIRLINE','WEEKDAY'])['CANCELLED'].sum()
- 위 셀과 동일하게 적용 가능
df.groupby(by=['AIRLINE','WEEKDAY']).agg({'CANCELLED':['sum','mean'],'DIVERTED':['sum','mean']})
| CANCELLED | DIVERTED | ||||
|---|---|---|---|---|---|
| sum | mean | sum | mean | ||
| AIRLINE | WEEKDAY | ||||
| AA | 1 | 41 | 0.032106 | 6 | 0.004699 |
| 2 | 9 | 0.007341 | 2 | 0.001631 | |
| 3 | 16 | 0.011949 | 2 | 0.001494 | |
| 4 | 20 | 0.015004 | 5 | 0.003751 | |
| 5 | 18 | 0.014151 | 1 | 0.000786 | |
| ... | ... | ... | ... | ... | ... |
| WN | 3 | 18 | 0.014118 | 2 | 0.001569 |
| 4 | 10 | 0.007911 | 4 | 0.003165 | |
| 5 | 7 | 0.005828 | 0 | 0.000000 | |
| 6 | 10 | 0.010132 | 3 | 0.003040 | |
| 7 | 7 | 0.006066 | 3 | 0.002600 | |
98 rows × 4 columns
df.groupby(by=['AIRLINE','WEEKDAY']).agg({'CANCELLED':[np.sum,np.mean],'DIVERTED':[np.sum,np.mean]})
df.groupby(by=['AIRLINE','WEEKDAY'])[['CANCELLED','DIVERTED']].agg(['sum','mean'])
df.groupby(by=['AIRLINE','WEEKDAY'])[['CANCELLED','DIVERTED']].agg([np.sum,np.mean])
- 위 셀과 동일하게 적용 가능
df.groupby(by=['AIRLINE','WEEKDAY']).agg({'CANCELLED':['sum','mean','size'],'AIR_TIME':['mean','var']})
| CANCELLED | AIR_TIME | |||||
|---|---|---|---|---|---|---|
| sum | mean | size | mean | var | ||
| AIRLINE | WEEKDAY | |||||
| AA | 1 | 41 | 0.032106 | 1277 | 147.610569 | 5393.806723 |
| 2 | 9 | 0.007341 | 1226 | 143.851852 | 5359.890719 | |
| 3 | 16 | 0.011949 | 1339 | 144.514005 | 5378.854539 | |
| 4 | 20 | 0.015004 | 1333 | 141.124618 | 4791.524627 | |
| 5 | 18 | 0.014151 | 1272 | 145.430966 | 5884.592076 | |
| ... | ... | ... | ... | ... | ... | ... |
| WN | 3 | 18 | 0.014118 | 1275 | 104.219920 | 2901.873447 |
| 4 | 10 | 0.007911 | 1264 | 107.200800 | 2966.568935 | |
| 5 | 7 | 0.005828 | 1201 | 107.893635 | 3268.717093 | |
| 6 | 10 | 0.010132 | 987 | 109.247433 | 3152.753719 | |
| 7 | 7 | 0.006066 | 1154 | 107.602273 | 3183.126889 | |
98 rows × 5 columns
df.groupby(by=['AIRLINE','WEEKDAY'])\
.agg({'CANCELLED':[np.sum,np.mean,len],'AIR_TIME':[np.mean,lambda x: np.std(x,ddof=1)**2]})
| CANCELLED | AIR_TIME | |||||
|---|---|---|---|---|---|---|
| sum | mean | len | mean | <lambda_0> | ||
| AIRLINE | WEEKDAY | |||||
| AA | 1 | 41 | 0.032106 | 1277 | 147.610569 | 5393.806723 |
| 2 | 9 | 0.007341 | 1226 | 143.851852 | 5359.890719 | |
| 3 | 16 | 0.011949 | 1339 | 144.514005 | 5378.854539 | |
| 4 | 20 | 0.015004 | 1333 | 141.124618 | 4791.524627 | |
| 5 | 18 | 0.014151 | 1272 | 145.430966 | 5884.592076 | |
| ... | ... | ... | ... | ... | ... | ... |
| WN | 3 | 18 | 0.014118 | 1275 | 104.219920 | 2901.873447 |
| 4 | 10 | 0.007911 | 1264 | 107.200800 | 2966.568935 | |
| 5 | 7 | 0.005828 | 1201 | 107.893635 | 3268.717093 | |
| 6 | 10 | 0.010132 | 987 | 109.247433 | 3152.753719 | |
| 7 | 7 | 0.006066 | 1154 | 107.602273 | 3183.126889 | |
98 rows × 5 columns
df.DIST.hist()
# DIST라는 열이 있음
<AxesSubplot:>
bins=[-np.inf, 400, 600, 800, 1000, 1200, np.inf]
cuts=pd.cut(df.DIST,bins=bins,labels=['Q1','Q2','Q3','Q4','Q5','Q6'])
cuts
0 Q2
1 Q6
2 Q3
3 Q5
4 Q6
..
58487 Q6
58488 Q2
58489 Q1
58490 Q4
58491 Q2
Name: DIST, Length: 58492, dtype: category
Categories (6, object): ['Q1' < 'Q2' < 'Q3' < 'Q4' < 'Q5' < 'Q6']
df.groupby(by=[cuts,'AIRLINE']).agg({'DIVERTED':sum})
| DIVERTED | ||
|---|---|---|
| DIST | AIRLINE | |
| Q1 | AA | 0 |
| AS | 0 | |
| B6 | 0 | |
| DL | 1 | |
| EV | 3 | |
| ... | ... | ... |
| Q6 | OO | 4 |
| UA | 12 | |
| US | 1 | |
| VX | 1 | |
| WN | 8 |
84 rows × 1 columns
df.groupby(cuts).agg({'DIVERTED':len})
| DIVERTED | |
|---|---|
| DIST | |
| Q1 | 15027 |
| Q2 | 9130 |
| Q3 | 8553 |
| Q4 | 7542 |
| Q5 | 3889 |
| Q6 | 14351 |