# < Presentation >
# < Preprocessing & EDA (R Code) >
######
# SK #
######
###########
# setting #
###########
library(dplyr)
library(tidyr)
library(forcats)
library(lubridate)
library(ggplot2)
library(scales)
library(tidytext)
library(readr)
library(showtext)
# Make the Google font 'Nanum Gothic' available under the family name
# 'Gothic' and switch on showtext's automatic text rendering.
font_add_google(name = 'Nanum Gothic', family = 'Gothic')
showtext_auto()

# Default theme for every plot that follows: theme_minimal() plus bold dark
# titles, light grid lines, a legend placed on top and frameless facet strips.
theme_set(
  theme_minimal() +
    theme(
      # titles
      plot.title = element_text(face = 'bold', colour = 'grey10'),
      plot.subtitle = element_text(colour = 'grey25'),
      # facet strips
      strip.background = element_blank(),
      strip.text = element_text(face = 'bold', colour = 'grey25', size = 11.25),
      # grid lines
      panel.grid.major = element_line(colour = 'grey90', size = 1),
      panel.grid.minor = element_line(colour = 'grey80', size = 0.5, linetype = 'dashed'),
      # legend
      legend.position = 'top',
      legend.spacing.x = unit(0.125, 'cm'),
      legend.background = element_rect(fill = NULL, linetype = 'dotted')
    )
)
# Load the two SK floating-population extracts (by age band, by hour slot).
data_sk_age = read_csv('C:/Users/user/Desktop/Big_Contest/Data/sk_flow_age.csv')
data_sk_time = read_csv('C:/Users/user/Desktop/Big_Contest/Data/sk_flow_time.csv')

# Split the compact yyyymmdd key STD_YMD into character columns STD_Y / STD_M /
# STD_D and replace STD_YMD itself by a Date.
#   df: data frame with a STD_YMD column holding yyyymmdd values.
#   Returns df with the three new columns appended and STD_YMD converted.
# An explicit format is used so a malformed key becomes NA instead of
# aborting the whole conversion.
add_ymd_parts = function(df) {
  ymd_chr = as.character(df$STD_YMD)
  df$STD_Y = substr(ymd_chr, 1, 4)
  df$STD_M = substr(ymd_chr, 5, 6)
  df$STD_D = substr(ymd_chr, 7, 8)
  df$STD_YMD = as.Date(ymd_chr, format = '%Y%m%d')
  df
}

data_sk_age = add_ymd_parts(data_sk_age)
data_sk_time = add_ymd_parts(data_sk_time)

# City label: rows whose HDONG_CD exceeds 2e9 are tagged Daegu, all others
# Seoul. The Korean literals were restored from mis-encoded text.
data_sk_age$city = '서울특별시'
data_sk_age$city[data_sk_age$HDONG_CD > 2e9] = '대구광역시'
data_sk_time$city = '서울특별시'
data_sk_time$city[data_sk_time$HDONG_CD > 2e9] = '대구광역시'
# Long-format age table: one row per date x (gender, age band) column.
# Columns of the form MAN_FLOW_POP_CNT_0004 ... WMAN_FLOW_POP_CNT_70U are
# stacked into key/value pairs, then the key is decoded into gender and age.
df_age = data_sk_age %>%
  select(STD_YMD, STD_Y, MAN_FLOW_POP_CNT_0004:WMAN_FLOW_POP_CNT_70U) %>%
  gather(key = key, value = value, MAN_FLOW_POP_CNT_0004:WMAN_FLOW_POP_CNT_70U) %>%
  mutate(
    # The first four characters of the key distinguish MAN_ from WMAN.
    gender = substr(key, 1, 4),
    gender = if_else(gender == 'MAN_', '남자', gender),
    gender = if_else(gender == 'WMAN', '여자', gender),
    # The band code is whatever follows the last underscore ('0004', '70U').
    # Taking it by pattern instead of fixed offsets avoids the one-character
    # shift between the MAN_ and WMAN prefixes.
    age_code = sub('^.*_', '', key),
    age = if_else(
      age_code == '70U',
      '70-100',
      paste0(substr(age_code, 1, 2), '-', substr(age_code, 3, 4))
    ),
    # 7-day period index. lubridate::week() counts complete 7-day blocks
    # since 1 January (plus one); shifting back 3 days and subtracting 4
    # makes 1-7 February period 1.
    week = week(STD_YMD - 3) - 4
  ) %>%
  select(-age_code)
# Long-format hourly table: the TMST_00 ... TMST_23 columns are stacked into
# key/value pairs; `time` is the two-digit suffix of the key and `week` is the
# same 7-day period index used for the age table.
df_time = data_sk_time %>%
  select(city, HDONG_NM, STD_YMD, STD_Y, TMST_00:TMST_23) %>%
  gather(key = key, value = value, TMST_00:TMST_23) %>%
  mutate(
    time = substr(key, 6, 7),
    week = week(STD_YMD - 3) - 4
  )
#######
# eda #
#######
# Daily total floating population (in units of 10,000), one free-scaled panel
# per year. The colour legend is suppressed because the facets already
# separate the years.
df_time %>%
  group_by(STD_Y, STD_YMD) %>%
  summarise(total_value = sum(value)) %>%
  ungroup() %>%
  ggplot(aes(STD_YMD, total_value / 1e4, colour = as.factor(STD_Y))) +
  geom_point(size = 1.5) +
  geom_line(size = 1) +
  facet_wrap(~ STD_Y, scales = 'free', ncol = 1) +
  guides(colour = 'none') +
  labs(x = '달', y = '유동인구 (단위: 만)')
# Total floating population per 7-day period, one line per year.
# NOTE(review): period 18 is excluded - presumably an incomplete trailing
# period; confirm against the data's date range.
df_age %>%
  filter(week != 18) %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(week, STD_Y) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup() %>%
  ggplot(aes(week, total_value, group = STD_Y, colour = STD_Y)) +
  geom_point(size = 2.5) +
  geom_line(size = 1) +
  scale_y_continuous(labels = comma) +
  labs(x = '기간 (단위: 7일)', y = '유동인구 (단위: 만)') +
  theme(legend.title = element_blank())
# Year-over-year decline of floating population by age band and gender.
# Totals (in units of 10,000) are computed per year, then matched on
# (age, gender) with a join so a group missing in one year cannot shift the
# other year's values onto the wrong row.
df_age_19 = df_age %>%
  filter(STD_Y == 2019) %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(age, gender, STD_Y) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup()
df_age_20 = df_age %>%
  filter(STD_Y == 2020) %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(age, gender, STD_Y) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup()

# decre_prop_value = percentage by which 2020 fell short of 2019
# (positive = decrease).
df_age_total = inner_join(
  df_age_19, df_age_20,
  by = c('age', 'gender'),
  suffix = c('_19', '_20')
) %>%
  mutate(decre_prop_value = -(total_value_20 - total_value_19) / total_value_19 * 100) %>%
  select(age, gender, decre_prop_value)

df_age_total %>%
  ggplot(aes(age, decre_prop_value, fill = gender)) +
  geom_bar(stat = 'identity', position = 'dodge', size = 1, alpha = 0.75, colour = 'black') +
  scale_fill_manual(values = c('steelblue', 'pink')) +
  labs(x = '나이', y = '전년대비 유동인구 감소율 (%)') +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))
# Weekend flag (logical). wday() with week_start = 1 numbers Monday as 1, so
# 6 and 7 are Saturday and Sunday; unlike comparing weekdays() against
# day-name strings this does not depend on the session locale.
df_age$is_weekends = wday(df_age$STD_YMD, week_start = 1) >= 6

# Yearly totals (units of 10,000) split by weekday/weekend.
df_age_19 = df_age %>%
  filter(STD_Y == 2019) %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(is_weekends, STD_Y) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup()
df_age_20 = df_age %>%
  filter(STD_Y == 2020) %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(is_weekends, STD_Y) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup()

# Match the two years on the flag and derive the percentage decline; the flag
# is relabelled from its own value rather than by assuming a row order.
df_age_total = inner_join(
  df_age_19, df_age_20,
  by = 'is_weekends',
  suffix = c('_19', '_20')
) %>%
  mutate(
    decre_prop_value = -(total_value_20 - total_value_19) / total_value_19 * 100,
    is_weekends = if_else(is_weekends, '주말', '평일')
  ) %>%
  select(is_weekends, decre_prop_value)

# The y axis is zoomed to 22-26 % (coord_cartesian only crops the view), so
# bar heights are not proportional to the values.
df_age_total %>%
  ggplot(aes(is_weekends, decre_prop_value, fill = is_weekends)) +
  geom_bar(stat = 'identity', alpha = 0.5, size = 1, colour = 'black') +
  labs(x = NULL, y = '전년대비 유동인구 감소율 (%)') +
  scale_fill_manual(values = c('주말' = 'red', '평일' = 'black')) +
  coord_cartesian(ylim = c(22, 26)) +
  guides(fill = 'none', colour = 'none')
# Year-over-year decline by age band, separately for weekdays and weekends.
# Relies on the logical df_age$is_weekends column created earlier in the
# script.
df_age_19 = df_age %>%
  filter(STD_Y == 2019) %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(is_weekends, age, STD_Y) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup()
df_age_20 = df_age %>%
  filter(STD_Y == 2020) %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(is_weekends, age, STD_Y) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup()

# Keyed join instead of positional subtraction, so the two years are always
# compared for the same (flag, age band) cell.
df_age_total = inner_join(
  df_age_19, df_age_20,
  by = c('is_weekends', 'age'),
  suffix = c('_19', '_20')
) %>%
  mutate(
    decre_prop_value = -(total_value_20 - total_value_19) / total_value_19 * 100,
    is_weekends = if_else(is_weekends, '주말', '평일')
  ) %>%
  select(is_weekends, age, decre_prop_value)

df_age_total %>%
  ggplot(aes(age, decre_prop_value, fill = is_weekends)) +
  geom_bar(stat = 'identity', position = 'dodge', size = 1, alpha = 0.5, colour = 'black') +
  labs(x = '나이', y = '전년대비 유동인구 감소율 (%)') +
  scale_fill_manual(values = c('주말' = 'red', '평일' = 'black')) +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))
# Weekend/weekday label for the hourly table. wday(..., week_start = 1) >= 6
# selects Saturday and Sunday independently of the session locale.
df_time$is_weekends = if_else(
  wday(df_time$STD_YMD, week_start = 1) >= 6,
  '주말',
  '평일'
)

# Hourly profile of total floating population (units of 10,000), one line per
# year, faceted by weekday/weekend with independent y scales.
df_time %>%
  group_by(is_weekends, STD_Y, time) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup() %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  ggplot(aes(time, total_value, group = STD_Y, colour = STD_Y)) +
  geom_point(size = 2.5) +
  geom_line(size = 1) +
  labs(x = '시', y = '유동인구 (단위: 만)') +
  scale_y_continuous(labels = comma) +
  facet_wrap(~ is_weekends, scales = 'free_y') +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))
# Year-over-year decline of floating population by hour slot, one line for
# weekdays and one for weekends (uses the is_weekends label already present
# on df_time).
df_time_19 = df_time %>%
  filter(STD_Y == 2019) %>%
  group_by(is_weekends, STD_Y, time) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup()
df_time_20 = df_time %>%
  filter(STD_Y == 2020) %>%
  group_by(is_weekends, STD_Y, time) %>%
  summarise(total_value = sum(value) / 1e4) %>%
  ungroup()

# Join on (label, hour) so the subtraction never pairs different cells.
df_time_total = inner_join(
  df_time_19, df_time_20,
  by = c('is_weekends', 'time'),
  suffix = c('_19', '_20')
) %>%
  mutate(decre_prop_value = -(total_value_20 - total_value_19) / total_value_19 * 100) %>%
  select(is_weekends, time, decre_prop_value)

df_time_total %>%
  ggplot(aes(time, decre_prop_value, group = is_weekends, colour = is_weekends)) +
  geom_point(size = 2.5) +
  geom_line(size = 1) +
  labs(x = '시', y = '전년대비 유동인구 감소율 (%)') +
  scale_color_manual(values = c('red', 'black')) +
  theme(legend.title = element_blank())
######
# SH #
######
###########
# setting #
###########
library(dplyr)
library(forcats)
library(ggplot2)
library(scales)
library(tidytext)
library(readr)
library(showtext)
# Make the Google font 'Nanum Gothic' available under the family name
# 'Gothic' and switch on showtext's automatic text rendering.
font_add_google(name = 'Nanum Gothic', family = 'Gothic')
showtext_auto()

# Default theme for every plot that follows: theme_minimal() plus bold dark
# titles, light grid lines, a legend placed on top and frameless facet strips.
theme_set(
  theme_minimal() +
    theme(
      # titles
      plot.title = element_text(face = 'bold', colour = 'grey10'),
      plot.subtitle = element_text(colour = 'grey25'),
      # facet strips
      strip.background = element_blank(),
      strip.text = element_text(face = 'bold', colour = 'grey25', size = 11.25),
      # grid lines
      panel.grid.major = element_line(colour = 'grey90', size = 1),
      panel.grid.minor = element_line(colour = 'grey80', size = 0.5, linetype = 'dashed'),
      # legend
      legend.position = 'top',
      legend.spacing.x = unit(0.125, 'cm'),
      legend.background = element_rect(fill = NULL, linetype = 'dotted')
    )
)
# data_sh_resid = read_csv('C:/Users/user/Desktop/Big_Contest/Data/sh_card_resident.csv')
# data_sh_fore = read_csv('C:/Users/user/Desktop/Big_Contest/Data/sh_card_foreigner.csv')
#
# data_sh_resid$STD_Y = substr(data_sh_resid$STD_DD, 1, 4)
# data_sh_resid$STD_M = substr(data_sh_resid$STD_DD, 5, 6)
# data_sh_resid$STD_D = substr(data_sh_resid$STD_DD, 7, 8)
# data_sh_resid$STD_YMD = as.Date(paste(data_sh_resid$STD_Y, '-', data_sh_resid$STD_M, '-', data_sh_resid$STD_D, sep = ''))
#
# data_sh_fore$STD_Y = substr(data_sh_fore$STD_DD, 1, 4)
# data_sh_fore$STD_M = substr(data_sh_fore$STD_DD, 5, 6)
# data_sh_fore$STD_D = substr(data_sh_fore$STD_DD, 7, 8)
# data_sh_fore$STD_YMD = as.Date(paste(data_sh_fore$STD_Y, '-', data_sh_fore$STD_M, '-', data_sh_fore$STD_D, sep = ''))
#
# data_sh_resid$DONG_NM = data_sh_resid %>%
# select(GU_CD, DONG_CD) %>%
# mutate(DONG_NM = case_when(
# GU_CD == '140' & DONG_CD == '520' ~ 'μ곡λ',
# GU_CD == '140' & DONG_CD == '540' ~ 'ννλ',
# GU_CD == '140' & DONG_CD == '550' ~ 'λͺ
λ',
# GU_CD == '140' & DONG_CD == '570' ~ 'νλ',
# GU_CD == '140' & DONG_CD == '580' ~ 'μ₯μΆ©λ',
# GU_CD == '140' & DONG_CD == '590' ~ 'κ΄ν¬λ',
# GU_CD == '140' & DONG_CD == '605' ~ 'μμ§λ‘λ',
# GU_CD == '140' & DONG_CD == '615' ~ 'μ λΉλ',
# GU_CD == '140' & DONG_CD == '625' ~ 'λ€μ°λ',
# GU_CD == '140' & DONG_CD == '635' ~ 'μ½μλ',
# GU_CD == '140' & DONG_CD == '645' ~ 'μ²κ΅¬λ',
# GU_CD == '140' & DONG_CD == '650' ~ 'μ λΉ5λ',
# GU_CD == '140' & DONG_CD == '665' ~ 'λνλ',
# GU_CD == '140' & DONG_CD == '670' ~ 'ν©νλ',
# GU_CD == '140' & DONG_CD == '680' ~ 'μ€λ¦Όλ',
# GU_CD == '350' & DONG_CD == '560' ~ 'μκ³1λ',
# GU_CD == '350' & DONG_CD == '570' ~ 'μκ³2λ',
# GU_CD == '350' & DONG_CD == '580' ~ 'μκ³3λ',
# GU_CD == '350' & DONG_CD == '595' ~ '곡λ¦1λ',
# GU_CD == '350' & DONG_CD == '600' ~ '곡λ¦2λ',
# GU_CD == '350' & DONG_CD == '611' ~ 'νκ³1λ',
# GU_CD == '350' & DONG_CD == '612' ~ 'νκ²2λ',
# GU_CD == '350' & DONG_CD == '619' ~ 'μ€κ³λ³Έλ',
# GU_CD == '350' & DONG_CD == '621' ~ 'μ€κ³1λ',
# GU_CD == '350' & DONG_CD == '624' ~ 'μ€κ³4λ',
# GU_CD == '350' & DONG_CD == '625' ~ 'μ€κ³2,3λ',
# GU_CD == '350' & DONG_CD == '630' ~ 'μκ³1λ',
# GU_CD == '350' & DONG_CD == '640' ~ 'μκ³2λ',
# GU_CD == '350' & DONG_CD == '665' ~ 'μκ³3,4λ',
# GU_CD == '350' & DONG_CD == '670' ~ 'μκ³5λ',
# GU_CD == '350' & DONG_CD == '695' ~ 'μκ³6,7λ',
# GU_CD == '350' & DONG_CD == '700' ~ 'μκ³8λ',
# GU_CD == '350' & DONG_CD == '710' ~ 'μκ³9λ',
# GU_CD == '350' & DONG_CD == '720' ~ 'μκ³10λ',
# GU_CD == '260' & DONG_CD == '510' ~ 'λ²μ΄1λ',
# GU_CD == '260' & DONG_CD == '520' ~ 'λ²μ΄2λ',
# GU_CD == '260' & DONG_CD == '530' ~ 'λ²μ΄3λ',
# GU_CD == '260' & DONG_CD == '540' ~ 'λ²μ΄4λ',
# GU_CD == '260' & DONG_CD == '550' ~ 'λ§μ΄1λ',
# GU_CD == '260' & DONG_CD == '560' ~ 'λ§μ΄2λ',
# GU_CD == '260' & DONG_CD == '561' ~ 'λ§μ΄3λ',
# GU_CD == '260' & DONG_CD == '570' ~ 'μμ±1κ°λ',
# GU_CD == '260' & DONG_CD == '580' ~ 'μμ±2,3κ°λ',
# GU_CD == '260' & DONG_CD == '590' ~ 'μμ±4κ°λ',
# GU_CD == '260' & DONG_CD == '601' ~ 'ν©κΈ1λ',
# GU_CD == '260' & DONG_CD == '602' ~ 'ν©κΈ2λ',
# GU_CD == '260' & DONG_CD == '610' ~ 'μ€λ',
# GU_CD == '260' & DONG_CD == '620' ~ 'μλ',
# GU_CD == '260' & DONG_CD == '630' ~ 'νλ',
# GU_CD == '260' & DONG_CD == '640' ~ 'λμ°λ',
# GU_CD == '260' & DONG_CD == '651' ~ 'μ§μ°1λ',
# GU_CD == '260' & DONG_CD == '652' ~ 'μ§μ°2λ',
# GU_CD == '260' & DONG_CD == '661' ~ 'λ²λ¬Ό1λ',
# GU_CD == '260' & DONG_CD == '662' ~ 'λ²λ¬Ό2λ',
# GU_CD == '260' & DONG_CD == '670' ~ 'κ³ μ°1λ',
# GU_CD == '260' & DONG_CD == '680' ~ 'κ³ μ°2λ',
# GU_CD == '260' & DONG_CD == '690' ~ 'κ³ μ°3λ',
# GU_CD == '110' & DONG_CD == '517' ~ 'λμΈλ',
# GU_CD == '110' & DONG_CD == '545' ~ 'μΌλλ',
# GU_CD == '110' & DONG_CD == '565' ~ 'μ±λ΄1λ',
# GU_CD == '110' & DONG_CD == '575' ~ 'μ±λ΄2λ',
# GU_CD == '110' & DONG_CD == '585' ~ 'μ±λ΄3λ',
# GU_CD == '110' & DONG_CD == '595' ~ 'λμ λ',
# GU_CD == '110' & DONG_CD == '640' ~ 'λ¨μ°1λ',
# GU_CD == '110' & DONG_CD == '650' ~ 'λ¨μ°2λ',
# GU_CD == '110' & DONG_CD == '660' ~ 'λ¨μ°3λ',
# GU_CD == '110' & DONG_CD == '670' ~ 'λ¨μ°4λ',
# GU_CD == '110' & DONG_CD == '680' ~ 'λλ΄1λ',
# GU_CD == '110' & DONG_CD == '690' ~ 'λλ΄2λ'
# )) %>%
# select(DONG_NM) %>%
# unlist()
#
# data_sh_resid$MCT_CAT_NM = data_sh_resid %>%
# select(MCT_CAT_CD) %>%
# mutate(MCT_CAT_NM = case_when(
# MCT_CAT_CD == '10' ~ 'μλ°',
# MCT_CAT_CD == '20' ~ 'λ μ μ©ν',
# MCT_CAT_CD == '21' ~ 'λ μ μ
μ',
# MCT_CAT_CD == '22' ~ 'λ¬Ένμ·¨λ―Έ',
# MCT_CAT_CD == '30' ~ 'κ°κ΅¬',
# MCT_CAT_CD == '31' ~ 'μ κΈ°',
# MCT_CAT_CD == '32' ~ 'μ£Όλ°©μ©κ΅¬',
# MCT_CAT_CD == '33' ~ 'μ°λ£ν맀',
# MCT_CAT_CD == '34' ~ 'κ΄νμ ν',
# MCT_CAT_CD == '35' ~ 'κ°μ ',
# MCT_CAT_CD == '40' ~ 'μ ν΅μ
',
# MCT_CAT_CD == '42' ~ 'μ볡',
# MCT_CAT_CD == '43' ~ 'μ§λ¬Ό',
# MCT_CAT_CD == '44' ~ 'μ λ³μ‘ν',
# MCT_CAT_CD == '50' ~ 'μμ 문ꡬ',
# MCT_CAT_CD == '52' ~ 'μ¬λ¬΄ν΅μ ',
# MCT_CAT_CD == '60' ~ 'μλμ°¨ν맀',
# MCT_CAT_CD == '62' ~ 'μλμ°¨μ λΉ',
# MCT_CAT_CD == '70' ~ 'μλ£κΈ°κ΄',
# MCT_CAT_CD == '71' ~ '보건μμ',
# MCT_CAT_CD == '80' ~ 'μμμ
μ',
# MCT_CAT_CD == '81' ~ 'μλ£μν',
# MCT_CAT_CD == '92' ~ 'μ리μλΉμ€'
# )) %>%
# select(MCT_CAT_NM) %>%
# unlist()
# Load the pre-processed SH card-spending table for residents (EUC-KR file).
# NOTE(review): the code below assumes this file already carries STD_YMD as a
# date plus STD_Y, DONG_NM and MCT_CAT_NM columns - confirm.
data_sh_resid = read_csv('C:/Users/user/Desktop/Big_Contest/Data/sh_card_resident_new.csv', locale = locale('ko', encoding = 'euc-kr'))

# Province and district names derived from GU_CD. Codes are compared as
# character throughout so the result does not depend on how read_csv typed
# the column. Any code other than 350 / 260 keeps the default district name.
data_sh_resid$PV_NM = '서울특별시'
data_sh_resid$PV_NM[data_sh_resid$GU_CD %in% c('260', '110')] = '대구광역시'
data_sh_resid$GU_NM = '중구'
data_sh_resid$GU_NM[data_sh_resid$GU_CD == '350'] = '노원구'
data_sh_resid$GU_NM[data_sh_resid$GU_CD == '260'] = '수성구'

# Age code -> age-band label; codes not listed here are kept as they are.
age_band = c(
  '20' = '0-25',  '25' = '25-30', '30' = '30-35', '35' = '35-40',
  '40' = '40-45', '45' = '45-50', '50' = '50-55', '55' = '55-60',
  '60' = '60-65', '65' = '65-100'
)
age_key = as.character(data_sh_resid$AGE_CD)
data_sh_resid$AGE_CD = ifelse(
  age_key %in% names(age_band),
  unname(age_band[age_key]),
  age_key
)

data_sh_resid$SEX_CD[data_sh_resid$SEX_CD == 'M'] = '남자'
data_sh_resid$SEX_CD[data_sh_resid$SEX_CD == 'F'] = '여자'

# 7-day period index (1-7 February -> period 1). Namespace-qualified because
# this section's own library() list does not attach lubridate.
data_sh_resid$week = lubridate::week(data_sh_resid$STD_YMD - 3) - 4

# Tidy, renamed copy of the analysis columns. Selecting by name already drops
# every other column, so no separate removal of index columns is needed.
df_temp = data_sh_resid %>%
  select(
    DATE = STD_YMD, PERIOD = week, PROVINCE = PV_NM, DONG = DONG_NM,
    GU = GU_NM, SEX = SEX_CD, AGE = AGE_CD, CATEGORY = MCT_CAT_NM,
    USE_AMOUNT = USE_AMT, USE_COUNT = USE_CNT
  )
#######
# eda #
#######
# Total card spending per 7-day period, one line per year.
# NOTE(review): period 18 is excluded - presumably an incomplete trailing
# period. Dividing by 1e5 matches the axis unit only if USE_AMT is recorded
# in thousands of won - confirm both.
data_sh_resid %>%
  filter(week != 18) %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(week, STD_Y) %>%
  summarise(total_USE_AMT = sum(USE_AMT)) %>%
  ungroup() %>%
  ggplot(aes(week, total_USE_AMT / 1e5, group = STD_Y, colour = STD_Y)) +
  geom_point(size = 2.5) +
  geom_line(size = 1) +
  scale_y_continuous(labels = comma) +
  labs(x = '기간 (단위: 7일)', y = '이용금액 (단위: 억)') +
  theme(legend.title = element_blank())
# Daily total card spending, one free-scaled panel per year; the colour
# legend is hidden because the facets already separate the years.
data_sh_resid %>%
  group_by(STD_Y, STD_YMD) %>%
  summarise(total_USE_AMT = sum(USE_AMT)) %>%
  ungroup() %>%
  ggplot(aes(STD_YMD, total_USE_AMT / 1e5, colour = as.factor(STD_Y))) +
  geom_point(size = 1.5) +
  geom_line(size = 1) +
  scale_y_continuous(labels = comma) +
  guides(colour = 'none') +
  labs(x = '달', y = '이용금액 (단위: 억)') +
  facet_wrap(~ STD_Y, scales = 'free', ncol = 1)
# Total card spending per year (USE_AMT / 1e8), computed once and used for
# both the bar chart and the printed percentage decline.
df_temp = data_sh_resid %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(STD_Y) %>%
  summarise(total_USE_AMT = sum(USE_AMT / 1e8))

# The y axis is zoomed to 25-37.5 (view crop only), so bar heights are not
# proportional to the totals.
df_temp %>%
  ggplot(aes(as.factor(STD_Y), total_USE_AMT, fill = as.factor(STD_Y))) +
  geom_bar(stat = 'identity', alpha = 0.75, size = 1, colour = 'black') +
  labs(x = '', y = '이용금액 (단위: 천 억)') +
  coord_cartesian(ylim = c(25, 37.5)) +
  guides(fill = 'none', colour = 'none')

# Percentage decline from the first to the second year in df_temp (rows are
# ordered by the year label, so row 1 is the earlier year).
-(df_temp$total_USE_AMT[2] - df_temp$total_USE_AMT[1]) / df_temp$total_USE_AMT[1] * 100
# Year-over-year decline of card spending by age band.
df_temp_19 = data_sh_resid %>%
  filter(STD_Y == 2019) %>%
  group_by(AGE_CD) %>%
  summarise(total_USE_AMT = sum(USE_AMT))
df_temp_20 = data_sh_resid %>%
  filter(STD_Y == 2020) %>%
  group_by(AGE_CD) %>%
  summarise(total_USE_AMT = sum(USE_AMT))

# Pair the years by AGE_CD (not by row position) before computing the rate.
df_temp_total = inner_join(
  df_temp_19, df_temp_20,
  by = 'AGE_CD',
  suffix = c('_19', '_20')
) %>%
  mutate(decre_prop_USE_AMT = -(total_USE_AMT_20 - total_USE_AMT_19) / total_USE_AMT_19 * 100) %>%
  select(AGE_CD, decre_prop_USE_AMT)

df_temp_total %>%
  ggplot(aes(AGE_CD, decre_prop_USE_AMT)) +
  geom_bar(stat = 'identity', size = 1, alpha = 0.75, position = 'dodge', colour = 'black') +
  labs(x = '나이', y = '전년대비 이용금액 감소율 (%)') +
  theme(legend.title = element_blank())
# Year-over-year decline of card spending by merchant category.
df_19 = data_sh_resid %>%
  filter(STD_Y == '2019') %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(STD_Y, MCT_CAT_NM) %>%
  summarise(total_USE_AMT = sum(USE_AMT)) %>%
  ungroup()
df_20 = data_sh_resid %>%
  filter(STD_Y == '2020') %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(STD_Y, MCT_CAT_NM) %>%
  summarise(total_USE_AMT = sum(USE_AMT)) %>%
  ungroup()

# One row per category. Building the key column from both years' names
# concatenated would be twice as long as the rate vector, so data.frame()
# would recycle the rates and every category would appear twice; joining on
# the category avoids that and keeps the years aligned.
# The column keeps its historical name incre_prop_amt although it holds the
# *decline* as a fraction (positive = decrease).
df_decre_prop_amt = inner_join(
  df_19, df_20,
  by = 'MCT_CAT_NM',
  suffix = c('_19', '_20')
) %>%
  mutate(incre_prop_amt = -(total_USE_AMT_20 - total_USE_AMT_19) / total_USE_AMT_19) %>%
  select(MCT_CAT_NM, incre_prop_amt) %>%
  as.data.frame()

df_decre_prop_amt %>%
  ggplot(aes(MCT_CAT_NM, incre_prop_amt * 100)) +
  geom_bar(stat = 'identity', position = 'dodge', alpha = 0.5, colour = 'black', fill = 'steelblue', size = 1) +
  coord_flip() +
  labs(x = NULL, y = '전년대비 이용금액 감소율 (%)') +
  theme(legend.title = element_blank())
# Year-over-year decline of card spending by merchant category and sex.
df_19 = data_sh_resid %>%
  filter(STD_Y == '2019') %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(SEX_CD, STD_Y, MCT_CAT_NM) %>%
  summarise(total_USE_AMT = sum(USE_AMT)) %>%
  ungroup()
df_20 = data_sh_resid %>%
  filter(STD_Y == '2020') %>%
  mutate(STD_Y = paste0(STD_Y, '년')) %>%
  group_by(SEX_CD, STD_Y, MCT_CAT_NM) %>%
  summarise(total_USE_AMT = sum(USE_AMT)) %>%
  ungroup()

# One row per (sex, category). The keys come from the join rather than from
# both years' key vectors concatenated, which would double every row through
# recycling of the rate vector.
# decre_prop_amt is the decline as a fraction (positive = decrease).
df_decre_prop_amt = inner_join(
  df_19, df_20,
  by = c('SEX_CD', 'MCT_CAT_NM'),
  suffix = c('_19', '_20')
) %>%
  mutate(decre_prop_amt = -(total_USE_AMT_20 - total_USE_AMT_19) / total_USE_AMT_19) %>%
  select(SEX_CD, MCT_CAT_NM, decre_prop_amt) %>%
  as.data.frame()

df_decre_prop_amt %>%
  ggplot(aes(MCT_CAT_NM, decre_prop_amt * 100, fill = SEX_CD)) +
  geom_bar(stat = 'identity', position = 'dodge', alpha = 0.5, colour = 'black') +
  coord_flip() +
  labs(x = NULL, y = '전년대비 이용금액 감소율 (%)') +
  scale_fill_manual(values = c('steelblue', 'pink')) +
  theme(legend.title = element_blank())
######
# CJ #
######
###########
# setting #
###########
library(dplyr)
library(forcats)
library(ggplot2)
library(scales)
library(tidytext)
library(readr)
library(showtext)
library(lubridate)
# Make the Google font 'Nanum Gothic' available under the family name
# 'Gothic' and switch on showtext's automatic text rendering.
font_add_google(name = 'Nanum Gothic', family = 'Gothic')
showtext_auto()

# Default theme for every plot that follows: theme_minimal() plus bold dark
# titles, light grid lines, a legend placed on top and frameless facet strips.
theme_set(
  theme_minimal() +
    theme(
      # titles
      plot.title = element_text(face = 'bold', colour = 'grey10'),
      plot.subtitle = element_text(colour = 'grey25'),
      # facet strips
      strip.background = element_blank(),
      strip.text = element_text(face = 'bold', colour = 'grey25', size = 11.25),
      # grid lines
      panel.grid.major = element_line(colour = 'grey90', size = 1),
      panel.grid.minor = element_line(colour = 'grey80', size = 0.5, linetype = 'dashed'),
      # legend
      legend.position = 'top',
      legend.spacing.x = unit(0.125, 'cm'),
      legend.background = element_rect(fill = NULL, linetype = 'dotted')
    )
)
# Load the CJ logistics extract. DL_YMD arrives as a yymmdd key: it is split
# into a four-digit year (prefix '20'), month and day, then replaced by a
# Date; `week` is the 7-day period index used throughout the script.
data_cj = read_csv('C:/Users/user/Desktop/Big_Contest/Data/cj_logistics.csv')
data_cj$DL_Y = paste0('20', substr(data_cj$DL_YMD, 1, 2))
data_cj$DL_M = substr(data_cj$DL_YMD, 3, 4)
data_cj$DL_D = substr(data_cj$DL_YMD, 5, 6)
data_cj$DL_YMD = as.Date(paste(data_cj$DL_Y, data_cj$DL_M, data_cj$DL_D, sep = '-'))
data_cj$week = week(data_cj$DL_YMD - 3) - 4
#######
# eda #
#######
# Print every distinct (period index, DL_YMD) pair as a plain data frame, to
# inspect how dates fall into the 7-day periods.
data_cj %>%
  distinct(week, DL_YMD) %>%
  as.data.frame()
# Invoice count per 7-day period (units of 10,000), one line per year.
# NOTE(review): period 18 is excluded - presumably an incomplete trailing
# period; confirm against the data's date range.
data_cj %>%
  filter(week != 18) %>%
  mutate(DL_Y = paste0(DL_Y, '년')) %>%
  group_by(week, DL_Y) %>%
  summarise(total_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup() %>%
  ggplot(aes(week, total_INVC_CONT / 1e4, group = DL_Y, colour = DL_Y)) +
  geom_point(size = 2.5) +
  geom_line(size = 1) +
  scale_y_continuous(labels = comma) +
  labs(x = '기간 (단위: 7일)', y = '송장건수 (단위: 만)') +
  theme(legend.title = element_blank())
# Daily invoice count (units of 10,000), one free-scaled panel per year; the
# colour legend is hidden because the facets already separate the years.
data_cj %>%
  group_by(DL_Y, DL_YMD) %>%
  summarise(total_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup() %>%
  ggplot(aes(DL_YMD, total_INVC_CONT / 1e4, colour = as.factor(DL_Y))) +
  geom_point(size = 1.5) +
  geom_line(size = 1) +
  labs(x = '달', y = '송장건수 (단위: 만)') +
  guides(colour = 'none') +
  facet_wrap(~ DL_Y, scales = 'free', ncol = 1)
# Invoice count per 7-day period, one line per year, with the highest period
# index present in the data dropped (instead of a hard-coded period number).
data_cj %>%
  filter(week != max(week)) %>%
  mutate(DL_Y = paste0(DL_Y, '년')) %>%
  group_by(DL_Y, week) %>%
  summarise(total_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup() %>%
  ggplot(aes(week, total_INVC_CONT / 1e4, group = DL_Y, colour = DL_Y)) +
  geom_point(size = 2.5) +
  geom_line(size = 1) +
  labs(x = '기간 (단위: 7일)', y = '송장건수 (단위: 만)') +
  theme(legend.title = element_blank())
# Total invoice count per year.
df_temp = data_cj %>%
  mutate(DL_Y = paste0(DL_Y, '년')) %>%
  group_by(DL_Y) %>%
  summarise(total_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup()

# Bar chart in units of 10,000. The y axis is zoomed to 500-1000 (view crop
# only), so bar heights are not proportional to the totals.
df_temp %>%
  ggplot(aes(DL_Y, total_INVC_CONT / 1e4, fill = as.factor(DL_Y))) +
  geom_bar(stat = 'identity', colour = 'black', size = 1, alpha = 0.75) +
  scale_y_continuous(labels = comma) +
  labs(x = NULL, y = '송장건수 (단위: 만)') +
  coord_cartesian(ylim = c(500, 1000)) +
  guides(fill = 'none')
# Year-over-year growth of invoice count by goods category (DL_GD_LCLS_NM).
df_19 = data_cj %>%
  filter(DL_Y == '2019') %>%
  group_by(DL_Y, DL_GD_LCLS_NM) %>%
  summarise(sum_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup()
df_20 = data_cj %>%
  filter(DL_Y == '2020') %>%
  group_by(DL_Y, DL_GD_LCLS_NM) %>%
  summarise(sum_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup()

# Pair the years by category (not by row position); incre_prop is the growth
# in percent, sorted from largest to smallest.
df_incre_prop = inner_join(
  df_19, df_20,
  by = 'DL_GD_LCLS_NM',
  suffix = c('_19', '_20')
) %>%
  mutate(incre_prop = (sum_INVC_CONT_20 - sum_INVC_CONT_19) / sum_INVC_CONT_19 * 100) %>%
  select(DL_GD_LCLS_NM, incre_prop) %>%
  arrange(-incre_prop) %>%
  as.data.frame()

df_incre_prop %>%
  ggplot(aes(reorder(DL_GD_LCLS_NM, incre_prop), incre_prop)) +
  geom_bar(stat = 'identity', alpha = 0.75, size = 1, colour = 'black', fill = 'steelblue') +
  labs(x = NULL, y = '전년대비 송장건수 증가율 (%)') +
  guides(fill = 'none', colour = 'none') +
  coord_flip()
# Share of each goods category in the year's total invoice count (percent),
# computed separately for 2019 and 2020 and stacked for a side-by-side chart.
# After the last step total_INVC_CONT holds the percentage, not the count.
df_temp_19 = data_cj %>%
  filter(DL_Y == '2019') %>%
  mutate(DL_Y = paste0(DL_Y, '년')) %>%
  group_by(DL_Y, DL_GD_LCLS_NM) %>%
  summarise(total_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup() %>%
  mutate(total_INVC_CONT = total_INVC_CONT / sum(total_INVC_CONT) * 100)
df_temp_20 = data_cj %>%
  filter(DL_Y == '2020') %>%
  mutate(DL_Y = paste0(DL_Y, '년')) %>%
  group_by(DL_Y, DL_GD_LCLS_NM) %>%
  summarise(total_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup() %>%
  mutate(total_INVC_CONT = total_INVC_CONT / sum(total_INVC_CONT) * 100)

df_temp_total = rbind(df_temp_19, df_temp_20)

df_temp_total %>%
  ggplot(aes(DL_GD_LCLS_NM, total_INVC_CONT, fill = DL_Y)) +
  geom_bar(stat = 'identity', position = 'dodge', alpha = 0.75, size = 1, colour = 'black') +
  labs(x = NULL, y = '전체 송장건수 중 차지 비율 (%)') +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))
# Invoice count per year and province (CTPV_NM), aggregated once and then
# both charted (units of 10,000) and printed as a table.
df_cj_province = data_cj %>%
  mutate(DL_Y = paste0(DL_Y, '년')) %>%
  group_by(DL_Y, CTPV_NM) %>%
  summarise(total_INVC_CONT = sum(INVC_CONT))

df_cj_province %>%
  ggplot(aes(CTPV_NM, total_INVC_CONT / 1e4, colour = DL_Y, fill = DL_Y)) +
  geom_bar(stat = 'identity', position = 'dodge', alpha = 0.75, size = 1, colour = 'black') +
  theme(legend.title = element_blank()) +
  labs(x = NULL, y = '송장건수 (단위: 만)')

df_cj_province
# Year-over-year growth of invoice count by province and goods category.
# Each year is filtered before aggregating, so only the needed rows are
# summarised.
df_19 = data_cj %>%
  filter(DL_Y == '2019') %>%
  group_by(CTPV_NM, DL_Y, DL_GD_LCLS_NM) %>%
  summarise(sum_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup()
df_20 = data_cj %>%
  filter(DL_Y == '2020') %>%
  group_by(CTPV_NM, DL_Y, DL_GD_LCLS_NM) %>%
  summarise(sum_INVC_CONT = sum(INVC_CONT)) %>%
  ungroup()

# Pair the years on (province, category) rather than by row position, so a
# category missing for one province in one year cannot misalign the rest.
df_incre_prop = inner_join(
  df_19, df_20,
  by = c('CTPV_NM', 'DL_GD_LCLS_NM'),
  suffix = c('_19', '_20')
) %>%
  mutate(incre_prop = (sum_INVC_CONT_20 - sum_INVC_CONT_19) / sum_INVC_CONT_19 * 100) %>%
  select(CTPV_NM, DL_GD_LCLS_NM, incre_prop) %>%
  as.data.frame()

# One point per province for each category; the grey segment joins the
# provinces' values within a category.
df_incre_prop %>%
  ggplot(aes(reorder(DL_GD_LCLS_NM, incre_prop), incre_prop)) +
  geom_line(aes(group = DL_GD_LCLS_NM), size = 2.5, alpha = 0.25, colour = 'black') +
  geom_point(aes(colour = CTPV_NM), size = 5) +
  scale_colour_manual(values = c('orange', 'brown')) +
  labs(x = NULL, y = '전년대비 송장건수 증가율 (%)') +
  coord_flip() +
  theme(legend.title = element_blank())