본문 바로가기

분류 전체보기

(86)
dplyr ์‚ฌ์šฉ๋ฒ• dplyr์˜ ๋Œ€ํ‘œ์ ์ธ ํ•จ์ˆ˜ select, filter, mutate, summarise, group_by, sample_n, sample_frac์˜ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•์„ ์ดํ•ดํ•˜๊ณ  ์ ์šฉํ•˜์—ฌ ๋ณด์ž. In: library(dplyr) df_iris = iris str(df_iris) Out: Sepal.Length Sepal.Width Petal.Length Petal.Width Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300 Median :5.800 Median :3.000 Median :4.350 Median :1.300 Mean :5.843 Mean :3.057 Mean :3...
결측치 다루는 방법 결측치 다루는 방법에 대하여 알아보자. □ 결측치 확인 In: library(MASS) df_car = Cars93 df_car %>% sapply(function(x) sum(is.na(x))) Out: Manufacturer Model Type 0 0 0 Min.Price Price Max.Price 0 0 0 MPG.city MPG.highway AirBags 0 0 0 DriveTrain Cylinders EngineSize 0 0 0 Horsepower RPM Rev.per.mile 0 0 0 Man.trans.avail Fuel.tank.capacity Passengers 0 0 0 Length Wheelbase Width 0 0 0 Turn.circle Rear.seat.room Luggag..
CSV ํŒŒ์ผ ์ฝ๊ธฐ/์“ฐ๊ธฐ CSV ํŒŒ์ผ์„ ์ƒ์„ฑํ•œ ๋’ค, ์ด๋ฅผ ํŠน์ • ๊ฒฝ๋กœ์— ์“ฐ๊ณ  ์ฝ๊ธฐ๋ฅผ ์ˆ˜ํ–‰ํ•˜์—ฌ ๋ณด์ž. In: df_car = mtcars df_car %>% head() Out: mpg cyl disp hp drat wt qsec vs am gear carb Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 Va..
๋ฌธ์ž์—ด ๋ฐ์ดํ„ฐ ๋‹ค๋ฃจ๋Š” ๋ฐฉ๋ฒ• R์˜ ๋‚ด์žฅ๋˜์–ด ์žˆ๋Š” ํ•จ์ˆ˜์™€ stringr ํŒจํ‚ค์ง€๋ฅผ ํ™œํ•˜์—ฌ ๋ฌธ์ž์—ด ๋ฐ์ดํ„ฐ๋ฅผ ๋‹ค๋ฃจ๋Š” ๋ฐฉ๋ฒ•์— ๋Œ€ํ•ด ์•Œ์•„๋ณผ ๊ฒƒ์ด๋‹ค. โ–ก ๋ฌธ์ž ์ด์–ด ๋ถ™์ด๊ธฐ In: paste('Rooney', 'Song', sep = '_') paste0('Rooney', 'Song') str_c('Rooney', 'Song', sep = '_') Out: [1] "Rooney_Song" [1] "RooneySong" [1] "Rooney_Song" โ–ท paste, paste0 ํ•จ์ˆ˜๋Š” ๋‘ ๋ฌธ์ž์—ด์„ ๋ถ™์—ฌ์ฃผ๋Š” ์—ญํ• ์„ ํ•œ๋‹ค. ๋‘ ํ•จ์ˆ˜์˜ ์ฐจ์ด์ ์€ paste0 ํ•จ์ˆ˜๋Š” ๋ถ™์ผ ๋•Œ, ์‚ฌ์ด์— ๋ฌธ์ž๋ฅผ ์‚ฝ์ž…ํ•˜์ง€ ์•Š๊ณ , ๋ฐ”๋กœ ๋ถ™์ธ๋‹ค๋Š” ๊ฒƒ์ด๋‹ค. paste ํ•จ์ˆ˜์˜ sep ์ธ์ž๋ฅผ ํ†ตํ•ด ๋‘ ๋ฌธ์ž์—ด์„ ๋ถ™์ผ ๋•Œ, ์‚ฌ์ด์— ๋“ค์–ด๊ฐˆ ๋ฌธ์ž๋ฅผ ์ง€์ •ํ•  ์ˆ˜ ์žˆ๋‹ค. โ–ท str_c ํ•จ์ˆ˜๋Š” st..
시계열 데이터 다루는 방법 R의 내장되어 있는 함수와 lubridate 패키지를 활용하여 시계열 데이터를 다루는 방법에 대해 알아볼 것이다. □ 기본 함수를 활용한 시계열 데이터 다루기 In: date_form = c('%Y%m%d', '%Y.%m.%d', '%Y~%m~%d', '%Y-%m-%d') date_1 = as.Date('20201019', tryFormats = date_form) date_2 = as.Date('2020.10.19', tryFormats = date_form) date_3 = as.Date('2020~10~19', tryFormats = date_form) print(date_1) print(date_2) print(date_3) print(class(date_1)) print(class(date_2)..
tidyr ์‚ฌ์šฉ๋ฒ• tidyr ํŒจ์บ์ง€์˜ ๋Œ€ํ‘œ์ ์ธ ํ•จ์ˆ˜ gather, spread, seperate, unite์˜ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•์„ ์ดํ•ดํ•˜๊ณ  ์ ์šฉํ•˜์—ฌ ๋ณด์ž. In: library(dplyr) library(tidyr) df_iris = iris df_iris$id = 1:nrow(df_iris) df_iris = df_iris[, c(6, 1:5)] str(df_iris) Out: 'data.frame':150 obs. of 6 variables: $ id : int 1 2 3 4 5 6 7 8 9 10 ... $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... $ ..
apply, sapply, lapply 사용법 apply, sapply, lapply 함수의 사용법을 이해하고 적용하여 보자. In: library(dplyr) df_iris = iris df_iris_num = iris %>% select(-Species) str(df_iris_num) Out: 'data.frame':150 obs. of 4 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... $ Petal.Width : num 0.2 0.2 0.2 0..
Selenium๊ณผ BeautifulSoup๋ฅผ ํ™œ์šฉํ•œ ํฌ๋กค๋ง BeautifulSoup์™€ Selenium์„ ์ด์šฉํ•˜์—ฌ ํ”ผํŒŒ ์˜จ๋ผ์ธ์˜ ๋ฐ์ดํ„ฐ ์„ผํ„ฐ์—์„œ 5์›”๋ถ€ํ„ฐ 10์›”๊นŒ์ง€์˜ ํฌ์ง€์…˜๋ณ„ ์„ ์ˆ˜์˜ ์ด์šฉ์ž ์ˆ˜ ๋ฐ์ดํ„ฐ๋ฅผ ํฌ๋กค๋ง ํ•œ ํ›„, ํ”ผํŒŒ ์˜จ๋ผ์ธ์˜ ํฌ์ง€์…˜๋ณ„ ์„ ์ˆ˜์˜ ์„ ํ˜ธ๋„๋ฅผ ํ™•์ธํ•  ๊ฒƒ์ด๋‹ค. ์ •์  ํฌ๋กค๋ง๋งŒ์œผ๋กœ ํ•ด๋‹น ์‚ฌ์ดํŠธ์˜ ๋ฐ์ดํ„ฐ๋ฅผ ๋‹ค ๋ถˆ๋Ÿฌ์˜ฌ ์ˆ˜ ์—†๊ธฐ ๋•Œ๋ฌธ์— ์ •์  ํฌ๋กค๋ง์„ ์ˆ˜ํ–‰ํ•˜๋Š” BeautifulSoup์™€ ๋™์  ํฌ๋กค๋ง์„ ์ˆ˜ํ–‰ํ•˜๋Š” Selenium์„ ํ•จ๊ป˜ ํ™œ์šฉํ•˜์—ฌ ํฌ๋กค๋ง์„ ์ˆ˜ํ–‰ํ•˜์˜€๋‹ค. ์ˆ˜ํ–‰ ๊ณผ์ •์€ ๋‹ค์Œ๊ณผ ๊ฐ™๋‹ค. 1. ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘ ๊ณผ์ • 2. ํฌ๋กค๋ง 3. ์‹œ๊ฐํ™” 1. ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘ ๊ณผ์ • ํฌ์ง€์…˜๋ณ„ ์„ ์ˆ˜์˜ ์ด์šฉ์ž ์ˆ˜๋ฅผ ์–ป๊ธฐ ์œ„ํ•ด์„œ๋Š” ํ”ผํŒŒ ์˜จ๋ผ์ธ์˜ ๋ฐ์ดํ„ฐ ์„ผํ„ฐ(fifaonline4.nexon.com/datacenter/dailysquad)์— ์ ‘์†ํ•œ ํ›„, ๋‹ค์Œ ๊ณผ์ •์„ ํ†ตํ•ด ์ˆ˜ํ–‰ํ•˜์—ฌ์•ผ ํ•œ๋‹ค. (1)..
BeautifulSoup๋ฅผ ํ™œ์šฉํ•œ ํฌ๋กค๋ง โ–ก ํฌ๋กค๋ง์˜ ์ˆœ์„œ (1) ์›ํ•˜๋Š” ํŽ˜์ด์ง€์˜ HTML ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ›๊ธฐ (2) HTML์„ ๋ถ„์„(ํŒŒ์‹ฑ) ๊ฐ€๋Šฅํ•œ ํ˜•ํƒœ๋กœ ๊ฐ€๊ณต (3) ์›ํ•˜๋Š” ๋ฐ์ดํ„ฐ๋ฅผ ์ถ”์ถœ In: import urllib.request # ์›นํŽ˜์ด์ง€ ์ ‘์† req = urllib.request.Request('https://naver.com') # ์›น์„œ๋ฒ„๋ฅผ Requestํ•˜๊ธฐ ์œ„ํ•œ ๊ฐ์ฒด ์ƒ์„ฑ res = urllib.request.urlopen(req) # Reqeustํ•œ ๊ฐ์ฒด์— ๋Œ€ํ•œ urlopen ํ•จ์ˆ˜์˜ ์ˆ˜ํ–‰๊ฒฐ๊ณผ๋ฅผ ๋ณ€์ˆ˜์— ์ €์žฅ html_byte = res.read() # read ํ•จ์ˆ˜๋ฅผ ํ†ตํ•ด ๋ฐ”์ดํŠธ ํ˜•ํƒœ๋กœ ์ €์žฅ html_str = html_byte.decode('UTF8') # ๋ฌธ์ž์—ด๋กœ ์‚ฌ์šฉํ•˜๊ธฐ ์œ„ํ•ด UTF-8๋กœ ๋””์ฝ”๋”ฉ ์ˆ˜ํ–‰ html_splt = html..
[Level 2] ๊ฐ€์žฅ ํฐ ์ •์‚ฌ๊ฐํ˜• ์ฝ”๋”ฉํ…Œ์ŠคํŠธ ์—ฐ์Šต - ๊ฐ€์žฅ ํฐ ์ •์‚ฌ๊ฐํ˜• ์ฐพ๊ธฐ [[0,1,1,1],[1,1,1,1],[1,1,1,1],[0,0,1,0]] 9 programmers.co.kr def solution(b): len_x = len(b) len_y = len(b[0]) for x in range(1, len_x): for y in range(1, len_y): if b[x][y] == 1: b[x][y] = min(b[x][y-1], b[x-1][y], b[x-1][y-1])+1 return(max([i for x in b for i in x])**2) โ–ท ์ด ๋ฌธ์ œ์˜ ๊ฐ€์žฅ ํ•ต์‹ฌ์€ DP(Dyanamic Programming)์„ ์ด์šฉํ•˜์—ฌ ํ‘ธ๋Š” ๊ฒƒ์ด๋‹ค. DP๋ฅผ ์ด์šฉํ•˜์ง€ ์•Š์„ ๊ฒฝ์šฐ, ์‹œ๊ฐ„์ด ์ดˆ๊ณผ๋˜๋Š” ๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ•œ๋‹ค. ๋”ฐ๋ผ์„œ ์ •์‚ฌ๊ฐํ˜•์˜ ๋ชจ..
LSTM(Long Short-Term Memory model) 구현 파이토치를 이용하여 LSTM(Long Short-Term Memory model)을 구현할 것이다. 사용될 데이터는 아마존의 주가로 종가와 거래량을 이용하여, 일주일 뒤의 종가를 예측하는 것이 목적이다. 데이터는 여기(www.kaggle.com/camnugent/sandp500)에서 얻을 수 있다. 데이터구현 과정은 다음과 같다. 1. 데이터 전처리 2. 모델 설정 3. 모델 학습 4. 학습 결과 1. 데이터 전처리 In: import pandas as pd from numpy import array from numpy import hstack import matplotlib.pyplot as plt import torch def min_max_scaler(arr): min_arr = min(arr)..
매개효과(Mediating effect) □ 매개효과(Mediating effect) - 독립변수가 종속변수에 유의한 영향을 미칠 때, 그 사이에 매개변수를 통해 그 영향이 전달되는 경우 - X(독립변수) → M(매개변수) → Y(종속변수) □ 매개효과의 종류 (1) 부분매개 효과 (2) 완전매개 효과 □ 매개효과 검증방법 - Baron & Kenny (1986) 방법 [1단계] X → M [2단계] X → Y [3단계] X + M → Y (1) 부분매개 효과 검증 [1단계] M = a + b1*X → b1 유의 [2단계] Y = a + phi*X → phi 유의 [3단계] Y = a + b2*X + b3*M → b3, b2 유의 ▷ 위의 3단계의 각 계수의 유의 조건을 만족할 경우, 부분매개효과가 나타난다고 볼 수 있다. (2) 완전매개 효과..