
# Package setup ----
# Install a package only when it is missing, so re-running the script does not
# download and reinstall everything each time. All installs happen before any
# library() call, so no package is attached before it is available.
required_pkgs <- c("ggplot2", "foreign", "dplyr", "readxl")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# readxl is only made available, not attached, exactly as before.
library(ggplot2)
library(foreign)
library(dplyr)

# Check that plotting works: scatter plot of `displ` against `hwy` from the
# `mpg` data set.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()

# Read the raw data from "ride.csv" (path is relative to the working
# directory), open it in the data viewer and print its dimensions.
ride <- read.csv(file = "ride.csv")
View(ride)

dim(ride)

# First look at `ownership` ----
# qplot() is deprecated since ggplot2 3.4.0, so the distribution is drawn with
# ggplot() directly. A histogram is what qplot() chooses for a numeric vector.
# NOTE(review): assumes `ownership` is numeric (the script later averages it);
# confirm with the class() call below.
ggplot(data = ride, aes(x = ownership)) +
  geom_histogram()

# Frequency of each value and the storage class of the column.
table(ride$ownership)

class(ride$ownership)

# Recode `people` ----
# Frequencies of the raw codes.
table(ride$people)

# The code 9 is replaced by NA.
ride$people <- ifelse(ride$people == 9, NA, ride$people)

# How many values are now missing.
table(is.na(ride$people))

# 1 becomes "people", every other non-missing value becomes "nopeople";
# NA stays NA because ifelse() propagates a missing test.
ride$people <- ifelse(ride$people == 1, "people", "nopeople")
table(ride$people)

# Bar chart of the labels. qplot() is deprecated since ggplot2 3.4.0; a bar
# chart is what it draws for a character vector.
ggplot(data = ride, aes(x = people)) +
  geom_bar()

# Recode `fee` ----
# Frequencies and missing values of the raw codes (each shown once; the
# repeated identical table() calls were redundant).
table(ride$fee)
table(is.na(ride$fee))

# 1 becomes "fee", every other non-missing value becomes "free"; NA stays NA.
# Note: this overwrites the original codes, so running the line a second time
# would relabel every non-missing row as "free". Re-read the data first.
ride$fee <- ifelse(ride$fee == 1, "fee", "free")
table(ride$fee)


# Clean `ownership` ----
# Raw frequencies of `ownership` and `cradle`.
table(ride$ownership)
table(ride$cradle)

# Distribution of `ownership` before cleaning.
summary(ride$ownership)
# qplot() is deprecated since ggplot2 3.4.0; draw the histogram directly.
# NOTE(review): assumes `ownership` is numeric — confirm.
ggplot(data = ride, aes(x = ownership)) +
  geom_histogram()

table(is.na(ride$ownership))

# The code 9999 is replaced by NA; the second table shows the new NA count.
ride$ownership <- ifelse(ride$ownership == 9999, NA, ride$ownership)
table(is.na(ride$ownership))

# Frequencies of shop names and addresses.
table(ride$Bicycle.Rental.Shop)
table(ride$address)

# Lookup table pairing the region codes 1 to 9 with their region names.
list_region <- data.frame(
  code_region = seq_len(9),
  region = c(
    "경기도 안산시",
    "경기도 오산시",
    "경기도 수원시",
    "경기도 시흥시",
    "경기도 부천시",
    "경기도 고양시",
    "경기도 과천시",
    "경기도 양평군",
    "경기도 연천군"
  )
)
list_region
# Row counts per address and ownership value ----
# Built in a single pipeline; previously the counting step ran twice and the
# first result was overwritten straight away.
#   n         : number of rows for each (address, ownership) pair
#   tot_group : total number of rows for the address
#   pct       : n as a percentage of tot_group, rounded to 2 decimals
# `.groups = "drop_last"` states explicitly that the result stays grouped by
# `address` for the mutate() step; ungroup() then returns a plain table so
# later verbs are not silently applied per address.
region_own <- ride %>%
  group_by(address, ownership) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(
    tot_group = sum(n),
    pct = round(n / tot_group * 100, 2)
  ) %>%
  ungroup()
region_own
View(region_own)

# Horizontal bars of `ownership` per address.
# The data source must be `ride`: `list_region` only has the columns
# `code_region` and `region`, so mapping `address` and `ownership` from it
# fails. geom_col() stacks all rows of one address, so each bar shows the sum
# of `ownership` for that address (rows with NA are dropped with a warning).
ggplot(data = ride, aes(x = address, y = ownership)) +
  geom_col() +
  coord_flip()



# Five rental shops in "경기도 안산시" with the highest mean `ownership`.
# `ownership` contains NA after the 9999 code was recoded, so na.rm = TRUE is
# needed; without it one missing value turns a shop's mean into NA.
df <- ride %>%
  filter(address == "경기도 안산시") %>%
  group_by(Bicycle.Rental.Shop) %>%
  summarise(mean_own = mean(ownership, na.rm = TRUE)) %>%
  arrange(desc(mean_own)) %>%
  head(5)


# Attach region names to `ride` ----
# The former `address <- left_join(address, ...)` call was dropped: no object
# named `address` is defined in the visible script (it is only used as a
# column of `ride`), so that call could not run.
# left_join() takes the key column through `by`; `id` is not one of its
# arguments. The guard keeps a re-run from adding `region.x` / `region.y`.
# NOTE(review): assumes `ride` has a `code_region` column — confirm.
if (!"region" %in% names(ride)) {
  ride <- left_join(ride, list_region, by = "code_region")
}

# Quick check of the key next to the address.
ride %>%
  select(code_region, address) %>%
  head()

# NOTE(review): the bare `fee` on the right-hand side is not defined anywhere
# in the visible part of this script, so this line stops with "object 'fee'
# not found" unless it is created elsewhere. `ride$fee` was also recoded to
# the labels "fee"/"free" earlier, so `>=` would compare strings.
# TODO: confirm the intended comparison value.
ride$test <- ifelse(ride$fee >= fee, "OK", "NO")

# Horizontal bars of `ownership` per address.
# The data source must be `ride`: `list_region` only has the columns
# `code_region` and `region`, so mapping `address` and `ownership` from it
# fails. geom_col() stacks all rows of one address, so each bar shows the sum
# of `ownership` for that address (rows with NA are dropped with a warning).
ggplot(data = ride, aes(x = address, y = ownership)) +
  geom_col() +
  coord_flip()


# Make sure the region names are attached, then summarise `address`.
# left_join() takes the key column through `by` (`id` is not one of its
# arguments). The guard skips the join when `region` is already present, so
# repeating it does not create `region.x` / `region.y`.
# NOTE(review): assumes `ride` has a `code_region` column — confirm.
if (!"region" %in% names(ride)) {
  ride <- left_join(ride, list_region, by = "code_region")
}

# maxsum = 300 raises the number of levels summary() will list.
ride %>%
  select(address) %>%
  summary(maxsum = 300)

# Explore `address` and `fee` ----
# qplot() is deprecated since ggplot2 3.4.0; the bar charts are drawn with
# ggplot() + geom_bar(), which is what qplot() uses for non-numeric vectors.
# NOTE(review): assumes `address` is character or factor — confirm with the
# class() call below.
ggplot(data = ride, aes(x = address)) +
  geom_bar()

# Overview of every column.
summary(ride)

# Class, summary and missing values of `address`.
class(ride$address)

summary(ride$address)

table(is.na(ride$address))

# Frequencies of the `fee` labels (shown once; the second identical table()
# call was redundant) and the matching bar chart.
table(ride$fee)
ggplot(data = ride, aes(x = fee)) +
  geom_bar()

# `fee` next to `repair`.
ride %>%
  select(fee, repair)

# `fee` was recoded to the labels "fee"/"free" earlier in this script, so it
# cannot be used as a numeric y value for a line plot or box plot. Counting
# the labels per address is the comparison that works for a two-level label.
# NOTE(review): this replaces the line and box plots — confirm it matches the
# intended analysis.

# Number of shops per address, split by fee label.
ggplot(data = ride, aes(x = address, fill = fee)) +
  geom_bar()

# Same data as proportions within each address.
ggplot(data = ride, aes(x = address, fill = fee)) +
  geom_bar(position = "fill")


# Column types and a preview of every column in `ride`.
str(ride)

# Summary of the recoded `fee` column.
summary(ride[["fee"]])

# Histogram of `cradle`; the call is kept in `$` form because hist() builds
# its default title from the expression it is given.
hist(ride$cradle)

# Label frequencies of `fee`, then the count of missing values.
table(ride[["fee"]])

table(is.na(ride[["fee"]]))

# Share of fee-charging shops per address ----
# `fee` holds the labels "fee"/"free" (recoded earlier in this script), so
# mean(fee) returns NA with a warning. Averaging the logical `fee == "fee"`
# gives the proportion of rows labelled "fee" within each address.
# The output column keeps the name `mean_income` so code that reads
# `address_fee` keeps working.
# NOTE(review): it holds a proportion between 0 and 1, not an income.
address_fee <- ride %>%
  filter(!is.na(fee)) %>%
  group_by(address) %>%
  summarise(mean_income = mean(fee == "fee"))
