# install.packages("tidyverse")
library(tidyverse)

ablation <- read.csv("Ablation.csv", header = TRUE, stringsAsFactors = TRUE)
names(ablation)[names(ablation) == "SCORE"] <- "Score"

ggplot(ablation, aes(x = Time, y = Score)) + geom_point()
ggplot(ablation, aes(x = Time, y = Score)) + geom_point(color = "red", size = 4)
ggplot(ablation, aes(x = Time, y = Score)) +
  geom_point(aes(color = Experiment), size = 4)
ggplot(ablation, aes(x = Time, y = Score)) +
  geom_point(aes(color = Experiment, shape = CellType), size = 4)
ggplot(ablation, aes(x = Time, y = Score)) +
  geom_point(aes(color = Experiment), size = 4) +
  geom_text(aes(label = CellType), hjust = 0, size = 3)

p <- ggplot(ablation, aes(x = Time, y = Score))
p <- p + geom_point(aes(color = Experiment, shape = Measurement), size = 4)
p <- p + geom_line(aes(group = interaction(Experiment, Measurement, CellType),
                       color = Experiment,
                       linetype = CellType))
print(p)
interaction(ablation$Experiment, ablation$Measurement, ablation$CellType)
levels(interaction(ablation$Experiment, ablation$Measurement, ablation$CellType))

p <- ggplot(ablation, aes(x = Time, y = Score))
p + geom_point(aes(color = Experiment, shape = Measurement), size = 4) +
  scale_shape_manual(values = c(1,16))
p + geom_point(aes(color = Experiment, shape = Measurement), size = 4, alpha = 0.5) +
  scale_shape_manual(values = c(1,16))
p + geom_point(aes(color = Experiment, shape = Measurement), size = 4, position = position_dodge(0.5)) +
  scale_shape_manual(values = c(1,16))

p <- ggplot(ablation, aes(x = Time, y = Score, color = Experiment))
p <- p + geom_point(aes(shape = Measurement), size = 4)
p <- p + geom_line(aes(group = interaction(Experiment, Measurement, CellType),
                       linetype = CellType))
print(p)

( p <- p + labs(title = "Ablation", x = "Time (minutes)", y = "% Saturation") )
( p <- p + theme_bw() + theme(plot.title = element_text(h = 0.5)))

p + scale_x_continuous(breaks = c(0,5,10,20,30))
p + scale_x_continuous(breaks = unique(ablation$Time))

p + scale_shape_manual(values = c(1,16), labels = c("LDLR", "TfR")) +
    scale_linetype_discrete(name = "Cell type") +
    scale_color_manual(values = c("blue", "red", "green"))

library(RColorBrewer)
display.brewer.all()

p + facet_grid(Measurement ~ .)
p + facet_grid(. ~ Measurement)
p + facet_grid(Experiment ~ Measurement)
p + facet_grid(Measurement ~ Experiment)

p + facet_grid(Measurement ~ Experiment) +
  scale_color_discrete(guide = "none") + scale_shape_discrete(guide = "none")

trees <- data.frame(x = rnorm(100), y = rnorm(100),
                    size = rnorm(100, mean = 5, sd = 2))
ants <- data.frame(a = rnorm(10000, sd = 0.4, mean = 1),
                   b = rnorm(10000, sd = 0.2, mean = -1))
p1 <- ggplot() +
  geom_point(data = ants, aes(x = a, y = b), color = "brown", size = 2, alpha = 0.01) +
  geom_point(data = trees, aes(x = x, y = y, size = size), color = "green", shape = 8)
print(p1)

p2 <- ggplot() +
  stat_density2d(data = ants, aes(x = a, y = b, fill = ..level..),
                 alpha = 0.5, geom = "polygon") +
  geom_point(data = trees, aes(x = x, y = y, size = size),
             color = "green", shape = 8) +
  scale_fill_gradient(low = "white", high = "brown") +
  scale_size_continuous(name = "Tree size")
print(p2)

pdf(file = "figures.pdf", paper = "letter")
print(p)
print(p1)
print(p2)
dev.off()

# Bonus code: how to display all the possible shapes
i <- 0:25; x <- i %/% 6; y <- i %% 6
df <- data.frame(name = i, row = x, column = y)
ggplot(df, aes(x = row, y = column)) +
  geom_point(shape = 0:25, fill = "yellow", size = 4) +
  geom_text(label = i, vjust = 2) +
  scale_y_reverse() +
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x = element_blank(),
         axis.title.y=element_blank(), axis.text.y=element_blank())

# Data wrangling
library(tidyverse) # using tidyr
set.seed(1)
mouse_weights_sim <- data.frame(
  time = seq(as.Date("2017/1/1"), by = "month", length.out = 12),
  mickey = rnorm(12, 20, 1),
  minnie = rnorm(12, 20, 2),
  mighty = rnorm(12, 20, 4)
)

mouse_weights <- gather(data = mouse_weights_sim,
          key = mouse, value = weight, mickey, minnie, mighty)
mouse_weights <- gather(data = mouse_weights_sim, 
          key = mouse, value = weight, -time)
mouse_weights <- gather(data = mouse_weights_sim,
          key = mouse, value = weight, mickey:mighty)

ggplot(mouse_weights, aes(x = mouse, y = weight)) +
   geom_boxplot(aes(fill = mouse))
ggplot(mouse_weights, aes(x = time, y = weight)) +
   geom_boxplot(aes(group = time))
ggplot(mouse_weights, aes(x = time, y = weight)) +
   geom_boxplot(aes(group = time)) + geom_point(aes(color = mouse))
ggplot(mouse_weights, aes(x = time, y = weight)) +
   geom_point(aes(color = mouse)) + geom_line(aes(group = mouse, color = mouse))

#rm(USPersonalExpenditure)
uspe_df <- as.data.frame(USPersonalExpenditure)
uspe_df$Category <- rownames(USPersonalExpenditure)
uspe <- gather(uspe_df, Year, Amount, -Category)

ggplot(uspe, aes(x = Year, y = Amount)) +
  geom_bar(stat = "identity", aes(fill = Category))

ggplot(uspe, aes(x = Year, y = Amount)) +
  geom_bar(stat = "identity", aes(fill = Category)) +
  theme(legend.justification = c(0,1), legend.position = c(0,1))

ggplot(uspe, aes(x = Year, y = Amount)) +
  geom_bar(stat = "identity", position = "dodge", aes(fill = Category)) +
  theme(legend.justification = c(0,1), legend.position = c(0,1))

spread(data = mouse_weights, key = mouse, value = weight)
spread(ablation, key = CellType, value = Score)
abl_united <- unite(ablation, ExptCell, Experiment, CellType, sep = ".")
spread(abl_united, ExptCell, Score)
separate(abl_united, ExptCell, c("Expt", "Cell"), sep = "\\.")

library(dplyr)
experiment_log <- data.frame(Experiment = c("E1909", "E1915", "E1921"),
                             Tech = c("Goneril", "Regan", "Cordelia"),
                             stringsAsFactors = TRUE)
str(experiment_log)
experiment_log
inner_join(ablation, experiment_log)

save(ablation, file = "ablation.Rdata")
load("ablation.Rdata")

head(select(msleep, name, sleep_total))
head(msleep[ , c("name", "sleep_total")])
class(msleep)
msleep %>% select(name, sleep_total) %>% head
msleep %>%
  select(name, sleep_total) %>%
  head
head(msleep[ , -1])
head(select(msleep, -name))
head(select(msleep, -c(name, sleep_total)))
msleep %>%
  select(-c(name, sleep_total)) %>%
  head

msleep %>%
  select(starts_with("sl")) %>%
  head
head(msleep[ , startsWith(names(msleep), "sl")])

msleep[msleep$sleep_total >= 16, ]
msleep %>%
  filter(sleep_total >= 16)

msleep %>%
  filter(order %in% c("Perissodactyla", "Primates"))
msleep[msleep$order %in% c("Perissodactyla", "Primates"), ]

msleep %>%
  filter(sleep_total >= 16, bodywt >= 1)
msleep %>%
  filter(sleep_total >= 16 & bodywt >= 1)
msleep[msleep$sleep_total >= 16 & msleep$bodywt >=1, ]

msleep %>% arrange(order) %>% head

msleep %>%
  arrange(desc(order)) %>%
  head

msleep %>%
  select(name, order, sleep_total) %>%
  arrange(order, sleep_total) %>%
  head

msleep %>%
  arrange(order, sleep_total) %>%
  select(name, order) %>%
  head

ToothGrowth %>%
  summarize(meanLen = mean(len))

ToothGrowth %>%
  group_by(supp) %>%
  summarize(meanLen = mean(len))

ToothGrowth %>%
  group_by(supp, dose) %>%
  summarize(meanLen = mean(len), n = n())

ToothGrowth %>%
  group_by(supp, dose) %>%
  mutate(norm.len = (len - mean(len))/sd(len), max = max(len)) %>%
  print(n = 60)

ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarize(mean_score = mean(Score)) %>%
  spread(CellType, mean_score)

ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarize(min = min(Score), max = max(Score))

ablation_mean_sd <- ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarize(mean = mean(Score), sd = sd(Score))

ggplot(ablation_mean_sd, aes(x = Time, y = mean)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.4) +
  facet_grid(Measurement ~ CellType) +
  geom_line() +
  geom_point(data = ablation, aes(y = Score), color = "pink", shape = 1) +
  labs(title = "+/- 1 SD")

ablation_mean_ci <- ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarize(mean = mean(Score),
            lower_limit = t.test(Score)$conf.int[1],
            upper_limit = t.test(Score)$conf.int[2])

ablation %>% mutate(rate = ifelse(Time > 0, Score / Time, NA))

ggplot(ablation_mean_sd, aes(x = Time, y = mean)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = mean - sd,
                    ymax = mean + sd), width = 0.4) +
  facet_grid(Measurement ~ CellType) + geom_line() +
  geom_point(data = ablation %>%
                    group_by(Measurement, CellType, Time) %>%
                    mutate(outlier = abs((Score - mean(Score)) / sd(Score)) > 1),
             aes(y = Score, color = outlier), size = 4, shape = 1) +
  labs(title = "+/- 1 SD", y = "Mean") +
  scale_colour_discrete(name = "Outlier Status",
                        labels = c("Within 1 SD", "Outside 1 SD"))


# Complete example

library(tidyverse) # using ggplot2, dplyr, tidyr packages

analyze.all <- function(save_plots = TRUE) {

  # Load data
  ablation <- read.csv(file = "Ablation.csv",
                       header = TRUE,
                       stringsAsFactors = TRUE)
  names(ablation)[names(ablation) == "SCORE"] <- "Score"
  print(ablation)

  ablation_means <- ablation %>%
    group_by(CellType, Measurement, Time) %>%
    summarize(mean = mean(Score), n = n())
  print(ablation_means)

  # Set up plotting
  if (save_plots) {
    pdf(file = "plot.pdf")
  }

  # Plot all data
  g <- ggplot(ablation, aes(x = Time, y = Score)) +
    geom_point() +
    geom_line(aes(color = Experiment)) +
    facet_grid(Measurement ~ CellType) +
    theme_bw()
  print(g)

  # Plot averages over experiments
  g <- ggplot(ablation_means, aes(x = Time, y = mean)) +
    geom_point() +
    geom_line(aes(color = CellType)) +
    facet_wrap(~ Measurement) +
    theme_bw()
  print(g)

  # Separate plots of averages over experiments
  for (measurement in levels(ablation.means$Measurement)) {
    g <- ggplot(ablation_means %>%
         filter(Measurement == measurement), aes(x = Time, y = mean)) +
      geom_point() +
      geom_line(aes(color = CellType)) +
      labs(title = measurement) +
      theme_bw()
    print(g)
  }

  # Close plotting device
  if (save_plots) {
    dev.off()
  }
}

analyze.all(FALSE)