Setting up

# load required packages
library(tidyverse)
library(scales)
library(DT)

# Read the cleaned district spill log (produced water spills, 2013-2022,
# compiled from district office spill logs).
tx_spills_district_prod_water <- readr::read_csv(
  file = "data/district_cleaned.csv"
)

Total produced water spilled

# Statewide totals: number of spill records, plus the summed release and
# recovery volumes (NAs dropped, result rounded to a whole number).
tx_spills_district_prod_water %>%
  summarize(
    spills = n(),
    released = round(sum(release_prod_water_edit, na.rm = TRUE)),
    recovered = round(sum(recovery_prod_water_edit, na.rm = TRUE))
  ) %>%
  # Convert every column to a comma-separated string for display.
  # across() replaces the superseded mutate_all().
  mutate(across(everything(), function(x) prettyNum(x, big.mark = ","))) %>%
  datatable(colnames = c("Spills", "Gallons released", "Gallons recovered"))

Spills by year

# Yearly totals: spill count plus summed release and recovery volumes, grouped
# by the calendar year of date_of_spill_edit. Sums drop NAs and are rounded to
# whole numbers.
# year() is namespace-qualified so the chunk does not rely on lubridate being
# attached (library(tidyverse) only attaches it from tidyverse 2.0.0 onward).
spills_year <- tx_spills_district_prod_water %>%
  group_by(year = lubridate::year(date_of_spill_edit)) %>%
  summarize(
    spills = n(),
    released = round(sum(release_prod_water_edit, na.rm = TRUE)),
    recovered = round(sum(recovery_prod_water_edit, na.rm = TRUE))
  )

# Interactive table of the yearly totals. Columns 2-4 (spills, released,
# recovered) are turned into comma-separated strings for display only; the
# spills_year object itself is not modified.
# across() replaces the superseded mutate_at().
spills_year %>%
  mutate(across(2:4, function(x) prettyNum(x, big.mark = ","))) %>%
  datatable(
    colnames = c("Year", "Spills", "Gallons released", "Gallons recovered")
  )
# Column chart of the released volume per year, with a thin baseline at zero,
# even-year x-axis ticks and comma-formatted y-axis labels.
ggplot(data = spills_year, mapping = aes(x = year, y = released)) +
  geom_col(fill = "red") +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  scale_x_continuous(breaks = seq(2014, 2022, by = 2)) +
  scale_y_continuous(labels = comma) +
  # blank axis titles; the plot title carries the description
  labs(
    x = "",
    y = "",
    title = "Gallons of produced water spilled, statewide by year"
  ) +
  theme_minimal() +
  # drop vertical grid lines and align the title with the full plot area
  theme(
    plot.title.position = "plot",
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

Spills by Railroad Commission district

# Per-district totals: spill count plus summed release and recovery volumes
# (NAs dropped, rounded), sorted from the largest released volume down.
spills_district <- tx_spills_district_prod_water %>%
  group_by(district_edit) %>%
  summarize(
    spills = n(),
    released = round(sum(release_prod_water_edit, na.rm = TRUE)),
    recovered = round(sum(recovery_prod_water_edit, na.rm = TRUE))
  ) %>%
  arrange(desc(released))

# Interactive table of the per-district totals. Columns 2-4 (spills, released,
# recovered) are turned into comma-separated strings for display only; the
# spills_district object itself is not modified.
# across() replaces the superseded mutate_at().
spills_district %>%
  mutate(across(2:4, function(x) prettyNum(x, big.mark = ","))) %>%
  datatable(
    colnames = c("District", "Spills", "Gallons released", "Gallons recovered")
  )
# Horizontal bar chart of the released volume per district. Districts are
# ordered by released volume, and coord_flip() turns the columns sideways.
ggplot(
  data = spills_district,
  mapping = aes(x = reorder(district_edit, released), y = released)
) +
  geom_col(fill = "red") +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  scale_y_continuous(labels = comma) +
  # blank axis titles; the plot title carries the description
  labs(
    x = "",
    y = "",
    title = "Gallons of produced water spilled, by district"
  ) +
  theme_minimal() +
  theme(
    plot.title.position = "plot",
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  ) +
  coord_flip()

Spills by county

# Per-county totals: spill count plus summed release and recovery volumes
# (NAs dropped, rounded), sorted from the largest released volume down.
spills_county <- tx_spills_district_prod_water %>%
  group_by(county_edit) %>%
  summarize(
    spills = n(),
    released = round(sum(release_prod_water_edit, na.rm = TRUE)),
    recovered = round(sum(recovery_prod_water_edit, na.rm = TRUE))
  ) %>%
  arrange(desc(released))

# Interactive table of the per-county totals. Columns 2-4 (spills, released,
# recovered) are turned into comma-separated strings for display only; the
# spills_county object itself is not modified.
# across() replaces the superseded mutate_at().
spills_county %>%
  mutate(across(2:4, function(x) prettyNum(x, big.mark = ","))) %>%
  datatable(
    colnames = c("County", "Spills", "Gallons released", "Gallons recovered")
  )
# Keep the rows of spills_county with the 10 largest released volumes.
spills_county_top10 <- slice_max(spills_county, order_by = released, n = 10)

# Horizontal bar chart of the released volume for those counties, ordered by
# released volume; coord_flip() turns the columns sideways.
ggplot(
  data = spills_county_top10,
  mapping = aes(x = reorder(county_edit, released), y = released)
) +
  geom_col(fill = "red") +
  geom_hline(yintercept = 0, linewidth = 0.3) +
  scale_y_continuous(labels = comma) +
  # blank axis titles; the plot title carries the description
  labs(
    x = "",
    y = "",
    title = "Gallons of produced water spilled, top 10 counties"
  ) +
  theme_minimal() +
  theme(
    plot.title.position = "plot",
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  ) +
  coord_flip()