id submission_type answer
tutorial-id none data-import
name question Muhammad Mudassar
email question mmahfoozpk89@gmail.com
reading-data-from-a-file-1 question library(readr)
reading-data-from-a-file-2 exercise read_csv("data/students.csv")
reading-data-from-a-file-3 exercise students <- read_csv("data/students.csv")
reading-data-from-a-file-4 exercise students
reading-data-from-a-file-5 exercise students <- read_csv("data/students.csv", na= c("N/A", ""))
reading-data-from-a-file-6 exercise students |> rename(student_id = "Student ID")
reading-data-from-a-file-7 exercise library(janitor)
reading-data-from-a-file-8 exercise students|> clean_names()
reading-data-from-a-file-9 exercise students|> clean_names() |> mutate(meal_plan = factor(meal_plan))
reading-data-from-a-file-10 exercise students|> clean_names() |> mutate(meal_plan = factor(meal_plan)) |> mutate(age = if_else(age == "five", "5", age))
reading-data-from-a-file-11 exercise students|> clean_names() |> mutate(meal_plan = factor(meal_plan), age = if_else(age == "five", "5", age), age = parse_number(age))
reading-data-from-a-file-12 exercise read_csv("data/test_1.csv")
reading-data-from-a-file-13 exercise read_csv("data/test_1.csv", show_col_types= FALSE)
reading-data-from-a-file-14 exercise read_csv("data/test_2.csv", skip = 2)
reading-data-from-a-file-15 exercise read_csv("data/test_3.csv", col_names = FALSE)
reading-data-from-a-file-16 exercise read_csv("data/test_3.csv", col_names = c("a", "b", "c"))
reading-data-from-a-file-17 exercise read_csv("data/test_3.csv", col_names = c("a", "b", "c"), col_types = cols(a = col_double(), b = col_double(), c = col_double()))
reading-data-from-a-file-18 exercise read_csv("data/test_5.csv", na = ".")
reading-data-from-a-file-19 exercise read_csv("data/test_6.csv", comment = "#")
reading-data-from-a-file-20 exercise read_csv("data/test_7.csv", cols(grade = col_integer(), student = col_character()))
reading-data-from-a-file-21 exercise read_csv("data/test_bad_names.csv", name_repair= "universal")
reading-data-from-a-file-22 exercise read_csv("data/test_bad_names.csv") |> clean_names()
reading-data-from-a-file-23 exercise read_csv("data/test_bad_names.csv", name_repair= janitor::make_clean_names)
reading-data-from-a-file-24 exercise read_delim("data/delim_1.txt")
reading-data-from-a-file-25 exercise read_delim("data/delim_2.txt", comment = "##", col_types = cols( date = col_date(), population = col_integer(), town = col_character() ) )
controlling-column-types-1 exercise read_csv(" a, b, c 1, 2, 3")
controlling-column-types-2 exercise read_csv(" logical,numeric,date,string TRUE,1,2021-01-15,abc false,4.5,2021-02-15,def T,Inf,2021-02-16,ghi ")
controlling-column-types-3 exercise simple_csv <- " x 10 . 20 30" read_csv(simple_csv)
controlling-column-types-4 exercise read_csv(simple_csv, col_type = list (x= col_double()))
controlling-column-types-5 exercise df <- read_csv(simple_csv, col_type = list (x= col_double())) problems(df)
controlling-column-types-6 exercise read_csv(simple_csv, na= ".")
controlling-column-types-7 exercise read_csv(another_csv, col_type = cols(.default = col_character()))
controlling-column-types-8 exercise read_csv(another_csv, col_type = cols_only(y = col_character()))
controlling-column-types-9 exercise read_csv("data/ex_2.csv")
controlling-column-types-10 exercise read_csv("data/ex_2.csv", col_types= cols(.default= col_character()))
controlling-column-types-11 exercise read_csv("data/ex_2.csv", col_types= cols(.default= col_character())) |> mutate(parse_integer(a))
controlling-column-types-12 exercise read_csv("data/ex_2.csv", col_types= cols(.default= col_character())) |> mutate(parse_integer(a), parse_date(b, format= "%Y%M%D"))
controlling-column-types-13 exercise read_csv("data/ex_3.csv")
controlling-column-types-14 exercise read_csv("data/ex_3.csv")|> mutate(x= parse_date(x, "%d %B %Y"))
controlling-column-types-15 exercise read_csv("data/ex_3.csv")|> mutate(x= parse_date(x, "%d %B %Y")) |> mutate(z = parse_number(z))
reading-data-from-multiple-fil-1 exercise list.files("data")
reading-data-from-multiple-fil-2 exercise list.files("data", pattern = "similar")
reading-data-from-multiple-fil-3 exercise list.files("data", pattern = "similar", full.names = TRUE)
reading-data-from-multiple-fil-4 exercise library(tidyverse) combined_data <- list.files("data", pattern = "similar", full.names = TRUE) |> map_dfr(~ read_csv(.x, col_types = cols(.default = "c"))) |> filter(!(a == "a" & b == "b" & c == "c"))
reading-data-from-multiple-fil-5 exercise library(tidyverse) combined_data <- list.files("data", pattern = "similar", full.names = TRUE) |> map_dfr(~ read_csv(.x, na = ".", col_types = cols(.default = "c"))) |> filter(!(a == "a" & b == "b" & c == "c"))
reading-data-from-multiple-fil-6 exercise library(tidyverse) combined_data <- list.files("data", pattern = "similar", full.names = TRUE) |> map_dfr(~ read_csv(.x, na = ".", col_types = cols(.default = "c"))) |> filter(!(a == "a" & b == "b" & c == "c"))
reading-data-from-multiple-fil-7 exercise library(tidyverse) # Combine all 'similar' CSV files into one data frame combined_data <- list.files("data", pattern = "similar", full.names = TRUE) |> map_dfr(~ read_csv(.x, na = ".", col_types = cols(.default = "c"))) |> filter(!(a == "a" & b == "b" & c == "c")) # List all files in the 'data' directory that contain 'sales' in the filename list.files(path = "data", pattern = "sales")
reading-data-from-multiple-fil-8 exercise library(tidyverse) sales_data <- list.files(path = "data", pattern = "sales", full.names = TRUE) |> map_dfr(read_csv)
reading-data-from-multiple-fil-9 exercise library(tidyverse) sales_data <- list.files(path = "data", pattern = "sales", full.names = TRUE) |> map_dfr(~ read_csv(.x, id = "file"))
writing-to-a-file-1 exercise students2 <- students |> clean_names() |> mutate( meal_plan = factor(meal_plan), age = if_else(age == "five", "5", age), age = parse_number(age) ) students2
writing-to-a-file-2 exercise students2
writing-to-a-file-3 exercise write_csv(x = students2, file= "data/students2.csv")
writing-to-a-file-4 exercise read_csv("data/students2.csv")
writing-to-a-file-5 exercise iris_p <- iris |> ggplot(aes(color = red, x = Sepal.Length, y = Sepal.Width)) + geom_jitter() + labs(title = "Sepal Dimensions of Various Species of Iris", x = "Sepal Length", y = "Sepal Width")
writing-to-a-file-6 exercise list.files("data")
writing-to-a-file-7 exercise read_rds(file = "data/test_1.rds")
writing-to-a-file-8 exercise write_rds(mtcars, "test_2.rds")
writing-to-a-file-9 exercise list.files("data")
writing-to-a-file-10 exercise read_rds(file ="data/test_2.rds")
writing-to-a-file-11 question What is Apache Arrow? Apache Arrow is a multi-language toolbox for building high performance applications that process and transport large data sets. It is designed to both improve the performance of analytical algorithms and the efficiency of moving data from one system (or programming language) to another. A critical component of Apache Arrow is its in-memory columnar format, a standardized, language-agnostic specification for representing structured, table-like datasets in-memory. This data format has a rich data type system (including nested and user-defined data types) designed to support the needs of analytic database systems, data frame libraries, and more. The project also contains implementations of the Arrow columnar format in many languages, along with utilities for reading and writing it to many common storage formats. These official libraries enable third-party projects to work with Arrow data without having to implement the Arrow columnar format themselves. For those that want to implement a small subset of the format, the Arrow project contains some tools, such as a C data interface, to assist with interoperability with the official Arrow libraries. The Arrow libraries contain many software components that assist with systems problems related to getting data in and out of remote storage systems and moving Arrow-formatted data over network interfaces. Some of these components can be used even in scenarios where the columnar format is not used at all. Lastly, alongside software that helps with data access and IO-related issues, there are libraries of algorithms for performing analytical operations or queries against Arrow datasets.
data-entry-1 exercise tibble(x =c(1, 2, 5), y = c("h", "m", "g"), z = c(0.08, 0.83, 0.60))
data-entry-2 exercise tribble( ~x, ~y, ~z, 1, "h", 0.08, 2, "m", 0.83, 5, "g", 0.60 )
minutes question 160