I have a code written in R that extracts data from an NC file to compute a time series for a specific location using latitude and longitude. However, the file only outputs three values instead of the entire date timeline. Why is this happening? Additionally, I received an error message.
`x` must not contain list or matrix columns:✖ invalid columns at index(s): 1
Run `rlang::last_trace()` to see where the error occurred.
Can you assist?
install.packages("ncdf4")
install.packages("tidyverse")
install.packages("lubridate")
library(ncdf4)
library(tidyverse)
library(lubridate)
# Step 1: Define the folder containing NetCDF files and output path
folder_path <- "C:/Users/WINDOWS 10/Downloads/MSWEP/Daily"
output_csv <- "C:/Users/WINDOWS 10/Downloads/Full_Precipitation_Timeseries2.csv"
# Define the target latitude and longitude
target_lat <- 14.8903
target_lon <- -19.2321
# Initialize an empty data frame to store the results
all_time_series <- data.frame(Date = as.Date(character()), Precipitation = numeric())
# Step 2: Process each NetCDF file in the folder
nc_files <- list.files(folder_path, pattern = "\\.nc$", full.names = TRUE)
for (file in nc_files) {
cat("\nProcessing file:", file, "\n")
# Open the NetCDF file
nc <- nc_open(file)
# Extract latitude, longitude, and time variables
latitudes <- ncvar_get(nc, "lat") # Adjust "lat" if variable name differs
longitudes <- ncvar_get(nc, "lon") # Adjust "lon" if variable name differs
time <- ncvar_get(nc, "time") # Adjust "time" if variable name differs
# Convert time to dates
time_units <- ncatt_get(nc, "time", "units")$value
cat("Time units:", time_units, "\n")
time_origin <- strsplit(time_units, "since ")[[1]][2]
if (!is.null(time_origin)) {
dates <- as.Date(time, origin = time_origin)
cat("Sample dates:", head(dates), "\n")
} else {
cat("Warning: Time origin not found in file:", file, "\n")
nc_close(nc)
next
}
# Enforce `dates` as a Date vector
dates <- as.Date(dates)
# Find the nearest grid point indices for target lat/lon
lat_idx <- which.min(abs(latitudes - target_lat))
lon_idx <- which.min(abs(longitudes - target_lon))
cat("Latitude index:", lat_idx, "Longitude index:", lon_idx, "\n")
# Extract precipitation data for all available time points
precip_subset <- ncvar_get(nc, "precipitation",
start = c(lon_idx, lat_idx, 1),
count = c(1, 1, -1)) # Adjust count if needed
# Ensure `precip_subset` is a numeric vector
precip_subset <- as.vector(precip_subset)
# Debugging: Check dimensions and content of precipitation data
cat("Length of precip_subset:", length(precip_subset), "\n")
if (length(precip_subset) > 0) {
cat("Sample precipitation data:", head(precip_subset), "\n")
} else {
cat("Warning: Empty precipitation data for file:", file, "\n")
nc_close(nc)
next
}
# Combine dates and precipitation into a data frame
file_time_series <- data.frame(Date = dates, Precipitation = precip_subset)
# Ensure Date is consistently a Date type
file_time_series$Date <- as.Date(file_time_series$Date)
# Append to the overall time series
all_time_series <- bind_rows(all_time_series, file_time_series)
# Close the NetCDF file
nc_close(nc)
}
# Debugging: Check the final combined time series
cat("\nFinal time series preview:\n")
print(head(all_time_series))
# Step 3: Save the combined time series to a CSV file
if (nrow(all_time_series) > 0) {
write_csv(all_time_series, output_csv)
cat("Precipitation time series saved to:", output_csv, "\n")
} else {
cat("No valid data extracted. CSV file was not created.\n")
}