dl_reduce.sh (Bash) - myCompiler

Fork
#!/bin/bash
# (The shebang above only selects bash when it is the very first line of the
# file, so it has to stay above every other comment.)
#
# dl_reduce.sh - download a list of files one by one and reduce each of them.
#
# For every URL in the list, 4 steps:
#   1) download the file into a temporary directory,
#   2) hand it to reduce_one.R, which is expected to keep ONLY lat, long and
#      area (that script is not part of this file - confirm there),
#   3) accumulate the reduced rows in one CSV file,
#   4) delete the downloaded file so the disk does not fill up.
#
# Usage:   dl_reduce.sh URL_LIST
#   URL_LIST  text file with one URL per line. Blank lines are ignored;
#             Windows line endings and a missing final newline are tolerated.
# Needs:   curl, Rscript, and reduce_one.R in the current directory.
#          Login data (e.g. Earthdata) is read by curl from ~/.netrc.
# Outputs: tree_count_reduced.csv  path handed to reduce_one.R as its output
#          failed.txt              one "ÉCHEC : <url>" line for every URL that
#                                  could not be downloaded or reduced
# Exit status: 2 = wrong usage, 1 = setup problem, 0 otherwise (individual
#              URL failures are logged, they do not change the exit status).
#
# Warning: this code is entirely AI-generated (Claude); the comments are the
# author's understanding of Claude's line-by-line explanations.

# -u: using an unset variable is an error; pipefail: a pipeline fails if any
# stage fails. `set -e` is deliberately NOT used: every failure is handled
# explicitly so that one bad URL never aborts the whole batch.
set -u
set -o pipefail

# Directory where each download lives while it is being processed.
readonly dest="./tmp_gpkg"
# File in which the reduced results are accumulated.
readonly output="tree_count_reduced.csv"
# File that keeps a trace of every URL that failed.
readonly failed_log="failed.txt"
# Cookie file that remembers the login session between downloads.
# Global (not local) so that the EXIT trap can still see it.
cookiejar=""

# Delete the temporary cookie file, if one was created.
cleanup() {
  if [[ -n "$cookiejar" ]]; then
    rm -f -- "$cookiejar"
    cookiejar=""
  fi
}

# Append one failed URL to the failure log (>> never erases earlier entries).
# Arguments: $1 - the URL that failed
log_failure() {
  printf 'ÉCHEC : %s\n' "$1" >> "$failed_log"
}

# Download and reduce every URL of a list, one file at a time.
# Arguments: $1 - path of the URL list
# Outputs:   progress lines "[i/total] name" and a final "Done" on stdout,
#            diagnostics on stderr
# Returns:   2 on wrong usage, 1 on setup problem, 0 otherwise
main() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s URL_LIST\n' "${0##*/}" >&2
    return 2
  fi

  local urls=$1
  if [[ ! -f "$urls" || ! -r "$urls" ]]; then
    printf 'error: cannot read URL list: %s\n' "$urls" >&2
    return 1
  fi

  # -p: no error when the directory already exists.
  if ! mkdir -p -- "$dest"; then
    printf 'error: cannot create directory: %s\n' "$dest" >&2
    return 1
  fi

  if ! cookiejar=$(mktemp); then
    printf 'error: mktemp failed\n' >&2
    return 1
  fi
  # Remove the cookie file on every exit path, including Ctrl-C.
  trap cleanup EXIT

  # Number of lines that really contain something. Whitespace-only lines are
  # excluded because the loop below skips them too. grep exits with status 1
  # when the count is 0 but still prints "0", so its status is not checked.
  local total
  total=$(grep -c '[^[:space:]]' -- "$urls")

  local i=0 failures=0 line fname

  # The list is read on file descriptor 3 instead of stdin, so a child
  # process that reads stdin can never swallow the remaining URLs.
  # `|| [[ -n "$line" ]]` keeps a last line that has no trailing newline.
  # read (default IFS) also trims leading/trailing spaces and tabs.
  while read -r -u 3 line || [[ -n "$line" ]]; do
    # Drop the carriage return left by Windows line endings.
    line=${line%$'\r'}

    # Ignore empty lines.
    [[ -z "$line" ]] && continue

    i=$((i + 1))

    # Keep everything after the last "/" = the file name.
    fname=${line##*/}

    # Show where we are: i-th file out of total.
    printf '[%s/%s] %s\n' "$i" "$total" "$fname"

    # A URL ending in "/" has no file name to download to.
    if [[ -z "$fname" ]]; then
      printf 'warn: no file name in URL: %s\n' "$line" >&2
      log_failure "$line"
      failures=$((failures + 1))
      continue
    fi

    # -f: fail on HTTP errors, -b/-c: read/write session cookies,
    # -L: follow redirects, --netrc: credentials from ~/.netrc,
    # -C -: resume a partial file left over from an interrupted run.
    if ! curl -f -b "$cookiejar" -c "$cookiejar" -L --netrc -C - \
      -o "$dest/$fname" -- "$line"; then
      log_failure "$line"
      failures=$((failures + 1))
      rm -f -- "$dest/$fname"
      continue
    fi

    # Reduce the downloaded file; a failure here is recorded as well,
    # otherwise the data of this URL would be lost without any trace.
    if ! Rscript reduce_one.R "$dest/$fname" "$output"; then
      printf 'warn: reduce_one.R failed for: %s\n' "$line" >&2
      log_failure "$line"
      failures=$((failures + 1))
    fi

    # Delete the download in every case to keep disk usage low.
    rm -f -- "$dest/$fname"
  done 3< "$urls"

  cleanup

  if (( failures > 0 )); then
    printf 'warn: %s of %s URL(s) failed, see %s\n' \
      "$failures" "$i" "$failed_log" >&2
  fi

  printf 'Done -> %s\n' "$output"
  return 0
}

main "$@"
Fork
Embed on website