def data_dict():
    """Create a `dict` mapping each data bucket to the tar file it comes from.

    To create tar files and follow any sym links, run:
    `tar -chvzf maf_may_2021.tgz maf`

    Returns
    -------
    result : `dict`
        Data bucket filenames dictionary with keys/values:
        key - Data bucket name (`str`).
        value - Versioned tar file name (`str`).
    """
    file_dict = {
        "scheduler": "scheduler_2023_10_16.tgz",
        "site_models": "site_models_2023_10_02.tgz",
        "skybrightness_pre": "skybrightness_pre_2024_11_19.tgz",
        "utils": "utils_2023_11_02.tgz",
    }
    return file_dict
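# Illustrative sketch (not part of this module): the mapping returned by
# data_dict() pairs each rubin_sim_data subdirectory name with the versioned
# tarball expected on the download server. The import path below is an
# assumption for the example; adjust to wherever this module lives.
#
#     from rubin_scheduler.data import data_dict  # assumed import path
#     for bucket, tarball in data_dict().items():
#         print(bucket, "->", tarball)
#     # e.g. "scheduler -> scheduler_2023_10_16.tgz"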
def scheduler_download_data(file_dict=None):
    """Download data, with command-line arguments controlling the behavior."""
    if file_dict is None:
        file_dict = data_dict()
    parser = argparse.ArgumentParser(description="Download data files for rubin_sim package")
    parser.add_argument(
        "--versions",
        dest="versions",
        default=False,
        action="store_true",
        help="Report expected versions, then quit",
    )
    parser.add_argument(
        "--update",
        dest="update",
        default=False,
        action="store_true",
        help="Update versions of data on disk to match current",
    )
    parser.add_argument(
        "-d",
        "--dirs",
        type=str,
        default=None,
        help="Comma-separated list of directories to download",
    )
    parser.add_argument(
        "-f",
        "--force",
        dest="force",
        default=False,
        action="store_true",
        help="Force re-download of data directory(ies)",
    )
    parser.add_argument(
        "--url_base",
        type=str,
        default=DEFAULT_DATA_URL,
        help="Root URL of download location",
    )
    parser.add_argument(
        "--tdqm_disable",
        dest="tdqm_disable",
        default=False,
        action="store_true",
        help="Turn off tqdm progress bar",
    )
    args = parser.parse_args()

    download_rubin_data(
        file_dict,
        dirs=args.dirs,
        print_versions_only=args.versions,
        update=args.update,
        force=args.force,
        url_base=args.url_base,
        tdqm_disable=args.tdqm_disable,
    )
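# Usage sketch (hedged): scheduler_download_data() reads sys.argv via argparse,
# so it is normally run through a console-script entry point rather than called
# with arguments. The entry-point name shown below is an assumption; the flags
# are the ones defined above.
#
#     scheduler_download_data --versions
#     scheduler_download_data --dirs skybrightness_pre,utils --update
#     scheduler_download_data --force --tdqm_disable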
def download_rubin_data(
    file_dict,
    dirs=None,
    print_versions_only=False,
    update=False,
    force=False,
    url_base=DEFAULT_DATA_URL,
    tdqm_disable=False,
):
    """Download external data blobs.

    Parameters
    ----------
    file_dict : `dict`
        A dict with keys of directory names and values of remote filenames.
    dirs : `list` [`str`]
        List of directories to download.
        Default (None) downloads all directories in file_dict.
    print_versions_only : `bool`
        If True, print the versions currently on disk, then return.
        Default False.
    update : `bool`
        If True, update versions on disk to match expected 'current'.
        Default False.
    force : `bool`
        If True, replace versions on disk with new download. Default False.
    url_base : `str`
        The root URL to download from; defaults to DEFAULT_DATA_URL.
    tdqm_disable : `bool`
        If True, disable the tqdm progress bar. Default False.
    """
    # file_dict = dictionary of current versions
    if dirs is None:
        dirs = file_dict.keys()
    else:
        dirs = dirs.split(",")

    # Figure out where the rubin_sim_data is or is going
    data_dir = get_data_dir()
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)

    # Get dictionary of versions which are available on-disk
    versions = data_versions()
    if versions is None:
        versions = {}

    # ONLY check versions and return exit code
    if print_versions_only:
        print("Versions on disk currently // versions expected for this release:")
        mismatch_dict = {}
        match = True
        for k in file_dict:
            print(f"{k} : {versions.get(k, '')} // {file_dict[k]}")
            if versions.get(k, "") != file_dict[k]:
                match = False
                mismatch_dict[k] = False
        if match:
            print("Versions are in sync")
            return 0
        else:
            print("Versions do not match.")
            print(f"{','.join([k for k in mismatch_dict])} are not matching.")
            return 1

    version_file = os.path.join(data_dir, "versions.txt")

    # See if base URL is alive
    fail_message = f"Could not connect to {url_base}. Check site is up?"
    try:
        r = requests.get(url_base)
    except ConnectionError:
        print(fail_message)
        exit()
    if r.status_code != requests.codes.ok:
        print(fail_message)
        exit()

    # Now do downloading for "dirs"
    for key in dirs:
        filename = file_dict[key]
        path = os.path.join(data_dir, key)
        # Do some thinking to see if we should download new data for key
        download_this_dir = True
        if os.path.isdir(path):
            if force:
                # Remove and update, regardless
                rmtree(path)
                warnings.warn("Removed existing directory %s, downloading new copy" % path)
            elif not update:
                # Just see if it exists on-disk and keep it if it does
                warnings.warn("Directory %s already exists, skipping download" % path)
                download_this_dir = False
            else:
                # Update only if necessary
                if versions.get(key, "") == file_dict[key]:
                    download_this_dir = False
                else:
                    rmtree(path)
                    warnings.warn("Removed existing directory %s, downloading updated version" % path)
        if download_this_dir:
            # Download file
            url = url_base + filename
            print("Downloading file: %s" % url)
            # Stream and write in chunks (avoid large memory usage)
            r = requests.get(url, stream=True)
            file_size = int(r.headers.get("Content-Length", 0))
            if file_size < 245:
                warnings.warn(f"{url} file size unexpectedly small.")
            # Download this size chunk at a time; reasonable guess
            block_size = 512 * 512 * 10
            progress_bar = tqdm(total=file_size, unit="iB", unit_scale=True, disable=tdqm_disable)
            print(f"Writing to {os.path.join(data_dir, filename)}")
            with open(os.path.join(data_dir, filename), "wb") as f:
                for chunk in r.iter_content(chunk_size=block_size):
                    progress_bar.update(len(chunk))
                    f.write(chunk)
            progress_bar.close()
            # untar in place
            unpack_archive(os.path.join(data_dir, filename), data_dir)
            os.remove(os.path.join(data_dir, filename))
            versions[key] = file_dict[key]

    # Write out the new version info to the data directory
    with open(version_file, "w") as f:
        for key in versions:
            print(key + "," + versions[key], file=f)

    # Write a little table to stdout
    new_versions = data_versions()
    print("Current/updated data versions:")
    for k in new_versions:
        if len(k) <= 10:
            sep = "\t\t"
        else:
            sep = "\t"
        print(f"{k}{sep}{new_versions[k]}")
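# Illustrative programmatic use (a sketch, not part of this module). It assumes
# the module-level imports not shown in this excerpt (os, warnings, requests,
# shutil.rmtree / unpack_archive, tqdm, and the package's get_data_dir,
# data_versions, and DEFAULT_DATA_URL helpers) are available. Calling
# download_rubin_data() directly bypasses argparse; dirs takes a comma-separated
# string, and print_versions_only=True only reports and returns 0 or 1.
#
#     download_rubin_data(data_dict(), dirs="skybrightness_pre", update=True)
#     mismatch = download_rubin_data(data_dict(), print_versions_only=True)
#     # mismatch == 0 if on-disk versions match the expected release, else 1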