This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
|
data:data_analysis_manual:read_catalog_python [2021/11/12 11:14] eric buchlin How to use the catalogue |
data:data_analysis_manual:read_catalog_python [2024/03/29 14:11] (current) eric buchlin Page is deprecated |
||
|---|---|---|---|
| Line 1: | Line 1: | ||
| - | ====== How to read the UiO FITS files catalog in Python ====== | + | ====== How to read and use the UiO FITS files catalog in Python ====== |
| - | <file python read_uio_cat.py> | + | <note warning>These scripts are deprecated in favor of the current version, available in the [[https://sospice.readthedocs.io/en/stable/|sospice Python module]].</note> |
| + | |||
| + | ===== CSV catalog (new, recommended) ===== | ||
| + | |||
| + | <file python read_uio_cat_csv.py> | ||
| from pathlib import Path | from pathlib import Path | ||
| import pandas as pd | import pandas as pd | ||
| Line 7: | Line 11: | ||
| # SPICE data tree path, to be changed to your SPICE data mirror | # SPICE data tree path, to be changed to your SPICE data mirror | ||
| data_path = "/archive/SOLAR-ORBITER/SPICE" # example for IAS computing servers | data_path = "/archive/SOLAR-ORBITER/SPICE" # example for IAS computing servers | ||
| + | |||
| + | |||
| + | def date_parser(string): | ||
| + | try: | ||
| + | return pd.Timestamp(string) | ||
| + | except ValueError: | ||
| + | return pd.NaT | ||
| + | |||
| def read_uio_cat(): | def read_uio_cat(): | ||
| """ | """ | ||
| - | Read UiO text table SPICE FITS files catalog | + | Read UiO SPICE FITS files CSV catalog |
| - | http://astro-sdc-db.uio.no/vol/spice/fits/spice_catalog.txt | + | http://astro-sdc-db.uio.no/vol/spice/fits/spice_catalog.csv |
| Return | Return | ||
| Line 17: | Line 29: | ||
| pandas.DataFrame | pandas.DataFrame | ||
| Table | Table | ||
| + | """ | ||
| + | cat_file = Path(data_path) / "fits" / "spice_catalog.csv" | ||
| + | if not cat_file.exists(): | ||
| + | print(f'Error: Catalog file not available at {cat_file.as_posix()}') | ||
| + | sys.exit(1) | ||
| + | date_columns = ['DATE-BEG','DATE', 'TIMAQUTC'] | ||
| + | df = pd.read_csv(cat_file, parse_dates=date_columns, date_parser=date_parser) | ||
| + | return df | ||
| + | </file> | ||
| - | Example queries that can be done on the result: | + | The same applies to the catalog included in the data releases (here: release 2.0), which can simply be read with: |
| - | * `df[(df.LEVEL == "L2") & (df["DATE-BEG"] >= "2020-11-17") & (df["DATE-BEG"] < "2020-11-18") & (df.XPOSURE > 60.)]` | + | <file python read_release_cat.py> |
| - | * `df[(df.LEVEL == "L2") & (df.STUDYDES == "Standard dark for cruise phase")]` | + | import pandas as pd |
| + | |||
| + | def date_parser(string): | ||
| + | try: | ||
| + | return pd.Timestamp(string) | ||
| + | except ValueError: | ||
| + | return pd.NaT | ||
| + | |||
| + | date_columns = ['DATE-BEG','DATE', 'TIMAQUTC'] | ||
| + | cat = pd.read_csv( | ||
| + | 'https://spice.osups.universite-paris-saclay.fr/spice-data/release-2.0/catalog.csv', | ||
| + | date_parser=date_parser, | ||
| + | parse_dates=date_columns | ||
| + | ) | ||
| + | # TODO interpret the JSON included in columns `proc_steps` and `windows`. | ||
| + | </file> | ||
| + | |||
| + | |||
| + | ===== Text catalog ===== | ||
| + | |||
| + | <file python read_uio_cat_txt.py> | ||
| + | from pathlib import Path | ||
| + | import pandas as pd | ||
| + | |||
| + | # SPICE data tree path, to be changed to your SPICE data mirror | ||
| + | data_path = "/archive/SOLAR-ORBITER/SPICE" # example for IAS computing servers | ||
| + | |||
| + | |||
| + | def date_parser(string): | ||
| + | try: | ||
| + | return pd.Timestamp(string) | ||
| + | except ValueError: | ||
| + | return pd.NaT | ||
| + | |||
| + | |||
| + | def read_uio_cat(): | ||
| + | """ | ||
| + | Read UiO text table SPICE FITS files catalog | ||
| + | http://astro-sdc-db.uio.no/vol/spice/fits/spice_catalog.txt | ||
| + | |||
| + | Return | ||
| + | ------ | ||
| + | pandas.DataFrame | ||
| + | Table | ||
| """ | """ | ||
| cat_file = Path(data_path) / "fits" / "spice_catalog.txt" | cat_file = Path(data_path) / "fits" / "spice_catalog.txt" | ||
| + | if not cat_file.exists(): | ||
| + | print(f'Error: Catalog file not available at {cat_file.as_posix()}') | ||
| + | sys.exit(1) | ||
| columns = list(pd.read_csv(cat_file, nrows=0).keys()) | columns = list(pd.read_csv(cat_file, nrows=0).keys()) | ||
| date_columns = ['DATE-BEG','DATE', 'TIMAQUTC'] | date_columns = ['DATE-BEG','DATE', 'TIMAQUTC'] | ||
| - | df = pd.read_table(cat_file, skiprows=1, names=columns, na_values="MISSING", | + | df = pd.read_table(cat_file, skiprows=1, names=columns, |
| - | parse_dates=date_columns, warn_bad_lines=True) | + | parse_dates=date_columns, date_parser=date_parser, |
| - | df.LEVEL = df.LEVEL.apply(lambda string: string.strip()) | + | low_memory=False) |
| - | df.STUDYTYP = df.STUDYTYP.apply(lambda string: string.strip()) | + | |
| return df | return df | ||
| </file> | </file> | ||
| - | ''na_values="MISSING"'' replaces the string "MISSING" with NaNs; it can be removed. | + | |
| + | ===== Using the catalog ===== | ||
| Then we can read the catalog and filter it: | Then we can read the catalog and filter it: | ||