Source code for grond.dataset

import glob
import copy
import os.path as op
import logging
import math
import numpy as num

from collections import defaultdict
from pyrocko import util, pile, model, config, trace, \
    marker as pmarker
from pyrocko.io.io_common import FileLoadError
from pyrocko.fdsn import enhanced_sacpz, station as fs
from pyrocko.guts import (Object, Tuple, String, Float, List, Bool, dump_all,
                          load_all)

from pyrocko import gf

from .meta import Path, HasPaths, expand_template, GrondError

from .synthetic_tests import SyntheticTest

guts_prefix = 'grond'
logger = logging.getLogger('grond.dataset')


def quote_paths(paths):
    return ', '.join('"%s"' % path for path in paths)
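
# Illustrative behavior (sketch, not part of the module):
#
#     >>> quote_paths(['data/a.mseed', 'data/b.mseed'])
#     '"data/a.mseed", "data/b.mseed"'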


class InvalidObject(Exception):
    pass

class NotFound(Exception):

    def __init__(self, reason, codes=None, time_range=None):
        self.reason = reason
        self.time_range = time_range
        self.codes = codes

    def __str__(self):
        s = self.reason
        if self.codes:
            s += ' (%s)' % '.'.join(self.codes)

        if self.time_range:
            s += ' (%s - %s)' % (
                util.time_to_str(self.time_range[0]),
                util.time_to_str(self.time_range[1]))

        return s
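
# Illustrative sketch (hypothetical codes): the optional ``codes`` and
# ``time_range`` of NotFound are appended to its string representation,
# which is what shows up in Grond's log messages:
#
#     e = NotFound(
#         'Waveform missing.', codes=('GE', 'APE', '', 'BHZ'),
#         time_range=(0.0, 10.0))
#     str(e)  # 'Waveform missing. (GE.APE..BHZ) (... - ...)'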

class DatasetError(GrondError):
    pass

class StationCorrection(Object):
    codes = Tuple.T(4, String.T())
    delay = Float.T()
    factor = Float.T()
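
# Minimal usage sketch (the codes and values are made up): a station
# correction ties a time delay (in s) and an amplitude factor to one
# NET.STA.LOC.CHA code. The helpers below round-trip lists of such objects
# through YAML:
#
#     scs = [StationCorrection(
#         codes=('GE', 'APE', '', 'BHZ'), delay=0.25, factor=1.1)]
#     dump_station_corrections(scs, filename='corrections.yaml')
#     scs = load_station_corrections(filename='corrections.yaml')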

def load_station_corrections(filename):
    scs = load_all(filename=filename)
    for sc in scs:
        assert isinstance(sc, StationCorrection)

    return scs


def dump_station_corrections(station_corrections, filename):
    return dump_all(station_corrections, filename=filename)


class Dataset(object):

    def __init__(self, event_name=None):
        self.events = []
        self.pile = pile.Pile()
        self.stations = {}
        self.responses = defaultdict(list)
        self.responses_stationxml = []
        self.clippings = {}
        self.blacklist = set()
        self.whitelist_nslc = None
        self.whitelist_nsl_xx = None
        self.whitelist = None
        self.station_corrections = {}
        self.station_factors = {}
        self.pick_markers = []
        self.apply_correction_delays = True
        self.apply_correction_factors = True
        self.apply_displaced_sampling_workaround = False
        self.extend_incomplete = False
        self.clip_handling = 'by_nsl'
        self.kite_scenes = []
        self.gnss_campaigns = []
        self.synthetic_test = None
        self._picks = None
        self._cache = {}
        self._event_name = event_name

    def empty_cache(self):
        self._cache = {}

    def set_synthetic_test(self, synthetic_test):
        self.synthetic_test = synthetic_test

    def add_stations(
            self,
            stations=None,
            pyrocko_stations_filename=None,
            stationxml_filenames=None):

        if stations is not None:
            for station in stations:
                self.stations[station.nsl()] = station

        if pyrocko_stations_filename is not None:
            logger.debug(
                'Loading stations from file "%s"...' %
                pyrocko_stations_filename)

            for station in model.load_stations(pyrocko_stations_filename):
                self.stations[station.nsl()] = station

        if stationxml_filenames is not None and len(stationxml_filenames) > 0:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading stations from StationXML file "%s"...' %
                    stationxml_filename)

                sx = fs.load_xml(filename=stationxml_filename)
                ev = self.get_event()
                stations = sx.get_pyrocko_stations(time=ev.time)
                if len(stations) == 0:
                    logger.warning(
                        'No stations found for time %s in file "%s".' % (
                            util.time_to_str(ev.time), stationxml_filename))

                for station in stations:
                    logger.debug('Adding station: %s.%s.%s' % station.nsl())
                    channels = station.get_channels()
                    if len(channels) == 1 and channels[0].name.endswith('Z'):
                        logger.warning(
                            'Station "%s" has vertical component'
                            ' information only, adding mocked channels.'
                            % station.nsl_string())
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'N'))
                        station.add_channel(
                            model.Channel(channels[0].name[:-1] + 'E'))

                    self.stations[station.nsl()] = station

    def add_events(self, events=None, filename=None):
        if events is not None:
            self.events.extend(events)

        if filename is not None:
            logger.debug('Loading events from file "%s"...' % filename)
            try:
                events = model.load_events(filename)
                self.events.extend(events)
                logger.info(
                    'Loading events from %s: %i events found.' %
                    (filename, len(events)))
            except Exception as e:
                logger.warning('Could not load events from %s!', filename)
                raise e

    def add_waveforms(self, paths, regex=None, fileformat='detect',
                      show_progress=False):
        cachedirname = config.config().cache_dir

        logger.debug('Selecting waveform files %s...' % quote_paths(paths))
        fns = util.select_files(paths, include=regex,
                                show_progress=show_progress)
        cache = pile.get_cache(cachedirname)

        logger.debug('Scanning waveform files %s...' % quote_paths(paths))
        self.pile.load_files(sorted(fns), cache=cache,
                             fileformat=fileformat,
                             show_progress=show_progress)

    def add_responses(self, sacpz_dirname=None, stationxml_filenames=None):
        if sacpz_dirname:
            logger.debug(
                'Loading SAC PZ responses from "%s"...' % sacpz_dirname)
            for x in enhanced_sacpz.iload_dirname(sacpz_dirname):
                self.responses[x.codes].append(x)

        if stationxml_filenames:
            for stationxml_filename in stationxml_filenames:
                if not op.exists(stationxml_filename):
                    continue

                logger.debug(
                    'Loading StationXML responses from "%s"...' %
                    stationxml_filename)

                self.responses_stationxml.append(
                    fs.load_xml(filename=stationxml_filename))

    def add_clippings(self, markers_filename):
        markers = pmarker.load_markers(markers_filename)
        clippings = {}
        for marker in markers:
            nslc = marker.one_nslc()
            nsl = nslc[:3]
            if nsl not in clippings:
                clippings[nsl] = []

            if nslc not in clippings:
                clippings[nslc] = []

            clippings[nsl].append(marker.tmin)
            clippings[nslc].append(marker.tmin)

        for k, times in clippings.items():
            atimes = num.array(times, dtype=float)
            if k not in self.clippings:
                self.clippings[k] = atimes
            else:
                self.clippings[k] = num.concatenate(
                    (self.clippings[k], atimes))

    def add_blacklist(self, blacklist=[], filenames=None):
        logger.debug('Loading blacklisted stations...')
        if filenames:
            blacklist = list(blacklist)
            for filename in filenames:
                if op.exists(filename):
                    with open(filename, 'r') as f:
                        blacklist.extend(
                            s.strip() for s in f.read().splitlines())
                else:
                    logger.warning('No such blacklist file: %s' % filename)

        for x in blacklist:
            if isinstance(x, str):
                x = tuple(x.split('.'))
            self.blacklist.add(x)

    def add_whitelist(self, whitelist=[], filenames=None):
        logger.debug('Loading whitelisted stations...')
        if filenames:
            whitelist = list(whitelist)
            for filename in filenames:
                with open(filename, 'r') as f:
                    whitelist.extend(
                        s.strip() for s in f.read().splitlines())

        if self.whitelist_nslc is None:
            self.whitelist_nslc = set()
            self.whitelist = set()
            self.whitelist_nsl_xx = set()

        for x in whitelist:
            if isinstance(x, str):
                x = tuple(x.split('.'))
            if len(x) == 4:
                self.whitelist_nslc.add(x)
                self.whitelist_nsl_xx.add(x[:3])
            else:
                self.whitelist.add(x)

    def add_station_corrections(self, filename):
        self.station_corrections.update(
            (sc.codes, sc) for sc in load_station_corrections(filename))

    def add_picks(self, filename):
        self.pick_markers.extend(
            pmarker.load_markers(filename))

        self._picks = None

    def add_gnss_campaigns(self, paths):
        paths = util.select_files(
            paths, include=r'\.yml|\.yaml', show_progress=False)

        for path in paths:
            self.add_gnss_campaign(filename=path)

    def add_gnss_campaign(self, filename):
        try:
            from pyrocko.model import gnss  # noqa
        except ImportError:
            raise ImportError('Module pyrocko.model.gnss not found,'
                              ' please upgrade pyrocko!')

        logger.debug('Loading GNSS campaign from "%s"...' % filename)

        campaign = load_all(filename=filename)
        self.gnss_campaigns.append(campaign[0])

    def add_kite_scenes(self, paths):
        logger.info('Loading kite InSAR scenes...')
        paths = util.select_files(
            paths, include=r'\.npz', show_progress=False)

        for path in paths:
            self.add_kite_scene(filename=path)

        if not self.kite_scenes:
            logger.warning('Could not find any kite scenes at %s.' %
                           quote_paths(paths))

    def add_kite_scene(self, filename):
        try:
            from kite import Scene
        except ImportError:
            raise ImportError('Module kite could not be imported,'
                              ' please install from https://pyrocko.org.')

        logger.debug('Loading kite scene from "%s"...' % filename)

        scene = Scene.load(filename)
        scene._log.setLevel(logger.level)

        try:
            self.get_kite_scene(scene.meta.scene_id)
        except NotFound:
            self.kite_scenes.append(scene)
        else:
            raise AttributeError('Kite scene_id not unique for "%s".'
                                 % filename)

    def is_blacklisted(self, obj):
        try:
            nslc = self.get_nslc(obj)
            if nslc in self.blacklist:
                return True

        except InvalidObject:
            pass

        nsl = self.get_nsl(obj)
        return (
            nsl in self.blacklist
            or nsl[1:2] in self.blacklist
            or nsl[:2] in self.blacklist)

    def is_whitelisted(self, obj):
        if self.whitelist_nslc is None:
            return True

        nsl = self.get_nsl(obj)

        if (
                nsl in self.whitelist
                or nsl[1:2] in self.whitelist
                or nsl[:2] in self.whitelist):

            return True

        try:
            nslc = self.get_nslc(obj)
            if nslc in self.whitelist_nslc:
                return True

        except InvalidObject:
            return nsl in self.whitelist_nsl_xx

    def has_clipping(self, nsl_or_nslc, tmin, tmax):
        if nsl_or_nslc not in self.clippings:
            return False

        atimes = self.clippings[nsl_or_nslc]
        return num.any(num.logical_and(tmin < atimes, atimes <= tmax))

    def get_nsl(self, obj):
        if isinstance(obj, trace.Trace):
            net, sta, loc, _ = obj.nslc_id
        elif isinstance(obj, model.Station):
            net, sta, loc = obj.nsl()
        elif isinstance(obj, gf.Target):
            net, sta, loc, _ = obj.codes
        elif isinstance(obj, tuple) and len(obj) in (3, 4):
            net, sta, loc = obj[:3]
        else:
            raise InvalidObject(
                'Cannot get nsl code from given object of type "%s".'
                % type(obj))

        return net, sta, loc

    def get_nslc(self, obj):
        if isinstance(obj, trace.Trace):
            return obj.nslc_id
        elif isinstance(obj, gf.Target):
            return obj.codes
        elif isinstance(obj, tuple) and len(obj) == 4:
            return obj
        else:
            raise InvalidObject(
                'Cannot get nslc code from given object of type "%s".'
                % type(obj))

    def get_tmin_tmax(self, obj):
        if isinstance(obj, trace.Trace):
            return obj.tmin, obj.tmax
        else:
            raise InvalidObject(
                'Cannot get tmin and tmax from given object of type "%s".'
                % type(obj))

    def get_station(self, obj):
        if self.is_blacklisted(obj):
            raise NotFound('Station is blacklisted:', self.get_nsl(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Station is not on whitelist:', self.get_nsl(obj))

        if isinstance(obj, model.Station):
            return obj

        net, sta, loc = self.get_nsl(obj)

        keys = [(net, sta, loc), (net, sta, ''), ('', sta, '')]
        for k in keys:
            if k in self.stations:
                return self.stations[k]

        raise NotFound('No station information:', keys)

    def get_stations(self):
        return [self.stations[k] for k in sorted(self.stations)
                if not self.is_blacklisted(self.stations[k])
                and self.is_whitelisted(self.stations[k])]

    def get_kite_scenes(self):
        return self.kite_scenes

    def get_kite_scene(self, scene_id=None):
        if scene_id is None:
            if len(self.kite_scenes) == 0:
                raise AttributeError('No kite displacements defined.')
            return self.kite_scenes[0]
        else:
            for scene in self.kite_scenes:
                if scene.meta.scene_id == scene_id:
                    return scene

        raise NotFound('No kite scene with id "%s" defined.' % scene_id)

    def get_gnss_campaigns(self):
        return self.gnss_campaigns

    def get_gnss_campaign(self, name):
        for camp in self.gnss_campaigns:
            if camp.name == name:
                return camp

        raise NotFound('GNSS campaign %s not found!' % name)
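
    # Note on response lookup (sketch of the behavior implemented below):
    # get_response() collects candidates from both the SAC PZ inventory and
    # all loaded StationXML documents, matching on increasingly wildcarded
    # keys (NET.STA.LOC.CHA, NET.STA..CHA, .STA..CHA) and on the requested
    # time span. Exactly one candidate must remain; zero or several raise
    # NotFound. Hypothetical call, where ``some_trace`` is a pyrocko trace:
    #
    #     resp = ds.get_response(some_trace, quantity='velocity')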

    def get_response(self, obj, quantity='displacement'):
        if (self.responses is None or len(self.responses) == 0) \
                and (self.responses_stationxml is None
                     or len(self.responses_stationxml) == 0):

            raise NotFound('No response information available.')

        quantity_to_unit = {
            'displacement': 'M',
            'velocity': 'M/S',
            'acceleration': 'M/S**2'}

        if self.is_blacklisted(obj):
            raise NotFound('Response is blacklisted:', self.get_nslc(obj))

        if not self.is_whitelisted(obj):
            raise NotFound('Response is not on whitelist:',
                           self.get_nslc(obj))

        net, sta, loc, cha = self.get_nslc(obj)
        tmin, tmax = self.get_tmin_tmax(obj)

        keys_x = [
            (net, sta, loc, cha), (net, sta, '', cha), ('', sta, '', cha)]

        keys = []
        for k in keys_x:
            if k not in keys:
                keys.append(k)

        candidates = []
        for k in keys:
            if k in self.responses:
                for x in self.responses[k]:
                    if x.tmin < tmin and (x.tmax is None or tmax < x.tmax):
                        if quantity == 'displacement':
                            candidates.append(x.response)
                        elif quantity == 'velocity':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse()]))
                        elif quantity == 'acceleration':
                            candidates.append(trace.MultiplyResponse([
                                x.response,
                                trace.DifferentiationResponse(2)]))
                        else:
                            assert False

        for sx in self.responses_stationxml:
            try:
                candidates.append(
                    sx.get_pyrocko_response(
                        (net, sta, loc, cha),
                        timespan=(tmin, tmax),
                        fake_input_units=quantity_to_unit[quantity]))

            except (fs.NoResponseInformation,
                    fs.MultipleResponseInformation):
                pass

        if len(candidates) == 1:
            return candidates[0]

        elif len(candidates) == 0:
            raise NotFound('No response found:', (net, sta, loc, cha))
        else:
            raise NotFound('Multiple responses found:', (net, sta, loc, cha))

    def get_waveform_raw(
            self, obj,
            tmin,
            tmax,
            tpad=0.,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):

        net, sta, loc, cha = self.get_nslc(obj)

        if self.is_blacklisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is blacklisted:', (net, sta, loc, cha))

        if not self.is_whitelisted((net, sta, loc, cha)):
            raise NotFound(
                'Waveform is not on whitelist:', (net, sta, loc, cha))

        if self.clip_handling == 'by_nsl':
            if self.has_clipping((net, sta, loc), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc))

        elif self.clip_handling == 'by_nslc':
            if self.has_clipping((net, sta, loc, cha), tmin, tmax):
                raise NotFound(
                    'Waveform clipped:', (net, sta, loc, cha))

        trs = self.pile.all(
            tmin=tmin+toffset_noise_extract,
            tmax=tmax+toffset_noise_extract,
            tpad=tpad,
            trace_selector=lambda tr: tr.nslc_id == (net, sta, loc, cha),
            want_incomplete=want_incomplete or extend_incomplete)

        if self.apply_displaced_sampling_workaround:
            for tr in trs:
                tr.snap()

        if toffset_noise_extract != 0.0:
            for tr in trs:
                tr.shift(-toffset_noise_extract)

        if extend_incomplete and len(trs) == 1:
            trs[0].extend(
                tmin + toffset_noise_extract - tpad,
                tmax + toffset_noise_extract + tpad,
                fillmethod='repeat')

        if not want_incomplete and len(trs) != 1:
            if len(trs) == 0:
                message = 'Waveform missing or incomplete.'
            else:
                message = 'Waveform has gaps.'

            raise NotFound(
                message,
                codes=(net, sta, loc, cha),
                time_range=(
                    tmin + toffset_noise_extract - tpad,
                    tmax + toffset_noise_extract + tpad))

        return trs

    def get_waveform_restituted(
            self, obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None,
            toffset_noise_extract=0.,
            want_incomplete=False,
            extend_incomplete=False):

        if deltat is not None:
            tpad_downsample = deltat * 50.
        else:
            tpad_downsample = 0.

        trs_raw = self.get_waveform_raw(
            obj,
            tmin=tmin,
            tmax=tmax,
            tpad=tpad+tfade+tpad_downsample,
            toffset_noise_extract=toffset_noise_extract,
            want_incomplete=want_incomplete,
            extend_incomplete=extend_incomplete)

        trs_restituted = []
        for tr in trs_raw:
            if deltat is not None:
                tr.downsample_to(deltat, snap=True, allow_upsample_max=5)
                tr.deltat = deltat

            resp = self.get_response(tr, quantity=quantity)
            try:
                trs_restituted.append(
                    tr.transfer(
                        tfade=tfade, freqlimits=freqlimits,
                        transfer_function=resp, invert=True, demean=True))

            except trace.InfiniteResponse:
                raise NotFound(
                    'Instrument response deconvolution failed '
                    '(divide by zero)', tr.nslc_id)

        return trs_restituted, trs_raw

    def _get_projections(
            self, station, backazimuth, source, target, tmin, tmax):

        # fill in missing channel information (happens when station file
        # does not contain any channel information)
        if not station.get_channels():
            station = copy.deepcopy(station)

            nsl = station.nsl()
            trs = self.pile.all(
                tmin=tmin, tmax=tmax,
                trace_selector=lambda tr: tr.nslc_id[:3] == nsl,
                load_data=False)

            channels = list(set(tr.channel for tr in trs))
            station.set_channels_by_name(*channels)

        projections = []
        projections.extend(station.guess_projections_to_enu(
            out_channels=('E', 'N', 'Z')))

        if source is not None and target is not None:
            backazimuth = source.azibazi_to(target)[1]

        if backazimuth is not None:
            projections.extend(station.guess_projections_to_rtu(
                out_channels=('R', 'T', 'Z'),
                backazimuth=backazimuth))

        if not projections:
            raise NotFound(
                'Cannot determine projection of data components:',
                station.nsl())

        return projections

    def _get_waveform(
            self, obj, quantity='displacement',
            tmin=None, tmax=None, tpad=0.,
            tfade=0., freqlimits=None, deltat=None, cache=None,
            backazimuth=None,
            source=None,
            target=None,
            debug=False):

        assert not debug or (debug and cache is None)

        if cache is True:
            cache = self._cache

        _, _, _, channel = self.get_nslc(obj)
        station = self.get_station(self.get_nsl(obj))

        nslc = station.nsl() + (channel,)

        if self.is_blacklisted(nslc):
            raise NotFound(
                'Waveform is blacklisted:', nslc)

        if not self.is_whitelisted(nslc):
            raise NotFound(
                'Waveform is not on whitelist:', nslc)

        assert tmin is not None
        assert tmax is not None

        tmin = float(tmin)
        tmax = float(tmax)

        nslc = tuple(nslc)

        cache_k = nslc + (
            tmin, tmax, tuple(freqlimits), tfade, deltat, tpad, quantity)
        if cache is not None and (nslc + cache_k) in cache:
            obj = cache[nslc + cache_k]
            if isinstance(obj, Exception):
                raise obj
            elif obj is None:
                raise NotFound('Waveform not found!', nslc)
            else:
                return obj

        syn_test = self.synthetic_test
        toffset_noise_extract = 0.0
        if syn_test:
            if not syn_test.respect_data_availability:
                if syn_test.real_noise_scale != 0.0:
                    raise DatasetError(
                        'respect_data_availability=False and '
                        'addition of real noise cannot be combined.')

                tr = syn_test.get_waveform(
                    nslc, tmin, tmax,
                    tfade=tfade,
                    freqlimits=freqlimits)

                if cache is not None:
                    cache[tr.nslc_id + cache_k] = tr

                if debug:
                    # same 4-tuple shape as the debug return at the end of
                    # this method
                    return [], [], [], tr
                else:
                    return tr

            if syn_test.real_noise_scale != 0.0:
                toffset_noise_extract = syn_test.real_noise_toffset

        # collect correction delays over all candidate output channels, to
        # pad the extracted time window accordingly
        abs_delays = []
        for ocha in 'ENZRT':
            sc = self.station_corrections.get(station.nsl() + (ocha,), None)
            if sc:
                abs_delays.append(abs(sc.delay))

        if abs_delays:
            abs_delay_max = max(abs_delays)
        else:
            abs_delay_max = 0.0

        projections = self._get_projections(
            station, backazimuth, source, target, tmin, tmax)

        try:
            trs_projected = []
            trs_restituted = []
            trs_raw = []
            exceptions = []
            for matrix, in_channels, out_channels in projections:
                deps = trace.project_dependencies(
                    matrix, in_channels, out_channels)

                try:
                    trs_restituted_group = []
                    trs_raw_group = []
                    if channel in deps:
                        for cha in deps[channel]:
                            trs_restituted_this, trs_raw_this = \
                                self.get_waveform_restituted(
                                    station.nsl() + (cha,),
                                    quantity=quantity,
                                    tmin=tmin, tmax=tmax,
                                    tpad=tpad+abs_delay_max,
                                    toffset_noise_extract=toffset_noise_extract,  # noqa
                                    tfade=tfade,
                                    freqlimits=freqlimits,
                                    deltat=deltat,
                                    want_incomplete=debug,
                                    extend_incomplete=self.extend_incomplete)

                            trs_restituted_group.extend(trs_restituted_this)
                            trs_raw_group.extend(trs_raw_this)

                        trs_projected.extend(
                            trace.project(
                                trs_restituted_group, matrix,
                                in_channels, out_channels))

                        trs_restituted.extend(trs_restituted_group)
                        trs_raw.extend(trs_raw_group)

                except NotFound as e:
                    exceptions.append((in_channels, out_channels, e))

            if not trs_projected:
                err = []
                for (in_channels, out_channels, e) in exceptions:
                    sin = ', '.join(c.name for c in in_channels)
                    sout = ', '.join(c.name for c in out_channels)
                    err.append('(%s) -> (%s): %s' % (sin, sout, e))

                raise NotFound('\n'.join(err))

            for tr in trs_projected:
                sc = self.station_corrections.get(tr.nslc_id, None)
                if sc:
                    if self.apply_correction_factors:
                        tr.ydata /= sc.factor

                    if self.apply_correction_delays:
                        tr.shift(-sc.delay)

                if tmin is not None and tmax is not None:
                    tr.chop(tmin, tmax)

            if syn_test:
                trs_projected_synthetic = []
                for tr in trs_projected:
                    if tr.channel == channel:
                        tr_syn = syn_test.get_waveform(
                            tr.nslc_id, tmin, tmax,
                            tfade=tfade, freqlimits=freqlimits)

                        if tr_syn:
                            if syn_test.real_noise_scale != 0.0:
                                tr_syn = tr_syn.copy()
                                tr_noise = tr.copy()
                                tr_noise.set_ydata(
                                    tr_noise.get_ydata()
                                    * syn_test.real_noise_scale)

                                tr_syn.add(tr_noise)

                            trs_projected_synthetic.append(tr_syn)

                trs_projected = trs_projected_synthetic

            if cache is not None:
                for tr in trs_projected:
                    cache[tr.nslc_id + cache_k] = tr

            tr_return = None
            for tr in trs_projected:
                if tr.channel == channel:
                    tr_return = tr

            if debug:
                return trs_projected, trs_restituted, trs_raw, tr_return

            elif tr_return:
                return tr_return

            else:
                raise NotFound(
                    'waveform not available', station.nsl() + (channel,))

        except NotFound:
            if cache is not None:
                cache[nslc + cache_k] = None

            raise

    def get_waveform(self, obj, tinc_cache=None, **kwargs):
        tmin = kwargs['tmin']
        tmax = kwargs['tmax']

        if tinc_cache is not None:
            tmin_r = (math.floor(tmin / tinc_cache) - 1.0) * tinc_cache
            tmax_r = (math.ceil(tmax / tinc_cache) + 1.0) * tinc_cache
        else:
            tmin_r = tmin
            tmax_r = tmax

        kwargs['tmin'] = tmin_r
        kwargs['tmax'] = tmax_r

        if kwargs.get('debug', None):
            return self._get_waveform(obj, **kwargs)
        else:
            tr = self._get_waveform(obj, **kwargs)
            return tr.chop(tmin, tmax, inplace=False)

    def get_events(self, magmin=None, event_names=None):
        evs = []
        for ev in self.events:
            if ((magmin is None or ev.magnitude >= magmin)
                    and (event_names is None or ev.name in event_names)):
                evs.append(ev)

        return evs

    def get_event_by_time(self, t, magmin=None):
        evs = self.get_events(magmin=magmin)
        ev_x = None
        for ev in evs:
            if ev_x is None or abs(ev.time - t) < abs(ev_x.time - t):
                ev_x = ev

        if not ev_x:
            raise NotFound(
                'No event information matching criteria (t=%s, magmin=%s).'
                % (t, magmin))

        return ev_x

    def get_event(self):
        if self._event_name is None:
            raise NotFound('No main event selected in dataset!')

        for ev in self.events:
            if ev.name == self._event_name:
                return ev

        raise NotFound('No such event: %s' % self._event_name)

    def get_picks(self):
        if self._picks is None:
            hash_to_name = {}
            names = set()
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.EventMarker):
                    name = marker.get_event().name
                    if name in names:
                        raise DatasetError(
                            'Duplicate event name "%s" in picks.' % name)

                    names.add(name)
                    hash_to_name[marker.get_event_hash()] = name

            for ev in self.events:
                hash_to_name[ev.get_hash()] = ev.name

            picks = {}
            for marker in self.pick_markers:
                if isinstance(marker, pmarker.PhaseMarker):
                    ehash = marker.get_event_hash()

                    nsl = marker.one_nslc()[:3]
                    phasename = marker.get_phasename()

                    if ehash is None or ehash not in hash_to_name:
                        raise DatasetError(
                            'Unassociated pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    eventname = hash_to_name[ehash]

                    if (nsl, phasename, eventname) in picks:
                        raise DatasetError(
                            'Duplicate pick: %s.%s.%s, %s' %
                            (nsl + (phasename, )))

                    picks[nsl, phasename, eventname] = marker

            self._picks = picks

        return self._picks

    def get_pick(self, eventname, obj, phasename):
        nsl = self.get_nsl(obj)
        return self.get_picks().get((nsl, phasename, eventname), None)
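
# Minimal usage sketch (file names and codes are made up). A Dataset is
# normally assembled through DatasetConfig below, but it can also be built
# by hand:
#
#     ds = Dataset(event_name='ev001')
#     ds.add_events(filename='events.txt')
#     ds.add_stations(pyrocko_stations_filename='stations.txt')
#     ds.add_waveforms(paths=['data/raw'])
#     ds.add_responses(stationxml_filenames=['responses.xml'])
#     ev = ds.get_event()
#     tr = ds.get_waveform(
#         ('GE', 'APE', '', 'BHZ'),
#         tmin=ev.time + 10., tmax=ev.time + 110.,
#         quantity='displacement', tfade=15.,
#         freqlimits=(0.01, 0.02, 5., 10.), cache=True)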

class DatasetConfig(HasPaths):
    '''
    Configuration for a Grond `Dataset` object.
    '''

    stations_path = Path.T(
        optional=True,
        help='List of files with station coordinates in Pyrocko format.')
    stations_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of files with station coordinates in StationXML format.')
    events_path = Path.T(
        optional=True,
        help='File with hypocenter information and possibly'
             ' reference solution.')
    waveform_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories with raw waveform data.')
    clippings_path = Path.T(
        optional=True)
    responses_sacpz_path = Path.T(
        optional=True,
        help='List of SACPZ response files for restitution of'
             ' the raw waveform data.')
    responses_stationxml_paths = List.T(
        Path.T(),
        optional=True,
        help='List of StationXML response files for restitution of'
             ' the raw waveform data.')
    station_corrections_path = Path.T(
        optional=True,
        help='File containing station correction information.')
    apply_correction_factors = Bool.T(
        optional=True,
        default=True,
        help='Apply correction factors from station corrections.')
    apply_correction_delays = Bool.T(
        optional=True,
        default=True,
        help='Apply correction delays from station corrections.')
    apply_displaced_sampling_workaround = Bool.T(
        optional=True,
        default=False,
        help='Work around displaced sampling issues.')
    extend_incomplete = Bool.T(
        default=False,
        help='Extend incomplete seismic traces.')
    picks_paths = List.T(
        Path.T())
    blacklist_paths = List.T(
        Path.T(),
        help='List of text files with blacklisted stations.')
    blacklist = List.T(
        String.T(),
        help='Stations/components to be excluded according to their STA, '
             'NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes.')
    whitelist_paths = List.T(
        Path.T(),
        help='List of text files with whitelisted stations.')
    whitelist = List.T(
        String.T(),
        optional=True,
        help='If not None, list of stations/components to include according '
             'to their STA, NET.STA, NET.STA.LOC, or NET.STA.LOC.CHA codes. '
             'Note: when whitelisting on channel level, both the raw and '
             'the processed channel codes have to be listed.')
    synthetic_test = SyntheticTest.T(
        optional=True)

    kite_scene_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the InSAR scenes.')

    gnss_campaign_paths = List.T(
        Path.T(),
        optional=True,
        help='List of directories for the GNSS campaign data.')

    def __init__(self, *args, **kwargs):
        HasPaths.__init__(self, *args, **kwargs)
        self._ds = {}

    def get_event_names(self):
        logger.info('Loading events ...')

        def extra(path):
            return expand_template(path, dict(
                event_name='*'))

        def fp(path):
            return self.expand_path(path, extra=extra)

        def check_events(events, fn):
            for ev in events:
                if not ev.name:
                    logger.warning('Event in %s has no name!', fn)
                    return
                if not ev.lat or not ev.lon:
                    logger.warning(
                        'Event %s has inconsistent coordinates!', ev.name)
                if not ev.depth:
                    logger.warning('Event %s has no depth!', ev.name)
                if not ev.time:
                    logger.warning('Event %s has no time!', ev.name)

        events = []
        events_path = fp(self.events_path)
        fns = glob.glob(events_path)
        if not fns:
            raise DatasetError('No event files matching "%s".' % events_path)

        for fn in fns:
            logger.debug('Loading from file %s' % fn)
            ev = model.load_events(filename=fn)
            check_events(ev, fn)

            events.extend(ev)

        event_names = [ev_.name for ev_ in events]
        event_names.sort()
        return event_names

    def get_dataset(self, event_name):
        if event_name not in self._ds:
            def extra(path):
                return expand_template(path, dict(
                    event_name=event_name))

            def fp(path):
                p = self.expand_path(path, extra=extra)
                if p is None:
                    return None

                if isinstance(p, list):
                    for path in p:
                        if not op.exists(path):
                            logger.warning('Path %s does not exist.' % path)
                else:
                    if not op.exists(p):
                        logger.warning('Path %s does not exist.' % p)

                return p

            ds = Dataset(event_name)
            try:
                ds.add_events(filename=fp(self.events_path))
                ds.add_stations(
                    pyrocko_stations_filename=fp(self.stations_path),
                    stationxml_filenames=fp(self.stations_stationxml_paths))

                if self.waveform_paths:
                    ds.add_waveforms(paths=fp(self.waveform_paths))

                if self.kite_scene_paths:
                    ds.add_kite_scenes(paths=fp(self.kite_scene_paths))

                if self.gnss_campaign_paths:
                    ds.add_gnss_campaigns(paths=fp(self.gnss_campaign_paths))

                if self.clippings_path:
                    ds.add_clippings(
                        markers_filename=fp(self.clippings_path))

                if self.responses_sacpz_path:
                    ds.add_responses(
                        sacpz_dirname=fp(self.responses_sacpz_path))

                if self.responses_stationxml_paths:
                    ds.add_responses(
                        stationxml_filenames=fp(
                            self.responses_stationxml_paths))

                if self.station_corrections_path:
                    ds.add_station_corrections(
                        filename=fp(self.station_corrections_path))

                ds.apply_correction_factors = self.apply_correction_factors
                ds.apply_correction_delays = self.apply_correction_delays
                ds.apply_displaced_sampling_workaround = \
                    self.apply_displaced_sampling_workaround
                ds.extend_incomplete = self.extend_incomplete

                for picks_path in self.picks_paths:
                    ds.add_picks(
                        filename=fp(picks_path))

                ds.add_blacklist(self.blacklist)
                ds.add_blacklist(filenames=fp(self.blacklist_paths))
                if self.whitelist:
                    ds.add_whitelist(self.whitelist)
                if self.whitelist_paths:
                    ds.add_whitelist(filenames=fp(self.whitelist_paths))

                ds.set_synthetic_test(copy.deepcopy(self.synthetic_test))
                self._ds[event_name] = ds
            except (FileLoadError, OSError) as e:
                raise DatasetError(str(e))

        return self._ds[event_name]
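
# Usage sketch (illustrative paths). In a Grond configuration file the
# DatasetConfig appears as a YAML section; '${event_name}' placeholders are
# expanded per event through expand_template():
#
#     dataset_config: !grond.DatasetConfig
#       events_path: 'data/events/${event_name}/event.txt'
#       stations_path: 'data/events/${event_name}/stations.txt'
#       waveform_paths: ['data/events/${event_name}/waveforms']
#
# Programmatically, one Dataset per event is obtained with:
#
#     for name in dataset_config.get_event_names():
#         ds = dataset_config.get_dataset(name)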

__all__ = '''
    Dataset
    DatasetConfig
    DatasetError
    InvalidObject
    NotFound
    StationCorrection
    load_station_corrections
    dump_station_corrections
'''.split()