Source code for pyrocko.scenario.targets.waveform

# http://pyrocko.org - GPLv3
#
# The Pyrocko Developers, 21st Century
# ---|P------/S----------~Lg----------

'''
Synthetic seismic waveform generator.
'''

import hashlib
import math
import logging
import numpy as num

from os import path as op
from functools import reduce

from pyrocko.guts import Float, List, Bool, Dict, String
from pyrocko.gui.snuffler.marker import PhaseMarker, EventMarker
from pyrocko import gf, model, util, trace, io
from pyrocko.io.io_common import FileSaveError
from pyrocko import pile

from ..station import StationGenerator, RandomStationGenerator
from .base import TargetGenerator, NoiseGenerator
from ..error import ScenarioError


DEFAULT_STORE_ID = 'global_2s'

logger = logging.getLogger('pyrocko.scenario.targets.waveform')
guts_prefix = 'pf.scenario'


[docs]class WaveformNoiseGenerator(NoiseGenerator):

    def get_time_increment(self, deltat):
        return deltat * 1024

    def get_intersecting_snippets(self, deltat, codes, tmin, tmax):
        raise NotImplementedError()

    def add_noise(self, tr):
        for ntr in self.get_intersecting_snippets(
                tr.deltat, tr.nslc_id, tr.tmin, tr.tmax):
            tr.add(ntr)


[docs]class WhiteNoiseGenerator(WaveformNoiseGenerator):

    scale = Float.T(default=1e-6)

    def get_seed_offset2(self, deltat, iw, codes):
        m = hashlib.sha1(('%e %i %s.%s.%s.%s' % ((deltat, iw) + codes))
                         .encode('utf8'))
        return int(m.hexdigest(), base=16) % 10000000

    def get_intersecting_snippets(self, deltat, codes, tmin, tmax):
        tinc = self.get_time_increment(deltat)
        iwmin = int(math.floor(tmin / tinc))
        iwmax = int(math.floor(tmax / tinc))

        trs = []
        for iw in range(iwmin, iwmax+1):
            seed_offset = self.get_seed_offset2(deltat, iw, codes)
            rstate = self.get_rstate(seed_offset)

            n = int(round(tinc // deltat))

            trs.append(trace.Trace(
                codes[0], codes[1], codes[2], codes[3],
                deltat=deltat,
                tmin=iw*tinc,
                ydata=rstate.normal(loc=0, scale=self.scale, size=n)))

        return trs


[docs]class WaveformGenerator(TargetGenerator):

    station_generators = List.T(
        StationGenerator.T(),
        default=[RandomStationGenerator.D()],
        help='List of StationGenerators.')

    noise_generator = WaveformNoiseGenerator.T(
        default=WhiteNoiseGenerator.D(),
        help='Add Synthetic noise on the waveforms.')

    store_id = gf.StringID.T(
        default=DEFAULT_STORE_ID,
        help='The GF store to use for forward-calculations.')

    seismogram_quantity = gf.QuantityType.T(
        default='displacement')

    nsl_to_store_id = Dict.T(
        String.T(), gf.StringID.T(),
        help='Selectively use different GF stores for different stations.')

    vmin_cut = Float.T(
        default=2000.,
        help='Minimum velocity to seismic velocity to consider in the model.')
    vmax_cut = Float.T(
        default=8000.,
        help='Maximum velocity to seismic velocity to consider in the model.')

    fmin = Float.T(
        default=0.01,
        help='Minimum frequency/wavelength to resolve in the'
             ' synthetic waveforms.')

    tabulated_phases = List.T(
        gf.meta.TPDef.T(), optional=True,
        help='Define seismic phases to be calculated.')

    tabulated_phases_from_store = Bool.T(
        default=False,
        help='Calculate seismic phase arrivals for all travel-time tables '
             'defined in GF store.')

    tabulated_phases_noise_scale = Float.T(
        default=0.0,
        help='Standard deviation of normally distributed noise added to '
             'calculated phase arrivals.')

    taper = trace.Taper.T(
        optional=True,
        help='Time domain taper applied to synthetic waveforms.')

    compensate_synthetic_offsets = Bool.T(
        default=False,
        help='Center synthetic trace amplitudes using mean of waveform tips.')

    tinc = Float.T(
        optional=True,
        help='Time increment of waveforms.')

    continuous = Bool.T(
        default=True,
        help='Only produce traces that intersect with events.')

    def __init__(self, *args, **kwargs):
        super(WaveformGenerator, self).__init__(*args, **kwargs)
        self._targets = []
        self._piles = {}

    def _get_pile(self, path):
        apath = op.abspath(path)
        assert op.isdir(apath)

        if apath not in self._piles:
            fns = util.select_files(
                [apath], show_progress=False)

            p = pile.Pile()
            if fns:
                p.load_files(fns, fileformat='mseed', show_progress=False)

            self._piles[apath] = p

        return self._piles[apath]

    def get_stations(self):
        stations = []
        for station_generator in self.station_generators:
            stations.extend(station_generator.get_stations())
        return stations

    def get_distance_range(self, sources):
        distances = num.array(
            [sg.get_distance_range(sources)
             for sg in self.station_generators])
        return (distances[:, 0].min(), distances[:, 1].max())

    def get_targets(self):
        if self._targets:
            return self._targets

        for station in self.get_stations():
            channel_data = []
            channels = station.get_channels()
            if channels:
                for channel in channels:
                    channel_data.append([
                        channel.name,
                        channel.azimuth,
                        channel.dip])

            else:
                for c_name in ['BHZ', 'BHE', 'BHN']:
                    channel_data.append([
                        c_name,
                        model.guess_azimuth_from_name(c_name),
                        model.guess_dip_from_name(c_name)])

            nsl = (station.network, station.station, station.location)
            for c_name, c_azi, c_dip in channel_data:
                target = gf.Target(
                    codes=nsl + (c_name,),
                    quantity=self.seismogram_quantity,
                    lat=station.lat,
                    lon=station.lon,
                    north_shift=station.north_shift,
                    east_shift=station.east_shift,
                    depth=station.depth,
                    store_id=self.nsl_to_store_id.get(
                        '.'.join(nsl), self.store_id),
                    optimization='enable',
                    interpolation='nearest_neighbor',
                    azimuth=c_azi,
                    dip=c_dip)

                self._targets.append(target)

        return self._targets

    def get_time_range(self, sources):
        dmin, dmax = self.get_distance_range(sources)

        times = num.array([source.time for source in sources],
                          dtype=util.get_time_dtype())

        tmin_events = num.min(times)
        tmax_events = num.max(times)

        tmin = tmin_events + dmin / self.vmax_cut - 10.0 / self.fmin
        tmax = tmax_events + dmax / self.vmin_cut + 10.0 / self.fmin

        return tmin, tmax

    def get_codes_to_deltat(self, engine, sources):
        deltats = {}

        targets = self.get_targets()
        for source in sources:
            for target in targets:
                deltats[target.codes] = engine.get_store(
                    target.store_id).config.deltat

        return deltats

    def get_useful_time_increment(self, engine, sources):
        _, dmax = self.get_distance_range(sources)
        tinc = dmax / self.vmin_cut + 2.0 / self.fmin

        deltats = set(self.get_codes_to_deltat(engine, sources).values())
        deltat = reduce(util.lcm, deltats)
        tinc = int(round(tinc / deltat)) * deltat
        return tinc

    def get_relevant_sources(self, sources, tmin, tmax):
        dmin, dmax = self.get_distance_range(sources)
        trange = tmax - tmin
        tmax_pad = trange + tmax + dmin / self.vmax_cut
        tmin_pad = tmin - (dmax / self.vmin_cut + trange)

        return [s for s in sources if s.time < tmax_pad and s.time > tmin_pad]

    def get_waveforms(self, engine, sources, tmin, tmax):

        sources_relevant = self.get_relevant_sources(sources, tmin, tmax)
        if not (self.continuous or sources_relevant):
            return []

        trs = {}
        tts = util.time_to_str

        for nslc, deltat in self.get_codes_to_deltat(engine, sources).items():
            tr_tmin = round(tmin / deltat) * deltat
            tr_tmax = (round(tmax / deltat)-1) * deltat
            nsamples = int(round((tr_tmax - tr_tmin) / deltat)) + 1

            tr = trace.Trace(
                *nslc,
                tmin=tr_tmin,
                ydata=num.zeros(nsamples),
                deltat=deltat)

            self.noise_generator.add_noise(tr)

            trs[nslc] = tr

        logger.debug('Forward modelling waveforms between %s - %s...'
                     % (tts(tmin, format='%Y-%m-%d_%H-%M-%S'),
                        tts(tmax, format='%Y-%m-%d_%H-%M-%S')))

        if not sources_relevant:
            return list(trs.values())

        targets = self.get_targets()
        response = engine.process(sources_relevant, targets)
        for source, target, res in response.iter_results(
                get='results'):

            if isinstance(res, gf.SeismosizerError):
                logger.warning(
                    'Out of bounds! \nTarget: %s\nSource: %s\n' % (
                        '.'.join(target.codes)), source)
                continue

            tr = res.trace.pyrocko_trace()

            candidate = trs[target.codes]
            if not candidate.overlaps(tr.tmin, tr.tmax):
                continue

            if self.compensate_synthetic_offsets:
                tr.ydata -= (num.mean(tr.ydata[-3:-1]) +
                             num.mean(tr.ydata[1:3])) / 2.

            if self.taper:
                tr.taper(self.taper)

            candidate.add(tr)
            trs[target.codes] = candidate

        return list(trs.values())

    def get_onsets(self, engine, sources, *args, **kwargs):

        targets = {t.codes[:3]: t for t in self.get_targets()}

        markers = []
        for source in sources:
            ev = source.pyrocko_event()
            markers.append(EventMarker(ev))
            for nsl, target in targets.items():
                store = engine.get_store(target.store_id)
                if self.tabulated_phases:
                    tabulated_phases = self.tabulated_phases

                elif self.tabulated_phases_from_store:
                    tabulated_phases = store.config.tabulated_phases
                else:
                    tabulated_phases = []

                for phase in tabulated_phases:
                    t = store.t(phase.id, source, target)
                    if not t:
                        continue

                    noise_scale = self.tabulated_phases_noise_scale
                    if noise_scale != 0.0:
                        t += num.random.normal(scale=noise_scale)

                    t += source.time
                    markers.append(
                        PhaseMarker(
                            phasename=phase.id,
                            tmin=t,
                            tmax=t,
                            event=source.pyrocko_event(),
                            nslc_ids=(nsl+('*',),)
                            )
                        )
        return markers

    def ensure_data(self, engine, sources, path, tmin=None, tmax=None):
        self.ensure_waveforms(engine, sources, path, tmin, tmax)
        self.ensure_responses(path)

    def ensure_waveforms(self, engine, sources, path, tmin=None, tmax=None):

        path_waveforms = op.join(path, 'waveforms')
        util.ensuredir(path_waveforms)

        p = self._get_pile(path_waveforms)

        nslc_ids = set(target.codes for target in self.get_targets())

        def have_waveforms(tmin, tmax):
            trs_have = p.all(
                tmin=tmin, tmax=tmax,
                load_data=False, degap=False,
                trace_selector=lambda tr: tr.nslc_id in nslc_ids)

            return any(tr.data_len() > 0 for tr in trs_have)

        def add_files(paths):
            p.load_files(paths, fileformat='mseed', show_progress=False)

        path_traces = op.join(
            path_waveforms,
            '%(wmin_year)s',
            '%(wmin_month)s',
            '%(wmin_day)s',
            'waveform_%(network)s_%(station)s_' +
            '%(location)s_%(channel)s_%(tmin)s_%(tmax)s.mseed')

        tmin_all, tmax_all = self.get_time_range(sources)
        tmin = tmin if tmin is not None else tmin_all
        tmax = tmax if tmax is not None else tmax_all
        tts = util.time_to_str

        tinc = self.tinc or self.get_useful_time_increment(engine, sources)
        tmin = num.floor(tmin / tinc) * tinc
        tmax = num.ceil(tmax / tinc) * tinc

        nwin = int(round((tmax - tmin) / tinc))

        pbar = None
        try:
            for iwin in range(nwin):
                tmin_win = tmin + iwin*tinc
                tmax_win = tmin + (iwin+1)*tinc

                if have_waveforms(tmin_win, tmax_win):
                    continue

                if pbar is None:
                    pbar = util.progressbar(
                        'Generating waveforms', (nwin-iwin))

                pbar.update(iwin)

                trs = self.get_waveforms(engine, sources, tmin_win, tmax_win)

                try:
                    wpaths = io.save(
                        trs, path_traces,
                        additional=dict(
                            wmin_year=tts(tmin_win, format='%Y'),
                            wmin_month=tts(tmin_win, format='%m'),
                            wmin_day=tts(tmin_win, format='%d'),
                            wmin=tts(tmin_win, format='%Y-%m-%d_%H-%M-%S'),
                            wmax_year=tts(tmax_win, format='%Y'),
                            wmax_month=tts(tmax_win, format='%m'),
                            wmax_day=tts(tmax_win, format='%d'),
                            wmax=tts(tmax_win, format='%Y-%m-%d_%H-%M-%S')))

                    for wpath in wpaths:
                        logger.debug('Generated file: %s' % wpath)

                    add_files(wpaths)

                except FileSaveError as e:
                    raise ScenarioError(str(e))

        finally:
            if pbar is not None:
                pbar.finish()

    def ensure_responses(self, path):
        from pyrocko.io import stationxml

        path_responses = op.join(path, 'meta')
        util.ensuredir(path_responses)

        fn_stationxml = op.join(path_responses, 'waveform_response.xml')
        i = 0
        while op.exists(fn_stationxml):
            fn_stationxml = op.join(
                path_responses, 'waveform_response-%s.xml' % i)
            i += 1

        logger.debug('Writing waveform meta information to StationXML...')

        stations = self.get_stations()
        sxml = stationxml.FDSNStationXML.from_pyrocko_stations(stations)

        sunit = stationxml.quantity_to_units[self.seismogram_quantity]

        response = stationxml.Response(
            instrument_sensitivity=stationxml.Sensitivity(
                value=1.,
                frequency=1.,
                input_units=stationxml.Units(sunit),
                output_units=stationxml.Units('COUNT')),
            stage_list=[])

        for net, station, channel in sxml.iter_network_station_channels():
            channel.response = response

        sxml.dump_xml(filename=fn_stationxml)

    def add_map_artists(self, engine, sources, automap):
        automap.add_stations(self.get_stations())
Navigation

Source code for pyrocko.scenario.targets.waveform

Navigation