datasets.py 10.8 KB
Newer Older
Yan's avatar
Yan committed
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python3
from rawprasslib import load_raw
from opentimspy.opentims import OpenTIMS
try:
    from rawautoparams import load_params
    autoparams = True
except ImportError:
    autoparams = False
import numpy as np
Yan's avatar
Yan committed
10
11
12
import logging
import os.path
import pathlib
Yan's avatar
Yan committed
13
14
15
import prasopes.config as cf
import prasopes.datatools as dt

Yan's avatar
Yan committed
16

Yan's avatar
Yan committed
17
logger = logging.getLogger('dsLogger')
Yan's avatar
Yan committed
18

Yan's avatar
Yan committed
19

Yan's avatar
Yan committed
20
def logdecor(fnc):
Yan's avatar
Yan committed
21
22
23
24
25
26
    def decfnc(*args, **kwargs):
        logger.info(fnc.__doc__ + "...")
        out = fnc(*args, **kwargs)
        logger.info("DONE: " + fnc.__doc__)
        return out
    return decfnc
Yan's avatar
Yan committed
27
28


Yan's avatar
Yan committed
29
30
31
32
33
class Dataset():
    def __init__(self, rawfile):
        self.filename = rawfile
        self.chromatograms = []
        self.dataset = []
Yan's avatar
Yan committed
34
35
        self.headers = None
        self.params = None
Yan's avatar
Yan committed
36
37
        self.mintime = -np.inf
        self.maxtime = np.inf
Yan's avatar
Yan committed
38

Yan's avatar
Yan committed
39
    def get_chromargs(self, mint=None, maxt=None):
Yan's avatar
Yan committed
40
        logger.info("finding correct scans")
Yan's avatar
Yan committed
41
42
        mint = mint or self.mintime
        maxt = maxt or self.maxtime
Yan's avatar
Yan committed
43
        times = dt.argsubselect(np.concatenate(
Yan's avatar
Yan committed
44
             [subset[0] for subset in self.chromatograms]), mint, maxt)
Yan's avatar
Yan committed
45
46
47
48
49
50
51
        args = []
        for subset in self.chromatograms:
            goodtimes = np.where((times < len(subset[0])) & ~(times < 0))[0]
            args.append(times[goodtimes])
            times -= len(subset[0])
        return args

Yan's avatar
Yan committed
52
53
54
55
56
57
58
59
    def refresh(self):
        """implement per-case"""
        return None

    def get_chromatograms(self):
        """implement per-case"""
        raise NotImplementedError

Yan's avatar
Yan committed
60
61
62
63
    def get_peakchrom(self, masstart, massend):
        """implement per-case"""
        raise NotImplementedError

Yan's avatar
Yan committed
64
65
66
    def get_spectra(self, mint=None, maxt=None):
        mint = mint or self.mintime
        maxt = maxt or self.maxtime
Yan's avatar
Yan committed
67
68
69
70
71
72
73
74
75
76
77
        """implement per-case"""
        raise NotImplementedError


class ThermoRawDataset(Dataset):
    def __init__(self, rawfile):
        super().__init__(rawfile)
        self.machtype = []
        self.refresh()

    def refresh(self):
Yan's avatar
Yan committed
78
        self.dataset = load_raw(self.filename,
Yan's avatar
Yan committed
79
80
                                cf.settings().value("tmp_location"))
        self.chromatograms = self.get_chromatograms()
Yan's avatar
Yan committed
81
        self.fullspectra = self.get_spectra()
Yan's avatar
Yan committed
82
        self.mintime, self.maxtime = [self.chromatograms[i][0][i]
Yan's avatar
Yan committed
83
                                      for i in (0, -1)]
Yan's avatar
Yan committed
84
85
86
87
88
        if autoparams:
            try:
                self.params, rawheaders, self.machtype = load_params(
                    self.filename, cf.settings().value("tmp_location"))
                segments = [len(i[0]) for i in self.chromatograms]
Yan's avatar
Yan committed
89
90
                indicies = [sum(segments[:i+1])
                            for i, j in enumerate(segments)]
Yan's avatar
Yan committed
91
                self.headers = np.split(rawheaders, indicies)[:-1]
Yan's avatar
Yan committed
92
            except Exception:
Yan's avatar
Yan committed
93
94
95
96
97
98
                self.params, self.machtype, self.headers = None, None, None

    def get_chromatograms(self):
        if cf.settings().value("view/oddeven", type=bool):
            chroms = []
            for i in self.dataset:
Yan's avatar
Yan committed
99
100
                for j in (0, 1):
                    chroms.append([i[0][ax, :][j::2] for ax in (0, 1)])
Yan's avatar
Yan committed
101
102
103
104
        else:
            chroms = [i[0] for i in self.dataset]
        return chroms

Yan's avatar
Yan committed
105
106
107
108
    def get_spectra(self, mint=None, maxt=None):
        mint = mint or self.mintime
        maxt = maxt or self.maxtime
        args = self.get_chromargs(mint, maxt)
Yan's avatar
Yan committed
109
        spectra = []
Yan's avatar
Yan committed
110
        for i, subset in enumerate(self.dataset):
Yan's avatar
Yan committed
111
            if cf.settings().value("view/oddeven", type=bool):
Yan's avatar
Yan committed
112
                for j in (0, 1):
Yan's avatar
Yan committed
113
114
                    if len(subset[2][args[i*2+j][j::2]]):
                        yvalz = np.mean(subset[2][args[i*2+j][j::2]], axis=0)
Yan's avatar
Yan committed
115
                        spectra.append([subset[1], yvalz])
Yan's avatar
Yan committed
116
                    else:
Yan's avatar
Yan committed
117
                        spectra.append([[], []])
Yan's avatar
Yan committed
118
119
            else:
                if len(subset[2][args[i]]):
Yan's avatar
Yan committed
120
121
                    yvalz = np.mean(subset[2][args[i]], axis=0)
                    spectra.append([subset[1], yvalz])
Yan's avatar
Yan committed
122
                else:
Yan's avatar
Yan committed
123
                    spectra.append([[], []])
Yan's avatar
Yan committed
124
125
        return spectra

Yan's avatar
Yan committed
126
    def get_peakchrom(self, startmass, endmass):
Yan's avatar
Yan committed
127
        intensity = np.concatenate([np.divide(np.sum(subset[2].T[
Yan's avatar
Yan committed
128
            dt.argsubselect(subset[1], startmass, endmass)].T, axis=1),
Yan's avatar
Yan committed
129
130
131
132
            np.clip(subset[0][1], np.finfo(np.float32).eps, None))
            for subset in self.dataset])
        return intensity

Yan's avatar
Yan committed
133
134
135
136
137
138

class BrukerTimsDataset(Dataset):
    def __init__(self, rawfile):
        super().__init__(rawfile)
        self.refresh()

Yan's avatar
Yan committed
139
    @logdecor
Yan's avatar
Yan committed
140
    def refresh(self):
Yan's avatar
Yan committed
141
        """refreshing timsTOF dataset"""
Yan's avatar
Yan committed
142
143
144
        if(os.path.isdir(self.filename)):
            self.dataset = OpenTIMS(pathlib.Path(self.filename))
        else:
Yan's avatar
Yan committed
145
146
            self.dataset = OpenTIMS(
                    pathlib.Path(os.path.dirname(self.filename)))
Yan's avatar
Yan committed
147
        self.chromatograms = self.get_chromatograms()
Yan's avatar
Yan committed
148
        self.fullspectra = self.get_spectra()
Yan's avatar
Yan committed
149
        self.mintime, self.maxtime = [self.chromatograms[i][0][i]
Yan's avatar
Yan committed
150
                                      for i in (0, -1)]
Yan's avatar
Yan committed
151

Yan's avatar
Yan committed
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
    def sampletimes(self, mint, maxt, timescap):
        frames = dt.argsubselect(self.dataset.retention_times,
                                 mint*60, maxt*60) + 1
        framessel = frames if timescap >= len(frames) else np.linspace(
                frames[0], frames[-1], timescap).astype('int')
        return framessel

    def binit(self, x, y, minstep, length):
        sortx = np.sort(x)
        stepsx = sortx[1:] - sortx[:-1]
        binspos = np.where(stepsx > minstep)[0]
        bins = sortx[:-1][binspos] + (stepsx[binspos]/2)
        binpos = np.digitize(x, bins)
        bindx = np.bincount(binpos, x) / np.bincount(binpos)
        bindy = np.bincount(binpos, y) / length
        return bindx, bindy

Yan's avatar
Yan committed
169
    @logdecor
Yan's avatar
Yan committed
170
    def get_chromatograms(self):
Yan's avatar
Yan committed
171
        """getting timsTOF chromatogram"""
Yan's avatar
Yan committed
172
        times = self.dataset.retention_times / 60
Yan's avatar
Yan committed
173
174
175
176
177
178
179
180
        if cf.settings().value("timstof/fastchrom", type=bool):
            # Thx to Michał Startek, slight deviation, but much faster
            intensities = (self.dataset.frames['SummedIntensities'] *
                           self.dataset.frames['AccumulationTime']) / 100.0
        else:
            # devNote - summing is fast, asarray is fast, iterating is slow.
            intensities = np.asarray([
                np.sum(i['intensity']) for i in self.dataset.query_iter(
181
                    np.arange(self.dataset.max_frame)+1, columns=('intensity',))])
Yan's avatar
Yan committed
182
        logger.info("DONE: getting timsTOF chromatogram")
Yan's avatar
Yan committed
183
        return [[times, intensities]]
Yan's avatar
Yan committed
184

Yan's avatar
Yan committed
185
    @logdecor
Yan's avatar
Yan committed
186
    def get_spectra(self, mint=None, maxt=None):
Yan's avatar
Yan committed
187
        """getting timsTOF spectra"""
Yan's avatar
Yan committed
188
189
        mint = mint or self.mintime
        maxt = maxt or self.maxtime
Yan's avatar
Yan committed
190
191
        framessel = self.sampletimes(mint, maxt, cf.settings().value(
                                         "timstof/ms_sampling", type=int))
Yan's avatar
Yan committed
192
        massints = self.dataset.query(framessel, columns=('mz', 'intensity'))
Yan's avatar
Yan committed
193
194
195
        masses, ints = self.binit(
            massints['mz'], massints['intensity'],
            cf.settings().value("timstof/ms_bins", type=float), len(framessel))
Yan's avatar
Yan committed
196
        return [[masses, ints]]
Yan's avatar
Yan committed
197

Yan's avatar
Yan committed
198
    @logdecor
Yan's avatar
Yan committed
199
    def get_mobspectra(self, mint=None, maxt=None):
Yan's avatar
Yan committed
200
        """getting timsTOF spectra"""
Yan's avatar
Yan committed
201
202
203
204
205
206
207
208
209
210
        mint = mint or self.mintime
        maxt = maxt or self.maxtime
        framessel = self.sampletimes(mint, maxt, cf.settings().value(
                                         "timstof/ms_sampling", type=int))
        massints = self.dataset.query(framessel, columns=('inv_ion_mobility', 'intensity'))
        masses, ints = self.binit(
            massints['inv_ion_mobility'], massints['intensity'],
            cf.settings().value("timstof/mob_bins", type=float), len(framessel))
        return [[masses, ints]]

Yan's avatar
Yan committed
211
    @logdecor
Yan's avatar
Yan committed
212
    def get_peakchrom(self, startm, endm):
Yan's avatar
Yan committed
213
        """getting peak ion chromatogram"""
Yan's avatar
Yan committed
214
215
216
        intensity = np.divide([
            np.sum(i['intensity'][dt.argsubselect(i['mz'], startm, endm)])
            for i in self.dataset.query_iter(
Yan's avatar
Yan committed
217
                np.arange(self.dataset.max_frame)+1, columns=('intensity', 'mz'))],
Yan's avatar
Yan committed
218
219
            np.clip(self.chromatograms[0][1], np.finfo(np.float32).eps, None))
        return intensity
Yan's avatar
Yan committed
220

Yan's avatar
Yan committed
221
    @logdecor
Yan's avatar
Yan committed
222
    def get_mzmobpeakchrom(self, startmz, endmz, startmob, endmob):
Yan's avatar
Yan committed
223
        """getting mz/mob peak ion chromatogram..."""
Yan's avatar
Yan committed
224
225
226
227
228
229
230
231
232
233
        intensity = np.divide([
            np.sum(i['intensity'][dt.argsubselect_2d(
                i['mz'], startmz, endmz,
                i['inv_ion_mobility'], startmob, endmob)])
            for i in self.dataset.query_iter(
                np.arange(self.dataset.max_frame)+1, columns=(
                    'intensity', 'mz', 'inv_ion_mobility'))],
            np.clip(self.chromatograms[0][1], np.finfo(np.float32).eps, None))
        return intensity

Yan's avatar
Yan committed
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
    @logdecor
    def get_mzmobpeakchroms(self, intranges):
        """getting mz/mob peak ion chromatograms..."""
        intensities = [[]] * len(intranges)
        for i in self.dataset.query_iter(
            np.arange(self.dataset.max_frame)+1,
            columns=('intensity', 'mz', 'inv_ion_mobility')):
            for j,k in enumerate(intranges):
                intensities[j].append(np.sum(i['intensity'][dt.argsubselect_2d(
                    i['mz'], k[0], k[1], i['inv_ion_mobility'], k[2], k[3])]))
        for i,j in enumerate(intensities):
            if j:
                intensities[i] = np.divide(intensities[i],
                    np.clip(self.chromatograms[0][1],
                            np.finfo(np.float32).eps, None))
        return intensities

251
252
    def get_subspace(self, xax, subselect, start, end, bins,
                     mint=None, maxt=None):
Yan's avatar
Yan committed
253
254
        mint = mint or self.mintime
        maxt = maxt or self.maxtime
Yan's avatar
Yan committed
255
256
        msg = ("getting timsTOF for {:.3f}-{:.3f} minutes, subselecting"
                     " {}: {}-{}").format(mint, maxt, subselect, start, end)
Yan's avatar
Yan committed
257
        logger.info(msg+"...")
Yan's avatar
Yan committed
258
259
        framessel = self.sampletimes(mint, maxt, cf.settings().value(
                                         "timstof/mob_sampling", type=int))
Yan's avatar
Yan committed
260
        massintstof = self.dataset.query(
261
262
                framessel, columns=(subselect, 'intensity', xax))
        goodargs = dt.argsubselect(massintstof[subselect], start, end)
Yan's avatar
Yan committed
263
        mobs, ints = self.binit(
264
265
            massintstof[xax][goodargs], massintstof['intensity'][goodargs],
            cf.settings().value("timstof/{}".format(bins), type=float),
Yan's avatar
Yan committed
266
            len(framessel))
Yan's avatar
Yan committed
267
        logger.info("DONE: "+msg)
Yan's avatar
Yan committed
268
        return mobs, ints
269
270
271
272
273
274
275
276
277
278

    def get_mobilogram(self, startm, endm, mint=None, maxt=None):
        x, y = self. get_subspace('inv_ion_mobility', 'mz',
                                  startm, endm, 'mob_bins', mint, maxt)
        return x, y

    def get_ms_onmob(self, startm, endm, mint=None, maxt=None):
        x, y = self.get_subspace('mz', 'inv_ion_mobility',
                                 startm, endm, 'ms_bins', mint, maxt)
        return x, y