24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
class Station(Base):
    """
    Represents a Station.

    This ORM class represents a station along with its metadata.
    According to the EDAM conceptual model, a station can be stationary or
    moving.

    Attributes:
        name: The name of the station
        mobile: Boolean value to define whether it's a stationary or moving
            station
        location: string attribute which describes the location where the
            station is situated
        latitude: float attribute which represents the station's latitude
        longitude: float attribute which represents the station's longitude
        region: string attribute which describes the region where the
            station is situated.
        license: string attribute which represents the license of the data
            that the station holds
        url: string attribute of the station's url (if applicable)
        tags: dict attribute which represents any other tags which are not
            explicitly defined
        qualifiers: dict attribute which represents any qualifiers for the
            station's data. A standard qualifier is `missing_data`. For the
            rest, the `key` of this attribute is the qualifier (e.g. `*`) and
            the `value` the qualifier (e.g. `estimated`)
    """
    __tablename__ = "Station"

    id = Column(Integer, primary_key=True)
    name = Column(String(80))
    mobile = Column(Boolean)
    location = Column(String(200))
    latitude = Column(Float)
    longitude = Column(Float)
    region = Column(String(200))
    # NOTE: `license` shadows the builtin of the same name, but it is part
    # of the model's public interface and must keep its name.
    license = Column(String(100))
    url = Column(String(100))
    # Dicts are persisted as JSON text; exposed as dicts via the
    # `tags` / `qualifiers` hybrid properties below.
    _tags = Column('tags', String(500))
    _qualifiers = Column('qualifiers', String(500))
    junctions = relationship("Junction", back_populates="station")

    def update(self, new_values: dict):
        """Update this station's attributes in place from `new_values`."""
        update_existing(self, new_values, logger)

    def __init__(self,
                 name: str = None, mobile: bool = False,
                 location: str = None, latitude: float = None,
                 longitude: float = None, region: str = None,
                 license: str = None, url: str = None,
                 qualifiers: dict = None, tags: dict = None):
        # `tags` and `qualifiers` default to None (never a shared mutable
        # default); the property setters normalize None to an empty dict.
        if tags is None:
            tags = dict()
        self.name = name
        self.mobile = mobile
        self.location = location
        self.latitude = latitude
        self.longitude = longitude
        self.region = region
        self.license = license
        self.url = url
        self.qualifiers = qualifiers
        self.tags = tags

    @hybrid_property
    def tags(self):
        """Station tags as a dict, decoded from the JSON column."""
        # Guard against NULL/empty column values on rows loaded from the
        # database (which bypass __init__); json.loads(None) would raise.
        return json.loads(self._tags) if self._tags else {}

    @tags.setter
    def tags(self, value):
        # Normalize None to {} for consistency with the qualifiers setter.
        self._tags = json.dumps(value if value is not None else {})

    @hybrid_property
    def qualifiers(self):
        """Station qualifiers as a dict, decoded from the JSON column."""
        # Same NULL-column guard as `tags`.
        return json.loads(self._qualifiers) if self._qualifiers else {}

    @qualifiers.setter
    def qualifiers(self, value):
        self._qualifiers = json.dumps(value if value is not None else {})

    @property
    def missing_data(self):
        """The `missing_data` qualifier, or '' when it is not defined."""
        try:
            return self.qualifiers['missing_data']
        except (KeyError, TypeError):
            return ''

    @property
    def observable_ids(self) -> list:
        """Observable ids of all observables attached via junctions."""
        return [junction.observable.observable_id for junction in
                self.junctions]

    @property
    def junctions_mapping(self) -> dict:
        """Mapping of junction id -> observable id for this station."""
        return {junction.id: junction.observable.observable_id for junction in
                self.junctions}

    @property
    def data(self) -> pd.DataFrame:
        """
        All observations of this station as one wide DataFrame.

        One column per observable (renamed from the generic `value`),
        indexed by timestamp. Returns None when the station has no data.
        """
        dataframes = []
        # Bind the mapping once: `junctions_mapping` is a property and
        # rebuilds its dict on every access.
        mapping = self.junctions_mapping
        for junction_id, observable_id in mapping.items():
            sql = session.query(Observation).filter(
                Observation.junction_id == junction_id)
            # Render the query with inlined literals so pandas can run it.
            sql_literal = str(sql.statement.compile(dialect=sqlite.dialect(),
                                                    compile_kwargs={
                                                        "literal_binds": True}))
            df = pd.read_sql_query(sql_literal, engine)
            df.rename(columns={"value": observable_id}, inplace=True)
            # Keep the timestamp column only on the first frame; later
            # frames are concatenated column-wise against it.
            if dataframes:
                df.drop(['id', 'junction_id', 'timestamp'], axis=1,
                        inplace=True)
            else:
                df.drop(['id', 'junction_id'], axis=1, inplace=True)
            dataframes.append(df)
        if dataframes:
            dataframe = pd.concat(dataframes, join='inner', axis=1).fillna(
                "empty")  # type: pd.DataFrame
            dataframe['my_index'] = pd.to_datetime(dataframe['timestamp'])
            dataframe.set_index('my_index', drop=True, inplace=True)
            return dataframe
        logger.warning(f"{self.name} does not have any data associated")
        return None

    def data_iter(self, template: "Template"):
        """
        Yield the station's data row by row as lists of values.

        Each yielded row contains the template's observable columns (or all
        columns when the template defines none), with timestamps parsed to
        datetime and measurements wrapped in `Measurement`.
        """
        data = self.data
        variables = template.observable_ids
        if template.resampled:
            data = resample(self, template.resampled['rule'],
                            template.resampled['how'])
        # The column selection is loop-invariant — compute it once instead
        # of re-testing `variables is not None` for every row.
        columns = variables if variables is not None else list(data)
        for row in data.itertuples():
            yield self._prepare_dataframe_generator(row, columns)

    def resampled(self, rule=None, how=None):
        # NOTE(review): `rule` and `how` are accepted but ignored, and
        # `_res` is never initialized in __init__ — calling this before
        # assigning `res` raises AttributeError. Kept as-is to preserve
        # the existing interface; confirm intended semantics with callers.
        return self._res

    @property
    def res(self):
        return self._res

    @res.setter
    def res(self, value):
        self._res = value

    def _prepare_dataframe_generator(self, row: tuple, variables) -> list:
        """Build one output row: parsed timestamp + Measurement values."""
        gen = []
        for variable in variables:
            value = getattr(row, variable)
            if variable == "timestamp":
                # Drop fractional seconds before parsing.
                gen.append(datetime.strptime(value.split('.')[0],
                                             "%Y-%m-%d %H:%M:%S"))
            elif value == "nan":
                # Substitute the station's missing_data qualifier when one
                # is defined; otherwise keep the literal "nan" value.
                try:
                    gen.append(Measurement(self.qualifiers['missing_data']))
                except (KeyError, TypeError):
                    gen.append(Measurement(value))
            else:
                gen.append(Measurement(value))
        return gen

    def as_dict(self):
        """Return this station's attributes as a plain dict."""
        return as_dict(self)

    def __eq__(self, other):
        """Override the default Equals behavior"""
        # NOTE(review): comparing __dict__ includes SQLAlchemy's internal
        # instance state; two logically-equal rows from different sessions
        # may compare unequal — confirm this is intended.
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        return NotImplemented

    def __ne__(self, other):
        """Define a non-equality test"""
        if isinstance(other, self.__class__):
            return not self.__eq__(other)
        return NotImplemented

    def __repr__(self):
        return f'<{self.__class__.__name__} {self.name!r} with id {self.id!r}>'