Skip to content

codeocean

CapsuleComputationAPI module-attribute

CapsuleComputationAPI: TypeAlias = dict[Literal['created', 'end_status', 'has_results', 'id', 'name', 'run_time', 'state'], Any]

Result from the Code Ocean API when querying for the computations of a capsule.

DataAssetAPI module-attribute

DataAssetAPI: TypeAlias = dict[Literal['created', 'custom_metadata', 'description', 'files', 'id', 'last_used', 'name', 'size', 'sourceBucket', 'state', 'tags', 'type'], Any]

Result from CodeOcean API when querying data assets.

ResultItemAPI module-attribute

ResultItemAPI: TypeAlias = dict[Literal['name', 'path', 'size', 'type'], Any]

Result from the Code Ocean API when querying for the results of a computation.

get_data_asset

get_data_asset(asset: str | uuid.UUID | DataAssetAPI) -> DataAssetAPI

Converts an asset uuid to dict of info from CodeOcean API.

Source code in npc_lims/metadata/codeocean.py
226
227
228
229
230
231
232
233
def get_data_asset(asset: str | uuid.UUID | DataAssetAPI) -> DataAssetAPI:
    """Return the Code Ocean info dict for a data asset.

    A mapping is handed back untouched. Anything else is converted with
    `str()` and looked up through the Code Ocean client; the response is
    checked with `raise_for_status()` before its JSON payload is returned.
    """
    if isinstance(asset, Mapping):
        # Already an info dict: nothing to fetch.
        return asset
    reply = get_codeocean_client().get_data_asset(str(asset))
    reply.raise_for_status()
    asset = reply.json()
    # Sanity check on the decoded payload.
    assert isinstance(asset, Mapping), f"Unexpected {type(asset) = }, {asset = }"
    return asset

get_path_from_data_asset

get_path_from_data_asset(asset: DataAssetAPI) -> upath.UPath

Reconstruct path to raw data in bucket (e.g. on s3) using a dict of data-asset info from the Code Ocean API.

Source code in npc_lims/metadata/codeocean.py
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
def get_path_from_data_asset(asset: DataAssetAPI) -> upath.UPath:
    """Build the bucket path (e.g. on s3) described by the `sourceBucket`
    entry of a data asset's Code Ocean info dict.

    Raises `ValueError` if the asset has no `sourceBucket` entry, and
    `RuntimeError` if the bucket origin is neither `aws` nor `gcs`.
    """
    if "sourceBucket" in asset:
        bucket_info = asset["sourceBucket"]
    else:
        raise ValueError(
            f"Asset {asset['id']} has no `sourceBucket` info - not sure how to create UPath:\n{asset!r}"
        )
    # Bucket origin -> URL scheme understood by UPath.
    scheme = {"aws": "s3", "gcs": "gs"}.get(bucket_info["origin"])
    if scheme is None:
        raise RuntimeError(
            f"Unknown bucket origin - not sure how to create UPath: {bucket_info = }"
        )
    bucket = bucket_info["bucket"]
    prefix = bucket_info["prefix"]
    return upath.UPath(f"{scheme}://{bucket}/{prefix}")

get_raw_data_root cached

get_raw_data_root(session: str | npc_session.SessionRecord) -> upath.UPath

Reconstruct path to raw data in bucket (e.g. on s3) using data-asset info from Code Ocean.

>>> get_raw_data_root('668759_20230711')
S3Path('s3://aind-ephys-data/ecephys_668759_2023-07-11_13-07-32')
Source code in npc_lims/metadata/codeocean.py
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
@functools.cache
def get_raw_data_root(session: str | npc_session.SessionRecord) -> upath.UPath:
    """Return the bucket path (e.g. on s3) of a session's raw data, rebuilt
    from the info Code Ocean holds on the session's raw data asset.

        >>> get_raw_data_root('668759_20230711')
        S3Path('s3://aind-ephys-data/ecephys_668759_2023-07-11_13-07-32')
    """
    session = npc_session.SessionRecord(session)
    # Keep only the session's assets that are recognised as raw data.
    candidates = tuple(filter(is_raw_data_asset, get_session_data_assets(session)))
    return get_path_from_data_asset(get_single_data_asset(session, candidates, "raw"))

get_session_computation_id_and_data_asset_name

get_session_computation_id_and_data_asset_name(session: npc_session.SessionRecord, model_name: str, capsule_computations: list[CapsuleComputationAPI]) -> tuple[str, str]

Returns the computation id and data asset name for the session that will be used to create the data asset. The test below fails, since arjun ran the capsule but github has a different token.

>>> session = npc_session.SessionRecord('626791_20220816')

>>> capsule_computations = get_codeocean_client().get_capsule_computations(MODEL_CAPSULE_MAPPING['dlc_eye'])

>>> capsule_computations.raise_for_status()

>>> get_session_computation_id_and_data_asset_name(session, 'eyetracking', capsule_computations.json())

('3010ff06-aae5-4b35-b070-57df9ef85582', 'ecephys_626791_2022-08-16_00-00-00_eyetracking')

Source code in npc_lims/metadata/codeocean.py
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
def get_session_computation_id_and_data_asset_name(
    session: npc_session.SessionRecord,
    model_name: str,
    capsule_computations: list[CapsuleComputationAPI],
) -> tuple[str, str]:
    """Find the computation holding results for `session` and name its data asset.

    Computations are checked in the order given and those without results are
    skipped. For each remaining one, the result items are listed through the
    Code Ocean client; the first item whose name looks like
    `ecephys_<subject>_<date>_<time>.json` for this session decides the
    outcome.

    Args:
        session: session whose `subject` and `date` are used to build the
            expected result-file name.
        model_name: suffix that replaces `.json` in the matched file name to
            form the data asset name.
        capsule_computations: computations of a capsule, as returned by the
            Code Ocean API.

    Returns:
        `(computation id, data asset name)` for the first matching computation.

    Raises:
        ValueError: if no computation with results contains a result file
            for the session.

    The example below is deliberately not a doctest: it fails in CI because
    the capsule was run by arjun, and github uses a different token.

        session = npc_session.SessionRecord('626791_20220816')
        capsule_computations = get_codeocean_client().get_capsule_computations(MODEL_CAPSULE_MAPPING['dlc_eye'])
        capsule_computations.raise_for_status()
        get_session_computation_id_and_data_asset_name(session, 'eyetracking', capsule_computations.json())
        # ('3010ff06-aae5-4b35-b070-57df9ef85582', 'ecephys_626791_2022-08-16_00-00-00_eyetracking')
    """
    for computation in capsule_computations:
        if not computation["has_results"]:
            continue

        response = get_codeocean_client().get_list_result_items(computation["id"])
        response.raise_for_status()

        # Built once per computation rather than once per item. The dot is
        # escaped so that only a literal ".json" matches.
        pattern = (
            rf"ecephys_{session.subject}_{session.date}_{npc_session.PARSE_TIME}\.json"
        )
        # TODO add folder
        matching_item = next(
            (
                item
                for item in response.json()["items"]
                if re.match(pattern, item["name"])
            ),
            None,
        )
        if matching_item is None:
            continue

        return (
            computation["id"],
            matching_item["name"].replace(".json", f"_{model_name}"),
        )

    # Previously this fell through to an unbound local variable.
    raise ValueError(
        f"No computation with results contains a result file for session {session} "
        f"(model {model_name!r})"
    )

get_session_raw_data_asset

get_session_raw_data_asset(session: str | npc_session.SessionRecord) -> DataAssetAPI

Examples:

>>> get_session_raw_data_asset('668759_20230711')["id"]
'83636983-f80d-42d6-a075-09b60c6abd5e'
Source code in npc_lims/metadata/codeocean.py
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
def get_session_raw_data_asset(
    session: str | npc_session.SessionRecord,
) -> DataAssetAPI:
    """Return the single raw data asset selected for a session.

    Raises `ValueError` if none of the session's data assets is raw.

    Examples:
        >>> get_session_raw_data_asset('668759_20230711')["id"]
        '83636983-f80d-42d6-a075-09b60c6abd5e'
    """
    session = npc_session.SessionRecord(session)
    candidates = tuple(filter(is_raw_data_asset, get_session_data_assets(session)))
    if candidates:
        return get_single_data_asset(session, candidates, "raw")
    raise ValueError(f"Session {session} has no raw data assets")

get_session_result_data_assets

get_session_result_data_assets(session: str | npc_session.SessionRecord) -> tuple[DataAssetAPI, ...]

Examples:

>>> result_data_assets = get_session_result_data_assets('668759_20230711')
>>> assert len(result_data_assets) > 0
Source code in npc_lims/metadata/codeocean.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def get_session_result_data_assets(
    session: str | npc_session.SessionRecord,
) -> tuple[DataAssetAPI, ...]:
    """Return every data asset of the session whose `type` is `"result"`.

    Examples:
        >>> result_data_assets = get_session_result_data_assets('668759_20230711')
        >>> assert len(result_data_assets) > 0
    """
    return tuple(
        info for info in get_session_data_assets(session) if info["type"] == "result"
    )

get_session_sorted_data_asset

get_session_sorted_data_asset(session: str | npc_session.SessionRecord) -> DataAssetAPI

Examples:

>>> sorted_data_asset = get_session_sorted_data_asset('668759_20230711')
>>> assert isinstance(sorted_data_asset, dict)
Source code in npc_lims/metadata/codeocean.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
def get_session_sorted_data_asset(
    session: str | npc_session.SessionRecord,
) -> DataAssetAPI:
    """Return the single sorted data asset selected for a session.

    Only sorted assets whose `files` value is greater than 2 are considered.
    Raises `ValueError` if no such asset exists.

    Examples:
        >>> sorted_data_asset = get_session_sorted_data_asset('668759_20230711')
        >>> assert isinstance(sorted_data_asset, dict)
    """
    candidates = []
    for info in get_session_data_assets(session):
        if not is_sorted_data_asset(info):
            continue
        if info["files"] > 2:
            candidates.append(info)

    if candidates:
        return get_single_data_asset(session, tuple(candidates), "sorted")
    raise ValueError(f"Session {session} has no sorted data assets")

get_session_units_data_asset cached

get_session_units_data_asset(session_id: str | npc_session.SessionRecord) -> DataAssetAPI

Examples:

>>> units_data_asset = get_session_units_data_asset('668759_20230711')
>>> assert units_data_asset is not None
Source code in npc_lims/metadata/codeocean.py
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
@functools.cache
def get_session_units_data_asset(
    session_id: str | npc_session.SessionRecord,
) -> DataAssetAPI:
    """Return the single units data asset selected for a session.

    Candidates are the session's assets whose name contains `"units"` but
    not `"peak"`.

    Examples:
        >>> units_data_asset = get_session_units_data_asset('668759_20230711')
        >>> assert units_data_asset is not None
    """
    session = npc_session.SessionRecord(session_id)
    candidates = []
    for info in get_session_data_assets(session):
        name = info["name"]
        if "units" in name and "peak" not in name:
            candidates.append(info)
    return get_single_data_asset(session, tuple(candidates), "units")

get_session_units_spikes_with_peak_channels_data_asset cached

get_session_units_spikes_with_peak_channels_data_asset(session_id: str | npc_session.SessionRecord) -> DataAssetAPI

Examples:

>>> units_peak_channel_data_asset = get_session_units_spikes_with_peak_channels_data_asset('668759_20230711')
>>> assert units_peak_channel_data_asset is not None
Source code in npc_lims/metadata/codeocean.py
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
@functools.cache
def get_session_units_spikes_with_peak_channels_data_asset(
    session_id: str | npc_session.SessionRecord,
) -> DataAssetAPI:
    """Return the single "units with peak channels" data asset selected for a
    session.

    Candidates are the session's assets whose name contains
    `"units_with_peak_channels"`.

    Examples:
        >>> units_peak_channel_data_asset = get_session_units_spikes_with_peak_channels_data_asset('668759_20230711')
        >>> assert units_peak_channel_data_asset is not None
    """
    session = npc_session.SessionRecord(session_id)
    candidates = tuple(
        filter(
            lambda info: "units_with_peak_channels" in info["name"],
            get_session_data_assets(session),
        )
    )
    # NOTE(review): the label passed here is "units", the same one that
    # get_session_units_data_asset uses - confirm this is intended.
    return get_single_data_asset(session, candidates, "units")

get_sessions_with_data_assets cached

get_sessions_with_data_assets(subject: str | int) -> tuple[npc_session.SessionRecord, ...]

Examples:

>>> sessions = get_sessions_with_data_assets(668759)
>>> assert len(sessions) > 0
Source code in npc_lims/metadata/codeocean.py
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
@functools.cache
def get_sessions_with_data_assets(
    subject: str | int,
) -> tuple[npc_session.SessionRecord, ...]:
    """Return the distinct sessions found among a subject's data assets.

    Each asset name is parsed as a session record; names that raise
    `ValueError` when parsed are skipped.

    Examples:
        >>> sessions = get_sessions_with_data_assets(668759)
        >>> assert len(sessions) > 0
    """
    found = set()
    for info in get_subject_data_assets(subject):
        try:
            record = npc_session.SessionRecord(info["name"])
        except ValueError:
            # Asset name does not parse as a session: ignore it.
            continue
        else:
            found.add(record)
    return tuple(found)

get_subject_data_assets cached

get_subject_data_assets(subject: str | int) -> tuple[DataAssetAPI, ...]

All assets associated with a subject ID.

Examples:

>>> assets = get_subject_data_assets(668759)
>>> assert len(assets) > 0
Source code in npc_lims/metadata/codeocean.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
@functools.cache
def get_subject_data_assets(subject: str | int) -> tuple[DataAssetAPI, ...]:
    """
    All assets associated with a subject ID.

    Searches Code Ocean data assets with the query `subject id: <subject>`
    and returns the `results` entry of the JSON response.

    The results are returned as a tuple, matching the annotated return type.
    Because this function is cached, every caller receives the same object,
    so the container must not be a list that one caller could modify for
    everyone else.

    Raises whatever `raise_for_status()` raises for an unsuccessful response.

    Examples:
        >>> assets = get_subject_data_assets(668759)
        >>> assert len(assets) > 0
    """
    response = get_codeocean_client().search_all_data_assets(
        query=f"subject id: {npc_session.SubjectRecord(subject)}"
    )
    response.raise_for_status()
    return tuple(response.json()["results"])

get_surface_channel_root

get_surface_channel_root(session: str | npc_session.SessionRecord) -> upath.UPath

Reconstruct path to surface channel data in bucket (e.g. on s3) using data-asset info from Code Ocean.

Examples:

>>> get_surface_channel_root('660023_20230808')
S3Path('s3://aind-ephys-data/ecephys_660023_2023-08-08_15-11-14')
>>> assert get_surface_channel_root('660023_20230808') != get_raw_data_root('660023_20230808')
>>> get_surface_channel_root('649943_20230216')
Traceback (most recent call last):
...
FileNotFoundError: 649943_20230216 has no surface channel data assets
Source code in npc_lims/metadata/codeocean.py
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
def get_surface_channel_root(session: str | npc_session.SessionRecord) -> upath.UPath:
    """Return the bucket path (e.g. on s3) of a session's surface channel
    data, rebuilt from data-asset info on Code Ocean.

    The asset is selected from the session's raw data assets using the
    session record with index 1. A `SessionIndexError` from that selection
    is reported as `FileNotFoundError`.

    Examples:
        >>> get_surface_channel_root('660023_20230808')
        S3Path('s3://aind-ephys-data/ecephys_660023_2023-08-08_15-11-14')
        >>> assert get_surface_channel_root('660023_20230808') != get_raw_data_root('660023_20230808')
        >>> get_surface_channel_root('649943_20230216')
        Traceback (most recent call last):
        ...
        FileNotFoundError: 649943_20230216 has no surface channel data assets
    """
    session = npc_session.SessionRecord(session)
    candidates = tuple(filter(is_raw_data_asset, get_session_data_assets(session)))
    try:
        surface_asset = get_single_data_asset(session.with_idx(1), candidates, "raw")
    except SessionIndexError:
        # Hide the index error: callers only need to know the data is absent.
        raise FileNotFoundError(
            f"{session} has no surface channel data assets"
        ) from None
    else:
        return get_path_from_data_asset(surface_asset)

is_raw_data_asset

is_raw_data_asset(asset: str | DataAssetAPI) -> bool

Examples:

>>> is_raw_data_asset('83636983-f80d-42d6-a075-09b60c6abd5e')
True
>>> is_raw_data_asset('173e2fdc-0ca3-4a4e-9886-b74207a91a9a')
False
Source code in npc_lims/metadata/codeocean.py
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def is_raw_data_asset(asset: str | DataAssetAPI) -> bool:
    """Tell whether a data asset (id or info dict) holds raw data.

    Sorted assets are never raw. Otherwise an asset is raw when its custom
    metadata has `"data level"` equal to `"raw data"`, or when `"raw"` is
    among its tags.

    Examples:
        >>> is_raw_data_asset('83636983-f80d-42d6-a075-09b60c6abd5e')
        True
        >>> is_raw_data_asset('173e2fdc-0ca3-4a4e-9886-b74207a91a9a')
        False
    """
    asset = get_data_asset(asset)
    if is_sorted_data_asset(asset):
        return False
    if asset.get("custom_metadata", {}).get("data level") == "raw data":
        return True
    return "raw" in asset.get("tags", [])

is_sorted_data_asset

is_sorted_data_asset(asset: str | DataAssetAPI) -> bool

Examples:

>>> is_sorted_data_asset('173e2fdc-0ca3-4a4e-9886-b74207a91a9a')
True
>>> is_sorted_data_asset('83636983-f80d-42d6-a075-09b60c6abd5e')
False
Source code in npc_lims/metadata/codeocean.py
252
253
254
255
256
257
258
259
260
261
262
263
def is_sorted_data_asset(asset: str | DataAssetAPI) -> bool:
    """Tell whether a data asset (id or info dict) is a sorted ecephys asset,
    i.e. its name contains both `"ecephys"` and `"sorted"`.

    Examples:
        >>> is_sorted_data_asset('173e2fdc-0ca3-4a4e-9886-b74207a91a9a')
        True
        >>> is_sorted_data_asset('83636983-f80d-42d6-a075-09b60c6abd5e')
        False
    """
    name = get_data_asset(asset)["name"]
    return "ecephys" in name and "sorted" in name