Add, replace, cache and delete artifacts

import pytest
import lamindb as ln

# Log in as the CI test user and initialize an instance whose storage root is
# an S3 location; the instance is deleted again at the end of this notebook.
ln.setup.login("testuser1")
ln.setup.init(storage="s3://lamindb-ci/test-add-replace-stage")
✅ logged in with email testuser1@lamin.ai (uid: DzTjkKse)
❗ updating local SQLite & locking cloud SQLite (sync back & unlock: lamin close)
💡 the lock has not been initialized, trying to obtain the lock.

Save with an auto-managed key (key=None)

# Prefix under the storage root that auto-keyed artifacts are stored under
# (shows up as ".lamindb/" in the paths printed below).
AUTO_KEY_PREFIX = ln.core.storage.paths.AUTO_KEY_PREFIX
root = ln.settings.storage
# No key is passed, only a description.
artifact = ln.Artifact("./test-files/iris.csv", description="iris.csv")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(uid='8GMTzYFhoIvVkHg95qRT', description='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:39 UTC')
# Without a key, the object is expected at <root>/<AUTO_KEY_PREFIX><uid><suffix>.
key_path = root / f"{AUTO_KEY_PREFIX}{artifact.uid}{artifact.suffix}"
assert key_path.exists()
# Keep the local cache path of the .csv version for the mtime comparison below.
cache_csv_path = artifact.cache()
cache_csv_path
PosixUPath('/home/runner/.cache/lamindb/lamindb-ci/test-add-replace-stage/.lamindb/8GMTzYFhoIvVkHg95qRT.csv')
assert cache_csv_path.suffix == ".csv"
# Replace the content with a file that has a different suffix (.csv -> .data);
# the repr below shows the same uid with the new suffix, size and hash.
artifact.replace("./test-files/iris.data")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(uid='8GMTzYFhoIvVkHg95qRT', description='iris.csv', suffix='.data', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:39 UTC')
old_key_path = key_path
# Recompute the expected storage path with the artifact's updated suffix.
new_key_path = root / f"{AUTO_KEY_PREFIX}{artifact.uid}{artifact.suffix}"

The suffix changed:

old_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/.lamindb/8GMTzYFhoIvVkHg95qRT.csv')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/.lamindb/8GMTzYFhoIvVkHg95qRT.data')
# The object under the old suffix must be gone, the one under the new suffix present.
assert not old_key_path.exists()
assert new_key_path.exists()
cache_data_path = artifact.cache()
cache_data_path
PosixUPath('/home/runner/.cache/lamindb/lamindb-ci/test-add-replace-stage/.lamindb/8GMTzYFhoIvVkHg95qRT.data')
assert cache_data_path.suffix == ".data"
# The cache file of the replacement must not be older than that of the original.
assert cache_data_path.stat().st_mtime >= cache_csv_path.stat().st_mtime
# Remove the artifact before the next section.
artifact.delete(permanent=True)

Save with manually passed real key

# Turn off virtual keys: the reprs below report key_is_virtual=False and the
# object is expected directly at <root>/<key>.
ln.settings.artifact_use_virtual_keys = False
artifact = ln.Artifact("./test-files/iris.csv", key="iris.csv")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(uid='jq9KDe5ikvCsTxuaeCnr', key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:41 UTC')
key_path = root / "iris.csv"
assert key_path.exists()
# Replace with a file of the same suffix: the repr below keeps key='iris.csv'
# while size and hash change.
artifact.replace("./test-files/new_iris.csv")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(uid='jq9KDe5ikvCsTxuaeCnr', key='iris.csv', suffix='.csv', size=229, hash='lp2-ycXcKcaliUTnR_TqHA', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:41 UTC')

Check paths: no changes here, as the suffix didn’t change.

old_key_path = key_path
# "new_iris.csv" is only the name of the local source file; the assertions
# below check that no object of that name was created in storage.
new_key_path = root / "new_iris.csv"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.csv')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/new_iris.csv')
assert old_key_path.exists()
assert not new_key_path.exists()
# Replace with a file of a different suffix: the warning and repr below show
# the key changing from 'iris.csv' to 'iris.data'.
artifact.replace("./test-files/iris.data")
❗ no run & transform get linked, consider calling ln.track()
❗ replacing the file will replace key 'iris.csv' with 'iris.data' and delete 'iris.csv' upon `save()`
artifact.save()
Artifact(uid='jq9KDe5ikvCsTxuaeCnr', key='iris.data', suffix='.data', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:42 UTC')
new_key_path = root / "iris.data"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.csv')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.data')
# After save(), the object under the old key is gone and the new key exists.
assert not old_key_path.exists()
assert new_key_path.exists()
# Clean up; storage=True is passed explicitly (presumably this also removes
# the stored object -- confirm against the Artifact.delete documentation).
artifact.delete(permanent=True, storage=True)

Save from memory

import pandas as pd
# Build the artifact from an in-memory DataFrame instead of a file path; the
# repr below shows suffix '.parquet' for the key "iris.parquet".
iris = pd.read_csv("./test-files/iris.csv")
artifact = ln.Artifact.from_df(iris, description="iris_store", key="iris.parquet")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(uid='vkrwpbnJr9AZ1YuMRILY', description='iris_store', key='iris.parquet', suffix='.parquet', accessor='DataFrame', size=4510, hash='4n5aTzTCvYC9TAcmXNW4hQ', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:43 UTC')
key_path = root / "iris.parquet"
assert key_path.exists()
# Replace with another in-memory DataFrame (last row dropped): the key must
# stay the same.
artifact.replace(data=iris[:-1])
❗ no run & transform get linked, consider calling ln.track()
assert artifact.key == "iris.parquet"
artifact.save()
Artifact(uid='vkrwpbnJr9AZ1YuMRILY', description='iris_store', key='iris.parquet', suffix='.parquet', accessor='DataFrame', size=4490, hash='07E-IuJuPmhP_WKO2NA9fg', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:44 UTC')
assert key_path.exists()
# Replace the in-memory data with a file on disk: the warning and repr below
# show the key switching from 'iris.parquet' to 'iris.csv'.
artifact.replace("./test-files/new_iris.csv")
❗ no run & transform get linked, consider calling ln.track()
❗ replacing the file will replace key 'iris.parquet' with 'iris.csv' and delete 'iris.parquet' upon `save()`
artifact.save()
Artifact(uid='vkrwpbnJr9AZ1YuMRILY', description='iris_store', key='iris.csv', suffix='.csv', accessor='DataFrame', size=229, hash='lp2-ycXcKcaliUTnR_TqHA', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:45 UTC')
old_key_path = key_path
new_key_path = root / "iris.csv"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.parquet')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.csv')
# The parquet object is gone and the csv object exists under the new key.
assert not old_key_path.exists()
assert new_key_path.exists()
# we use the path in the next section
path_in_storage = artifact.path
# storage=False: per the warning below, the stored object is retained without
# an artifact referencing it.
artifact.delete(permanent=True, storage=False)
❗ you will retain a dangling store here: s3://lamindb-ci/test-add-replace-stage/iris.csv, not referenced via an artifact

Save with manually passed virtual key

# Re-enable virtual keys: the reprs below report key_is_virtual=True.
ln.settings.artifact_use_virtual_keys = True
artifact = ln.Artifact("./test-files/iris.csv", key="iris.csv")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(uid='sO4bBBbjrl343rlU5usp', key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:46 UTC')
# path_in_storage is the object retained from the previous section; replacing
# with it is expected to raise ValueError.
with pytest.raises(ValueError):
    artifact.replace(path_in_storage)
❗ no run & transform get linked, consider calling ln.track()
# return an existing artifact if the hash is the same
assert artifact == artifact.replace("./test-files/iris.csv")
❗ no run & transform get linked, consider calling ln.track()
💡 returning existing artifact with same hash: Artifact(uid='sO4bBBbjrl343rlU5usp', key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:46 UTC')
# With a virtual key, the file name in storage is the uid rather than the key.
fpath = artifact.path
assert fpath.suffix == ".csv" and fpath.stem == artifact.uid
artifact.replace("./test-files/iris.data")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(uid='sO4bBBbjrl343rlU5usp', key='iris.data', suffix='.data', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1, updated_at='2024-06-13 12:18:48 UTC')
# The virtual key takes the new suffix and the object at the old path is removed.
assert artifact.key == "iris.data"
assert not fpath.exists()
# The new storage path is still named after the uid, now with the .data suffix.
fpath = artifact.path
assert fpath.suffix == ".data" and fpath.stem == artifact.uid
artifact.delete(permanent=True, storage=True)
# Remove the object that was deliberately retained in the previous section.
path_in_storage.unlink()
# Delete the test instance; the output below lists the records removed on the hub.
ln.setup.delete("test-add-replace-stage", force=True)
💡 deleted storage record on hub 8d5230fca3955b48bb7f707e93064ca9
💡 deleted storage record on hub 762b2d23bcb752cf88c5b7bab2d4e03e
💡 deleted instance record on hub 0102f680066a569da6f32766b07a9f5d