-
Notifications
You must be signed in to change notification settings - Fork 1.3k
CloudStack Volume support with ONTAP storage #13053
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4c8c8d9
bf838b3
b82fb40
1199d55
ebc3f00
28faca1
fe0f752
35f1011
a03a2c4
2e0efe8
a96eb9e
0f09c5f
2cc4b0c
033c23d
2822946
f7b837a
c585299
a492797
25353c2
973f5e2
686a892
edfcdde
73eb9f5
465fffe
3d6bd91
5815ebd
618f957
6c4b24e
1b0c7f7
54ddfa9
b23ac40
2c61e76
e99b98e
ef0354a
2f02d8a
1ae738b
890c2db
2d3b279
8a2c7fb
b26542f
856c5cc
7c2b229
763aa3b
eace4ee
f42552b
8894248
3f0019a
9b79f46
7a0d61e
7c3419e
d2b6a27
c5d5428
3f18c11
723561b
09968db
0a1a9c4
49df4c3
776b9a2
c04e223
186e59b
1020a2c
672d7a4
9c63c61
79730ed
ae96e9b
1b0bba9
7780a93
5bff41f
2abbed6
2340400
ce93705
e1a6465
9138e20
5f9e51c
142e0e6
aa74a5a
55447b7
fccaf83
ea40967
a41eb28
7d08878
4a62d40
a6e4b49
b58fd24
0f5370a
ddb119f
929d30f
a20b6cc
9748198
cddacd1
34e910c
c86d377
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -1340,6 +1340,15 @@ private void createManagedVolumeCopyTemplateAsync(VolumeInfo volumeInfo, Primary | |||||
| primaryDataStore.setDetails(details); | ||||||
|
|
||||||
| grantAccess(volumeInfo, destHost, primaryDataStore); | ||||||
| volumeInfo = volFactory.getVolume(volumeInfo.getId(), primaryDataStore); | ||||||
| // For Netapp ONTAP iscsiName or Lun path is available only after grantAccess | ||||||
| String managedStoreTarget = volumeInfo.get_iScsiName() != null ? volumeInfo.get_iScsiName() : volumeInfo.getUuid(); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @winterhazel what is the comment on this statement?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is replacing the ternary on line 1345 with a call to |
||||||
| details.put(PrimaryDataStore.MANAGED_STORE_TARGET, managedStoreTarget); | ||||||
| primaryDataStore.setDetails(details); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this set be done inside |
||||||
| // Update destTemplateInfo with the iSCSI path from volumeInfo | ||||||
| if (destTemplateInfo instanceof TemplateObject) { | ||||||
| ((TemplateObject)destTemplateInfo).setInstallPath(volumeInfo.getPath()); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This |
||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| motionSrv.copyAsync(srcTemplateInfo, destTemplateInfo, destHost, caller); | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,6 +106,10 @@ protected Answer takeDiskOnlyVmSnapshotOfRunningVm(CreateDiskOnlyVmSnapshotComma | |
| return new CreateDiskOnlyVmSnapshotAnswer(cmd, false, errorMsg, null); | ||
| } | ||
| return new CreateDiskOnlyVmSnapshotAnswer(cmd, false, e.getMessage(), null); | ||
| } catch (Exception e) { | ||
| String errorMsg = String.format("Creation of disk-only VM snapshot for VM [%s] failed due to %s.", vmName, e.getMessage()); | ||
| logger.error(errorMsg, e); | ||
| return new CreateDiskOnlyVmSnapshotAnswer(cmd, false, errorMsg, null); | ||
| } finally { | ||
| if (dm != null) { | ||
| try { | ||
|
|
@@ -146,21 +150,13 @@ protected Answer takeDiskOnlyVmSnapshotOfStoppedVm(CreateDiskOnlyVmSnapshotComma | |
| } | ||
| } catch (LibvirtException | QemuImgException e) { | ||
| logger.error("Exception while creating disk-only VM snapshot for VM [{}]. Deleting leftover deltas.", vmName, e); | ||
| for (VolumeObjectTO volumeObjectTO : volumeObjectTos) { | ||
| Pair<Long, String> volSizeAndNewPath = mapVolumeToSnapshotSizeAndNewVolumePath.get(volumeObjectTO.getUuid()); | ||
| PrimaryDataStoreTO primaryDataStoreTO = (PrimaryDataStoreTO) volumeObjectTO.getDataStore(); | ||
| KVMStoragePool kvmStoragePool = storagePoolMgr.getStoragePool(primaryDataStoreTO.getPoolType(), primaryDataStoreTO.getUuid()); | ||
|
|
||
| if (volSizeAndNewPath == null) { | ||
| continue; | ||
| } | ||
| try { | ||
| Files.deleteIfExists(Path.of(kvmStoragePool.getLocalPathFor(volSizeAndNewPath.second()))); | ||
| } catch (IOException ex) { | ||
| logger.warn("Tried to delete leftover snapshot at [{}] failed.", volSizeAndNewPath.second(), ex); | ||
| } | ||
| } | ||
| cleanupLeftoverDeltas(volumeObjectTos, mapVolumeToSnapshotSizeAndNewVolumePath, storagePoolMgr); | ||
| return new Answer(cmd, e); | ||
| } catch (Exception e) { | ||
| logger.error("Unexpected exception while creating disk-only VM snapshot for VM [{}]. Deleting leftover deltas.", vmName, e); | ||
| cleanupLeftoverDeltas(volumeObjectTos, mapVolumeToSnapshotSizeAndNewVolumePath, storagePoolMgr); | ||
| return new CreateDiskOnlyVmSnapshotAnswer(cmd, false, | ||
| String.format("Creation of disk-only VM snapshot for VM [%s] failed due to %s.", vmName, e.getMessage()), null); | ||
|
Comment on lines
+155
to
+159
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be unified with the catch block above?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @winterhazel, I added this logic separately after encountering failures in negative scenarios where the UI did not surface any error details. With this change, users are now notified of the failure reason when the exception falls outside the scope of the existing catch block. This is expected to improve overall user experience. Please let me know if you still feel this should be removed. |
||
| } | ||
|
|
||
| return new CreateDiskOnlyVmSnapshotAnswer(cmd, true, null, mapVolumeToSnapshotSizeAndNewVolumePath); | ||
|
|
@@ -192,6 +188,23 @@ protected Pair<String, Map<String, Pair<Long, String>>> createSnapshotXmlAndNewV | |
| return new Pair<>(snapshotXml, volumeObjectToNewPathMap); | ||
| } | ||
|
|
||
| protected void cleanupLeftoverDeltas(List<VolumeObjectTO> volumeObjectTos, Map<String, Pair<Long, String>> mapVolumeToSnapshotSizeAndNewVolumePath, KVMStoragePoolManager storagePoolMgr) { | ||
| for (VolumeObjectTO volumeObjectTO : volumeObjectTos) { | ||
| Pair<Long, String> volSizeAndNewPath = mapVolumeToSnapshotSizeAndNewVolumePath.get(volumeObjectTO.getUuid()); | ||
| PrimaryDataStoreTO primaryDataStoreTO = (PrimaryDataStoreTO) volumeObjectTO.getDataStore(); | ||
| KVMStoragePool kvmStoragePool = storagePoolMgr.getStoragePool(primaryDataStoreTO.getPoolType(), primaryDataStoreTO.getUuid()); | ||
|
|
||
| if (volSizeAndNewPath == null) { | ||
| continue; | ||
| } | ||
| try { | ||
| Files.deleteIfExists(Path.of(kvmStoragePool.getLocalPathFor(volSizeAndNewPath.second()))); | ||
| } catch (IOException ex) { | ||
| logger.warn("Tried to delete leftover snapshot at [{}] failed.", volSizeAndNewPath.second(), ex); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| protected long getFileSize(String path) { | ||
| return new File(path).length(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,8 @@ | |
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Paths; | ||
|
|
||
| import org.apache.cloudstack.utils.qemu.QemuImg; | ||
| import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat; | ||
|
|
@@ -96,10 +98,15 @@ public boolean connectPhysicalDisk(String volumeUuid, KVMStoragePool pool, Map<S | |
| String result = iScsiAdmCmd.execute(); | ||
|
|
||
| if (result != null) { | ||
| logger.debug("Failed to add iSCSI target " + volumeUuid); | ||
| System.out.println("Failed to add iSCSI target " + volumeUuid); | ||
| // Node record may already exist from a previous run; accept and proceed | ||
| if (isNonFatalNodeCreate(result)) { | ||
| logger.debug("iSCSI node already exists for {}@{}:{}, proceeding", getIqn(volumeUuid), pool.getSourceHost(), pool.getSourcePort()); | ||
| } else { | ||
| logger.debug("Failed to add iSCSI target " + volumeUuid); | ||
| System.out.println("Failed to add iSCSI target " + volumeUuid); | ||
|
|
||
| return false; | ||
| return false; | ||
| } | ||
| } else { | ||
| logger.debug("Successfully added iSCSI target " + volumeUuid); | ||
| System.out.println("Successfully added to iSCSI target " + volumeUuid); | ||
|
|
@@ -123,21 +130,39 @@ public boolean connectPhysicalDisk(String volumeUuid, KVMStoragePool pool, Map<S | |
| } | ||
| } | ||
|
Comment on lines
100
to
131
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @winterhazel can you please elaborate on this comment? |
||
|
|
||
| // ex. sudo iscsiadm -m node -T iqn.2012-03.com.test:volume1 -p 192.168.233.10:3260 --login | ||
| iScsiAdmCmd = new Script(true, "iscsiadm", 0, logger); | ||
| final String host = pool.getSourceHost(); | ||
| final int port = pool.getSourcePort(); | ||
| final String iqn = getIqn(volumeUuid); | ||
|
|
||
| // Always try to login; treat benign outcomes as success (idempotent) | ||
| iScsiAdmCmd = new Script(true, "iscsiadm", 0, logger); | ||
| iScsiAdmCmd.add("-m", "node"); | ||
| iScsiAdmCmd.add("-T", getIqn(volumeUuid)); | ||
| iScsiAdmCmd.add("-p", pool.getSourceHost() + ":" + pool.getSourcePort()); | ||
| iScsiAdmCmd.add("-T", iqn); | ||
| iScsiAdmCmd.add("-p", host + ":" + port); | ||
| iScsiAdmCmd.add("--login"); | ||
|
|
||
| result = iScsiAdmCmd.execute(); | ||
|
|
||
| if (result != null) { | ||
| logger.debug("Failed to log in to iSCSI target " + volumeUuid); | ||
| System.out.println("Failed to log in to iSCSI target " + volumeUuid); | ||
| if (isNonFatalLogin(result)) { | ||
| logger.debug("iSCSI login returned benign message for {}@{}:{}: {}", iqn, host, port, result); | ||
| // Session already exists — a newly mapped LUN won't be visible until | ||
| // the kernel's next periodic SCSI scan (~30-60s). | ||
| Script rescanCmd = new Script(true, "iscsiadm", 0, logger); | ||
| rescanCmd.add("-m", "session"); | ||
| rescanCmd.add("--rescan"); | ||
| String rescanResult = rescanCmd.execute(); | ||
| if (rescanResult != null) { | ||
| logger.warn("iSCSI session rescan returned: {}", rescanResult); | ||
| } else { | ||
| logger.debug("iSCSI session rescan completed successfully for {}@{}:{}", iqn, host, port); | ||
| } | ||
| } else { | ||
| logger.debug("Failed to log in to iSCSI target " + volumeUuid + ": " + result); | ||
| System.out.println("Failed to log in to iSCSI target " + volumeUuid); | ||
|
|
||
| return false; | ||
| return false; | ||
| } | ||
| } else { | ||
| logger.debug("Successfully logged in to iSCSI target " + volumeUuid); | ||
| System.out.println("Successfully logged in to iSCSI target " + volumeUuid); | ||
|
|
@@ -158,8 +183,23 @@ public boolean connectPhysicalDisk(String volumeUuid, KVMStoragePool pool, Map<S | |
| return true; | ||
| } | ||
|
Comment on lines
146
to
184
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @winterhazel I introduced this nested logic because NetApp storage exposes IQNs at the SVM level rather than at the volume level. As a result, all volumes within the same SVM share the same IQN when creating physical disks. This check ensures that if a connection to the SVM‑level IQN already exists, the failure can be safely ignored; otherwise, an exception should be raised. |
||
|
|
||
| // Removed sessionExists() call to avoid noisy sudo/iscsiadm session queries that may fail on some setups | ||
|
|
||
| private boolean isNonFatalLogin(String result) { | ||
| if (result == null) return true; | ||
| String msg = result.toLowerCase(); | ||
| // Accept messages where the session already exists | ||
| return msg.contains("already present") || msg.contains("already logged in") || msg.contains("session exists"); | ||
| } | ||
|
|
||
| private boolean isNonFatalNodeCreate(String result) { | ||
| if (result == null) return true; | ||
| String msg = result.toLowerCase(); | ||
| return msg.contains("already exists") || msg.contains("database exists") || msg.contains("exists"); | ||
| } | ||
|
|
||
| private void waitForDiskToBecomeAvailable(String volumeUuid, KVMStoragePool pool) { | ||
| int numberOfTries = 10; | ||
| int numberOfTries = 30; | ||
| int timeBetweenTries = 1000; | ||
|
|
||
| while (getPhysicalDisk(volumeUuid, pool).getSize() == 0 && numberOfTries > 0) { | ||
|
|
@@ -238,6 +278,15 @@ public KVMPhysicalDisk getPhysicalDisk(String volumeUuid, KVMStoragePool pool) { | |
| } | ||
|
|
||
| private long getDeviceSize(String deviceByPath) { | ||
| try { | ||
| if (!Files.exists(Paths.get(deviceByPath))) { | ||
| logger.debug("Device by-path does not exist yet: " + deviceByPath); | ||
| return 0L; | ||
| } | ||
| } catch (Exception ignore) { | ||
| // If FS check fails for any reason, fall back to blockdev call | ||
| } | ||
|
|
||
| Script iScsiAdmCmd = new Script(true, "blockdev", 0, logger); | ||
|
|
||
| iScsiAdmCmd.add("--getsize64", deviceByPath); | ||
|
|
@@ -280,8 +329,96 @@ private String getComponent(String path, int index) { | |
| return tmp[index].trim(); | ||
| } | ||
|
|
||
| /** | ||
| * Check if there are other LUNs on the same iSCSI target (IQN) that are still | ||
| * visible as block devices. This is needed because ONTAP uses a single IQN per | ||
| * SVM — logging out of the target would kill ALL LUNs, not just the one being | ||
| * disconnected. | ||
| * | ||
| * Checks /dev/disk/by-path/ for symlinks matching the same host:port + IQN but | ||
| * with a different LUN number. | ||
| */ | ||
| private boolean hasOtherActiveLuns(String host, int port, String iqn, String lun) { | ||
| String prefix = "ip-" + host + ":" + port + "-iscsi-" + iqn + "-lun-"; | ||
| java.io.File byPathDir = new java.io.File("/dev/disk/by-path"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please import the |
||
| if (!byPathDir.exists() || !byPathDir.isDirectory()) { | ||
| return false; | ||
| } | ||
| java.io.File[] entries = byPathDir.listFiles(); | ||
| if (entries == null) { | ||
| return false; | ||
| } | ||
| for (java.io.File entry : entries) { | ||
| String name = entry.getName(); | ||
| // Skip partition entries (e.g. lun-0-part1, lun-0-part2) — these are not | ||
| // independent LUNs, they are partition symlinks for the same LUN disk. | ||
| // Only count actual LUN entries (no "-part" suffix after the lun number). | ||
| if (name.startsWith(prefix) && !name.equals(prefix + lun) && !name.contains("-part")) { | ||
| logger.debug("Found other active LUN on same target: " + name); | ||
| return true; | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| /** | ||
| * Removes a single stale SCSI device from the kernel using the sysfs interface. | ||
| * | ||
| * When ONTAP unmaps a LUN from the host's igroup, the by-path symlink and the | ||
| * underlying SCSI device (/dev/sdX) remain present in the kernel until explicitly | ||
| * removed — the kernel does not auto-remove devices from live iSCSI sessions. | ||
| * | ||
| * This method resolves the by-path symlink to the real block device name (e.g. sdd), | ||
| * then writes "1" to /sys/block/<dev>/device/delete — the standard Linux kernel SCSI | ||
| * API for removing a single device without tearing down the entire iSCSI session. | ||
| * Once the kernel processes the delete, it also removes the by-path symlink. | ||
| * | ||
| * This is used instead of iscsiadm --logout when other LUNs on the same IQN are still | ||
| * active (ONTAP single-IQN-per-SVM model), since logout would tear down ALL LUNs. | ||
| */ | ||
| private void removeStaleScsiDevice(String host, int port, String iqn, String lun) { | ||
| String byPath = getByPath(host, port, "/" + iqn + "/" + lun); | ||
| java.nio.file.Path byPathLink = java.nio.file.Paths.get(byPath); | ||
| if (!java.nio.file.Files.exists(byPathLink)) { | ||
| logger.debug("by-path entry for LUN " + lun + " already gone, nothing to remove"); | ||
| return; | ||
| } | ||
| try { | ||
| java.nio.file.Path realDevice = byPathLink.toRealPath(); | ||
| String devName = realDevice.getFileName().toString(); | ||
| java.io.File deleteFile = new java.io.File("/sys/block/" + devName + "/device/delete"); | ||
| if (!deleteFile.exists()) { | ||
| logger.warn("sysfs delete entry not found for device " + devName + " — cannot remove stale SCSI device"); | ||
| return; | ||
| } | ||
| try (java.io.FileWriter fw = new java.io.FileWriter(deleteFile)) { | ||
| fw.write("1"); | ||
| } | ||
| logger.info("Removed stale SCSI device " + devName + " for LUN /" + iqn + "/" + lun + " via sysfs"); | ||
| } catch (Exception e) { | ||
| logger.warn("Failed to remove stale SCSI device for LUN /" + iqn + "/" + lun + ": " + e.getMessage()); | ||
| } | ||
| } | ||
|
|
||
| private boolean disconnectPhysicalDisk(String host, int port, String iqn, String lun) { | ||
| // use iscsiadm to log out of the iSCSI target and un-discover it | ||
| // Check if other LUNs on the same IQN target are still in use. | ||
| // ONTAP (and similar) uses a single IQN per SVM with multiple LUNs. | ||
| // Doing iscsiadm --logout tears down the ENTIRE target session, | ||
| // which would destroy access to ALL LUNs — not just the one being disconnected. | ||
| if (hasOtherActiveLuns(host, port, iqn, lun)) { | ||
| logger.info("Skipping iSCSI logout for /" + iqn + "/" + lun + | ||
| " — other LUNs on the same target are still active. Removing stale SCSI device for this LUN only."); | ||
| removeStaleScsiDevice(host, port, iqn, lun); | ||
| // After removing this LUN's device, re-check: if no other LUNs remain active, | ||
| // If it is the last one then must logout to clean up the iSCSI session entirely. | ||
| if (hasOtherActiveLuns(host, port, iqn, lun)) { | ||
| logger.info("Other LUNs still active after removing /" + iqn + "/" + lun + " — session kept alive."); | ||
| return true; | ||
|
Comment on lines
+415
to
+416
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As the disconnect operation failed, I wonder if it makes more sense to throw an exception here instead |
||
| } | ||
| logger.info("No more active LUNs on target after removing /" + iqn + "/" + lun + " — proceeding with iSCSI logout."); | ||
| } | ||
|
|
||
| // No other LUNs active on this target — safe to logout and delete the node record. | ||
|
|
||
| // ex. sudo iscsiadm -m node -T iqn.2012-03.com.test:volume1 -p 192.168.233.10:3260 --logout | ||
| Script iScsiAdmCmd = new Script(true, "iscsiadm", 0, logger); | ||
|
|
@@ -422,6 +559,19 @@ public KVMPhysicalDisk copyPhysicalDisk(KVMPhysicalDisk srcDisk, String destVolu | |
| try { | ||
| QemuImg q = new QemuImg(timeout); | ||
| q.convert(srcFile, destFile); | ||
| // Below fix is required when vendor depends on host based copy rather than storage CAN_CREATE_VOLUME_FROM_VOLUME capability | ||
| // When host based template copy is triggered , small size template sits in RAM(depending on host memory and RAM) and copy is marked successful and by the time flush to storage is triggered | ||
| // disconnectPhysicalDisk would disconnect the lun , hence template staying in RAM is not copied to storage lun. Below does flushing of data to storage and marking | ||
| // copy as successful once flush is complete. | ||
| Script flushCmd = new Script(true, "blockdev", 0, logger); | ||
| flushCmd.add("--flushbufs", destDisk.getPath()); | ||
| String flushResult = flushCmd.execute(); | ||
| if (flushResult != null) { | ||
| logger.warn("iSCSI copyPhysicalDisk: blockdev --flushbufs returned: {}", flushResult); | ||
| } | ||
| Script syncCmd = new Script(true, "sync", 0, logger); | ||
| syncCmd.execute(); | ||
| logger.info("iSCSI copyPhysicalDisk: flush/sync completed "); | ||
| } catch (QemuImgException | LibvirtException ex) { | ||
| String msg = "Failed to copy data from " + srcDisk.getPath() + " to " + | ||
| destDisk.getPath() + ". The error was the following: " + ex.getMessage(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This constant is declared in 3 places. Perhaps we could create a file in a single common dependency (maybe cloud-api) to declare the plugin names?