
This patch moves the Online check for the HBA to after it has been reattached
to the host driver. The current check always fails because the host driver
does not see the HBA until after the virsh nodedev-reattach command runs.

Change-Id: I74255576e2b7f31dcb35c87bbf7fc270d6d736b0
#!/usr/bin/env bash
# Copyright (C) 2015 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2015 Pure Storage, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.

# Shell commands to get virsh the information it needs to successfully pass
# through a Fibre Channel PCI card to the virtual machine this script is
# running on. The instance only knows its IP address, while its virsh name is
# required for passthrough. This script uses Nova on the provider blade as an
# intermediary to find that name. Meanwhile, this script finds the Fibre
# Channel PCI card on the provider and generates the information virsh needs
# to attach it.
#
# Expects four env variables: the provider hostname (and optionally the user,
# if needed), the private key file we should use to connect to the provider,
# and the file that should be sourced for OpenStack credentials.
#
# export FC_PROVIDER=my.provider.hostname
# export FC_PROVIDER_USER=root
# export FC_PROVIDER_KEY=/opt/nodepool-scripts/passthrough
# export FC_PROVIDER_RC=/root/keystonerc_jenkins
#
# The maximum number of FC devices to pass through, failing if they cannot
# all be acquired:
# export FC_NUM=2 (default 1)
#
# For single-node setups where the hypervisor is the same as the provider and
# DNS is not configured, export this variable to use the provider IP as the
# hypervisor:
# export FC_SINGLE_NODE=1

FC_NUM=${FC_NUM:-1}
FC_PCI_VAR_NAME=${FC_PCI_VAR_NAME:-"fc_pci_device"}

echo "Planning to passthrough $FC_NUM pci devices"

eth0_ip=$(hostname -I | cut -f1 -d' ')

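# If FC_PROVIDER is not set, fall back to the .1 address of this instance's
# /24 network, e.g. 10.0.0.5 -> 10.0.0.1 (addresses illustrative); this
# assumes the provider answers on that gateway address.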
PROVIDER=${FC_PROVIDER}
if [[ -z $PROVIDER ]]; then
    eth0_ip_base=$(echo $eth0_ip | cut -f1,2,3 -d.)
    PROVIDER="${eth0_ip_base}.1"
fi

PROVIDER_KEY=${FC_PROVIDER_KEY:-"/opt/nodepool-scripts/passthrough"}
PROVIDER_RC=${FC_PROVIDER_RC:-"keystonerc_jenkins"}

CURRENT_USER=$(whoami)
PROVIDER_USER=${FC_PROVIDER_USER:-$CURRENT_USER}

# The passthrough key is a private key that needs to be set up for the provider
# and any compute nodes that might end up hosting the VM we want passthrough on.
# We will assume ownership of the key (probably as the jenkins user), also
# assuming the group has the same name as the user.
sudo chown $CURRENT_USER:$CURRENT_USER $PROVIDER_KEY
chmod 0400 $PROVIDER_KEY

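# Ask the provider's Nova for the instance list so we can find our own UUID by
# matching our IP address. The parsing below assumes the classic "nova list"
# table layout, where the second |-separated column is the instance UUID, e.g.
# (values illustrative):
# | 4a6f21e6-... | test-node | ACTIVE | - | Running | private=10.0.0.5 |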
# Get our NOVA_ID
NOVA_LIST=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$PROVIDER "source $PROVIDER_RC && nova list")
nova_result=$?
NOVA_ID=$(echo "$NOVA_LIST" | grep ACTIVE | grep -v deleting | grep $eth0_ip | cut -d \| -f 2 | tr -d '[:space:]')
echo "NOVA_ID result: $nova_result"
if [[ $nova_result -ne 0 || -z "$NOVA_ID" ]]; then
    echo "Unable to get Nova ID. Aborting. Debug info:"
    echo "$NOVA_LIST"
    echo "NOVA_ID: $NOVA_ID"
    exit 2
fi

# Get instance details
NOVA_DETAILS=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$PROVIDER "source $PROVIDER_RC && nova show $NOVA_ID")
nova_result=$?

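# "nova show" reports the libvirt domain name in its OS-EXT-SRV-ATTR:instance_name
# field, typically of the form instance-0000001a (value illustrative); that,
# rather than the Nova display name, is the name virsh expects.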
# Get our Virsh name
VIRSH_NAME=$(echo "$NOVA_DETAILS" | grep instance_name | cut -d \| -f 3 | tr -d '[:space:]')
virsh_result=$?
echo "VIRSH_NAME result: $virsh_result"
if [[ $nova_result -ne 0 || $virsh_result -ne 0 || -z "$VIRSH_NAME" ]]; then
    echo "Unable to get Virsh Name. Aborting. Debug info:"
    echo "NOVA_LIST:"
    echo "$NOVA_LIST"
    echo "NOVA_DETAILS:"
    echo "$NOVA_DETAILS"
    echo "VIRSH_NAME: $VIRSH_NAME"
    exit 2
fi

# Get the hypervisor_hostname
if [[ -z $FC_SINGLE_NODE ]]; then
    HYPERVISOR=$(echo "$NOVA_DETAILS" | grep hypervisor_hostname | cut -d \| -f 3 | tr -d '[:space:]')
    hypervisor_result=$?
    echo "HYPERVISOR result: $hypervisor_result"
    if [[ $hypervisor_result -ne 0 || -z "$HYPERVISOR" ]]; then
        echo "Unable to get Hypervisor Host Name. Aborting. Debug info:"
        echo "NOVA_LIST:"
        echo "$NOVA_LIST"
        echo "NOVA_DETAILS:"
        echo "$NOVA_DETAILS"
        echo "HYPERVISOR: $HYPERVISOR"
        exit 2
    fi
else
    HYPERVISOR=$PROVIDER
fi
echo "Found Hypervisor hostname: $HYPERVISOR"

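# The hypervisor is expected to export the PCI address(es) of the FC card in an
# environment variable named by FC_PCI_VAR_NAME (fc_pci_device by default),
# e.g. in /etc/environment: fc_pci_device="0000:21:00.2 0000:21:00.3"
# (addresses illustrative); multiple space-separated addresses are allowed.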
fc_pci_device_cmd="echo \$$FC_PCI_VAR_NAME"
fc_pci_device=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "$fc_pci_device_cmd")

if [[ -z $fc_pci_device ]]; then
    echo "No FC device known. Set fc_pci_device in your /etc/profile.d or /etc/environment (depending on distro and ssh configuration) to the desired 'Class Device path', e.g. '0000:21:00.2'"
    exit 2
fi

echo "Found pci devices: $fc_pci_device"

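# is_device_online asks the hypervisor whether the fc_host backed by the given
# PCI address reports an "Online" port state. It leans on the layout of
# "systool -c fc_host -v" output, where each host block prints its
# "Class Device path" (containing the PCI address) shortly before its
# port_state attribute, so grepping the 12 lines before each "Online" for the
# address matches only online ports. Note the inverted return code: 1 means
# the device is online, 0 means it is not.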
function is_device_online() {
    fc_device=$1
    # Capture the full fc_host listing; useful when debugging a failed check.
    cmd="systool -c fc_host -v"
    OUTPUT=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "$cmd")
    # If the device is not "Online" we'll get an empty string as a result of
    # the following command.
    test_fc_online="systool -c fc_host -v | grep -B12 'Online' | grep 'Class Device path' | grep '$fc_device'"
    ONLINE=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "$test_fc_online")
    echo "online result='$ONLINE'"
    if [ -z "$ONLINE" ]; then
        return 0
    else
        return 1
    fi
}

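# Remember whether errexit was enabled so it can be restored (by running
# "$errexit") after the loop; inside the loop, individual detach/attach
# failures should not abort the whole script, so errors are ignored.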
exit_code=1
errexit=$(set +o | grep errexit)
# Ignore errors
set +e
let num_attached=0
for pci in $fc_pci_device; do
    echo "Trying passthrough for $pci"

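    # Split the PCI address into its parts and build the <hostdev> XML virsh
    # needs, e.g. 0000:21:00.2 -> bus 21, slot 00, function 2, which also maps
    # to the node device name pci_0000_21_00_2 (address reused from the example
    # above).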
    BUS=$(echo $pci | cut -d : -f2)
    SLOT=$(echo $pci | cut -d : -f3 | cut -d . -f1)
    FUNCTION=$(echo $pci | cut -d : -f3 | cut -d . -f2)
    XML="<hostdev mode='subsystem' type='pci' managed='yes'><source><address domain='0x0000' bus='0x$BUS' slot='0x$SLOT' function='0x$FUNCTION'/></source></hostdev>"
    echo $XML
    fcoe=$(mktemp --suffix=_fcoe.xml)
    echo $XML > $fcoe

    fc_virsh_device="pci_0000_${BUS}_${SLOT}_${FUNCTION}"

    scp -i $PROVIDER_KEY $fcoe $PROVIDER_USER@$HYPERVISOR:/tmp/

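    # managed='yes' in the XML above means libvirt itself will detach the card
    # from its host driver when attach-device runs (and rebind it once the
    # guest releases it); the explicit nodedev-dettach/reattach below is still
    # done so the card gets reset and its Online state can be checked from the
    # host before handing it to the guest.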
    # Run passthrough and clean up.
    # TODO: At the point where we can do more than one node on a provider we
    # will need to do this cleanup at the end of the job and not *before*
    # attaching, since we won't know which ones are still in use.
    echo $(sudo lspci | grep -i fib)
    ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "virsh nodedev-dettach $fc_virsh_device"

    detach_result=$?
    echo "Detach result: $detach_result"
    if [[ $detach_result -ne 0 ]]; then
        echo "Detach failed ($detach_result). Trying next device..."
        continue
    fi

    # Reattach the device to the host.
    # This will hopefully reset the device.
    echo $(sudo lspci | grep -i fib)
    ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "virsh nodedev-reattach $fc_virsh_device"
    reattach_result=$?
    echo "reattach result: $reattach_result"
    if [[ $reattach_result -ne 0 ]]; then
        echo "Reattach failed ($reattach_result). Trying next device..."
        continue
    fi

    # Now that the device has been re-attached to its host device driver,
    # systool should be able to see it. Make sure it's online.
    is_device_online $pci
    online=$?
    if [ $online -eq 1 ]; then
        echo "Device($pci) is Online"
    else
        echo "Device($pci) is NOT Online"
        # It does no good to pass through an HBA that isn't Online.
        # When an HBA goes into 'Linkdown' or 'Offline' mode, the
        # host typically needs to be rebooted.
        continue
    fi

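    # Attach the <hostdev> definition built above to the running guest; this is
    # a live attach, so the HBA should appear inside the VM without a reboot.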
    echo $(sudo lspci | grep -i fib)
    ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "virsh attach-device $VIRSH_NAME $fcoe"
    attach_result=$?
    echo "Attach result: $attach_result"
    if [[ $attach_result -eq 0 ]]; then
        echo "Attach succeeded. Trying next device..."
        (( num_attached += 1 ))
        exit_code=0
    fi
    echo $(sudo lspci | grep -i fib)
    echo "num_attached: $num_attached"
    if [[ $num_attached -eq $FC_NUM ]]; then
        echo "Attached $num_attached devices. Stopping."
        break
    fi

done
$errexit

if [[ $exit_code -ne 0 ]]; then
    echo "FC Passthrough failed. Aborting."
    exit $exit_code
fi

if [[ $num_attached -ne $FC_NUM ]]; then
    echo "FC requested $FC_NUM, but only attached $num_attached. Aborting."
    exit 1
fi

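# lpfc is the Emulex FC HBA driver; loading it here assumes the passed-through
# card is an Emulex one (other vendors' HBAs would need their own driver). If
# the passthrough worked, systool should now report an fc_host with a
# "Device path" inside this guest.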
# Make sure that it really worked...
sudo modprobe lpfc
echo $?

sudo systool -c fc_host -v
echo $?

echo $(sudo lspci | grep -i fib)

device_path=$(sudo systool -c fc_host -v | grep "Device path")
if [[ ${#device_path} -eq 0 ]]; then
    echo "Failed to install FC Drivers. Aborting."
    exit 1
fi