Walter A. Boring IV f4e427216e Move HBA online check
This patch moves the test for the HBA to after it's been
reattached to the host driver.   The current check will always
fail, because it will never see it.

The host driver will never see the HBA until after the virsh reattach
command runs.

Change-Id: I74255576e2b7f31dcb35c87bbf7fc270d6d736b0
2015-11-18 18:57:28 +00:00

249 lines
8.4 KiB
Bash

#!/usr/bin/env bash
# Copyright (C) 2015 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2015 Pure Storage, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
# Shell commands to get virsh the information it
# needs to successfully pass through a Fibre Channel PCI Card to the virtual
# machine this script is running on. The instance only knows its IP address,
# while its Virsh name is required for pass through. This script uses Nova on
# the provider blade as an intermediary to find the name. Meanwhile, this
# script finds the Fibre Channel PCI card on the provider and generates the
# information Virsh needs to attach it.
#
# Expect four env variables, the provider hostname (optionally user if needed)
# the private key file we should use to connect to the provider, and the file
# that should be sourced for OpenStack credentials.
#
# export FC_PROVIDER=my.provider.hostname
# export FC_PROVIDER_USER=root
# export FC_PROVIDER_KEY=/opt/nodepool-scripts/passthrough
# export FC_PROVIDER_RC=/root/keystonerc_jenkins
#
# The maximum number of FC devices to passthrough, failing if they cannot all be
# aquired
# export FC_NUM=2 (default 1)
#
# For single node setups where the hypervisor is the same as the provider, and dns
# is not configured, export this variable to use the provider ip as the hypervisor
# export FC_SINGLE_NODE=1
FC_NUM=${FC_NUM:-1}
FC_PCI_VAR_NAME=${FC_PCI_VAR_NAME:-"fc_pci_device"}
echo "Planning to passthrough $FC_NUM pci devices"
eth0_ip=$(hostname -I | cut -f1 -d' ')
PROVIDER=${FC_PROVIDER}
if [[ -z $PROVIDER ]]; then
eth0_ip_base=$(echo $eth0_ip | cut -f1,2,3 -d.)
PROVIDER="${eth0_ip_base}.1"
fi
PROVIDER_KEY=${FC_PROVIDER_KEY:-"/opt-nodepool-scripts/passthrough"}
PROVIDER_RC=${FC_PROVIDER_RC:-"keystonerc_jenkins"}
CURRENT_USER=$(whoami)
PROVIDER_USER=${FC_PROVIDER_USER:-$CURRENT_USER}
# Passthrough is a private key that needs to be setup for the provider
# and any compute nodes that might end up hosting the VM we want passthrough on.
# We will assume ownership of the key (probably as the jenkins user..), also
# assuming the group is the same name as the user...
sudo chown $CURRENT_USER:$CURRENT_USER $PROVIDER_KEY
chmod 0400 $PROVIDER_KEY
# Get our NOVA_ID
NOVA_LIST=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$PROVIDER "source $PROVIDER_RC && nova list")
nova_result=$?
NOVA_ID=$(echo "$NOVA_LIST" | grep ACTIVE | grep -v deleting | grep $eth0_ip | cut -d \| -f 2 | tr -d '[:space:]')
echo "NOVA_ID result: $nova_result"
if [[ $nova_result -ne 0 || -z "$NOVA_ID" ]]; then
echo "Unable to get Nova ID. Aborting. Debug info:"
echo $NOVA_LIST
echo "NOVA_ID: $NOVA_ID"
exit 2
fi
# Get instance details
NOVA_DETAILS=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$PROVIDER "source $PROVIDER_RC && nova show $NOVA_ID")
nova_results=$?
# Get our Virsh name
VIRSH_NAME=$(echo "$NOVA_DETAILS" | grep instance_name | cut -d \| -f 3 | tr -d '[:space:]')
virsh_result=$?
echo "VIRSH_NAME result: $virsh_result"
if [[ $nova_result -ne 0 || $virsh_result -ne 0 || -z "$VIRSH_NAME" ]]; then
echo "Unable to get Virsh Name. Aborting. Debug info:"
echo "NOVA_LIST:"
echo $NOVA_LIST
echo "NOVA_DETAILS:"
echo $NOVA_DETAILS
echo "VIRSH_NAME: $VIRSH_NAME"
exit 2
fi
# Get the hypervisor_hostname
if [[ -z $FC_SINGLE_NODE ]]; then
HYPERVISOR=$(echo "$NOVA_DETAILS" | grep hypervisor_hostname | cut -d \| -f 3 | tr -d '[:space:]')
hypervisor_result=$?
echo "HYPERVISOR result: $hypervisor_result"
if [[ $hypervisor_result -ne 0 || -z "$HYPERVISOR" ]]; then
echo "Unable to get Hypervisor Host Name. Aborting. Debug info:"
echo "NOVA_LIST:"
echo $NOVA_LIST
echo "NOVA_DETAILS:"
echo $NOVA_DETAILS
echo "HYPERVISOR: $HYPERVISOR"
exit 2
fi
else
HYPERVISOR=$PROVIDER
fi
echo "Found Hypervisor hostname: $HYPERVISOR"
fc_pci_device_cmd="echo \$$FC_PCI_VAR_NAME"
fc_pci_device=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "$fc_pci_device_cmd")
if [[ -z $fc_pci_device ]]; then
echo "No FC device known. Set fc_pci_device in your /etc/profile.d or /etc/environment (depending on distro and ssh configuration) to the desired 'Class Device path', e.g. '0000:21:00.2'"
exit 2
fi
echo "Found pci devices: $fc_pci_device"
function is_device_online() {
fc_device=$1
# If a device is not "Online" we'll get an empty
# string as a result of the following command.
cmd="systool -c fc_host -v"
OUTPUT=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "systool -c fc_host -v")
test_fc_online="systool -c fc_host -v | grep -B12 'Online' | grep 'Class Device path' | grep '$fc_device'"
ONLINE=$(ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "$test_fc_online")
echo "online result='$ONLINE'"
if [ -z "$ONLINE" ]; then
return 0;
else
return 1;
fi
}
exit_code=1
errexit=$(set +o | grep errexit)
# Ignore errors
set +e
let num_attached=0
for pci in $fc_pci_device; do
echo "Trying passthrough for $pci"
BUS=$(echo $pci | cut -d : -f2)
SLOT=$(echo $pci | cut -d : -f3 | cut -d . -f1)
FUNCTION=$(echo $pci | cut -d : -f3 | cut -d . -f2)
XML="<hostdev mode='subsystem' type='pci' managed='yes'><source><address domain='0x0000' bus='0x$BUS' slot='0x$SLOT' function='0x$FUNCTION'/></source></hostdev>"
echo $XML
fcoe=`mktemp --suffix=_fcoe.xml`
echo $XML > $fcoe
fc_virsh_device="pci_0000_${BUS}_${SLOT}_${FUNCTION}"
scp -i $PROVIDER_KEY $fcoe $PROVIDER_USER@$HYPERVISOR:/tmp/
# Run passthrough and clean up.
# TODO: At the point where we can do more than one node on a provider we
# will need to do this cleanup at the end of the job and not *before* attaching
# since we won't know which ones are still in use
echo $(sudo lspci | grep -i fib)
ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "virsh nodedev-dettach $fc_virsh_device"
detach_result=$?
echo "Detach result: $detach_result"
if [[ $detach_result -ne 0 ]]; then
echo "Detach failed ($detach_result). Trying next device..."
continue
fi
# Reattach the device to the host.
# This will hopefully reset the device
echo $(sudo lspci | grep -i fib)
ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "virsh nodedev-reattach $fc_virsh_device"
reattach_result=$?
echo "reattach result: $reattach_result"
if [[ $reattach_result -ne 0 ]]; then
echo "Reattach failed ($reattach_result). Trying next device..."
continue
fi
# Now that the device has been re-attached to it's host device driver
# systool should be able to see it. Make sure it's online.
is_device_online $pci
online=$?
if [ $online -eq 1 ]; then
echo "Device($pci) is Online"
else
echo "Device($pci) is NOT Online"
# It does no good to passthrough an HBA that isn't Online.
# When an HBA goes into 'Linkdown' or 'Offline' mode, the
# host typically needs to get rebooted.
continue
fi
echo $(sudo lspci | grep -i fib)
ssh -i $PROVIDER_KEY $PROVIDER_USER@$HYPERVISOR "virsh attach-device $VIRSH_NAME $fcoe"
attach_result=$?
echo "Attach result: $attach_result"
if [[ $attach_result -eq 0 ]]; then
echo "Attached succeed. Trying next device..."
(( num_attached += 1 ))
exit_code=0
fi
echo $(sudo lspci | grep -i fib)
echo $num_attached
if [[ $num_attached -eq $FC_NUM ]]; then
echo "Attached $num_attached devices. Stopping"
break
fi
done
$errexit
if [[ $exit_code -ne 0 ]]; then
echo "FC Passthrough failed. Aborting."
exit $exit_code
fi
if [[ $num_attached -ne $FC_NUM ]]; then
echo "FC requested $FC_NUM, but only attached $num_attached. Aborting."
exit 1
fi
# Make sure that really it worked...
sudo modprobe lpfc
echo $?
sudo systool -c fc_host -v
echo $?
echo $(sudo lspci | grep -i fib)
device_path=$(sudo systool -c fc_host -v | grep "Device path")
if [[ ${#device_path} -eq 0 ]]; then
echo "Failed to install FC Drivers. Aborting."
exit 1
fi