use-case-and-architecture/ai_computing_force_scheduling/preprocess.py
Weisen Pan a877aed45f AI-based CFN Traffic Control and Computer Force Scheduling
Change-Id: I16cd7730c1e0732253ac52f51010f6b813295aa7
2023-11-03 00:09:19 -07:00

124 lines
5.6 KiB
Python

"""
Author: Weisen Pan
Date: 2023-10-24
"""
import pandas as pd
import numpy as np
from tqdm import tqdm
def find_pos(out_degree_matrix, num_nodes):
    """Assign every node of a DAG its stage (longest-path depth).

    A non-zero entry ``out_degree_matrix[m, n]`` forces node ``n`` to sit at
    least one stage after node ``m``. Nodes without such a predecessor stay at
    stage 0.

    Args:
        out_degree_matrix: 2-D array indexable as ``[m, n]`` covering at least
            ``num_nodes`` x ``num_nodes`` entries.
        num_nodes: Number of nodes to consider.

    Returns:
        A float ``numpy`` array of length ``num_nodes`` holding the stage of
        each node.

    Raises:
        ValueError: If the matrix contains a cycle (including a self-loop), in
            which case no finite stage assignment exists. The previous
            implementation looped forever on such input.
    """
    stage = np.zeros(num_nodes)
    # Collect the edges once, in the same row-major order the relaxation uses.
    edges = [
        (m, n)
        for m in range(num_nodes)
        for n in range(num_nodes)
        if out_degree_matrix[m, n] != 0
    ]
    # In an acyclic graph the longest path has at most num_nodes - 1 edges, so
    # the stages are final after num_nodes - 1 passes and the following pass
    # must be a no-op. Anything still changing after that implies a cycle.
    for _ in range(max(num_nodes, 1)):
        changed = False
        for m, n in edges:
            candidate = stage[m] + 1
            if candidate > stage[n]:
                stage[n] = candidate
                changed = True
        if not changed:
            return stage
    raise ValueError("out_degree_matrix contains a cycle; stages are undefined")
def create_position(pos, num_feat):
    """Build sinusoidal positional encodings for an array of positions.

    For position ``p`` and feature index ``i`` the angle is
    ``p / 10000 ** ((i // 2) * 2 / num_feat)``; even feature indices hold the
    sine of that angle and odd indices the cosine.

    Args:
        pos: Array-like of positions, e.g. shape ``(batch, nodes)``. An empty
            batch is accepted and yields an empty encoding (the nested-list
            version raised ``IndexError`` in that case).
        num_feat: Size of the encoding appended as the last axis.

    Returns:
        A float64 array of shape ``pos.shape + (num_feat,)``.
    """
    positions = np.asarray(pos, dtype=np.float64)
    # The divisors depend only on the feature index, so compute them once with
    # plain Python floats (same arithmetic as the per-element formula) and let
    # broadcasting apply them to every position.
    divisors = np.array(
        [10000.0 ** (i // 2 * 2.0 / num_feat) for i in range(num_feat)],
        dtype=np.float64,
    )
    pe = positions[..., np.newaxis] / divisors
    pe[..., 0::2] = np.sin(pe[..., 0::2])
    pe[..., 1::2] = np.cos(pe[..., 1::2])
    return pe
def create_attn_mask(tensor, num_heads, num_nodes):
    """Replicate each sample's mask (plus the identity) once per attention head.

    Args:
        tensor: Array of shape ``(batch, rows, cols)`` with one mask per sample.
        num_heads: Number of copies to create for every sample.
        num_nodes: Size of the identity matrix added to each sample's mask.

    Returns:
        A float64 array of shape ``(batch, num_heads, rows, cols)`` in which
        every head of sample ``b`` holds ``tensor[b] + I``.
    """
    batch, rows, cols = tensor.shape[:3]
    identity = np.eye(num_nodes)
    # Heads of one sample occupy a contiguous run of num_heads slots, so the
    # flat buffer can be folded into (batch, num_heads, ...) at the end.
    stacked = np.zeros((batch * num_heads, rows, cols))
    for sample, first_slot in enumerate(range(0, stacked.shape[0], num_heads)):
        stacked[first_slot:first_slot + num_heads] = tensor[sample] + identity
    return stacked.reshape(batch, num_heads, rows, cols)
def prepare_data_exp1_dag(pred_task, pred_tgt, pred_mode):
    """Load the experiment-1 feature tables and DAG-info tables of all splits.

    Feature CSVs are read from ``./datasets_exp1/<pred_task><pred_tgt>/<pred_mode>``
    and the ``*_daginfo.csv`` files from ``./datasets_exp1/<pred_task><pred_tgt>/``.

    Returns:
        A 6-tuple of DataFrames: train, val and test features followed by
        train, val and test DAG info.
    """
    feature_dir = './datasets_exp1/%s%s/%s' % (pred_task, pred_tgt, pred_mode)
    dag_dir = './datasets_exp1/%s%s/' % (pred_task, pred_tgt)
    feature_frames = [
        pd.read_csv(feature_dir + suffix)
        for suffix in ('/train.csv', '/val.csv', '/test.csv')
    ]
    dag_frames = [
        pd.read_csv(dag_dir + suffix)
        for suffix in ('/train_daginfo.csv', '/val_daginfo.csv', '/test_daginfo.csv')
    ]
    return tuple(feature_frames + dag_frames)
def prepare_data_exp1(pred_task, pred_tgt, pred_mode):
    """Load the experiment-1 train/val/test feature tables.

    The CSVs are read from ``./datasets_exp1/<pred_task><pred_tgt>/<pred_mode>``.

    Returns:
        A 3-tuple of DataFrames in the order train, val, test.
    """
    split_dir = './datasets_exp1/%s%s/%s' % (pred_task, pred_tgt, pred_mode)
    frames = [
        pd.read_csv(split_dir + suffix)
        for suffix in ('/train.csv', '/val.csv', '/test.csv')
    ]
    return tuple(frames)
def prepare_data_exp23_dag(split):
    """Load the experiment-2/3 feature tables and DAG-info tables of all splits.

    All six CSVs are read from ``./datasets_exp2_3/<split>``.

    Returns:
        A 6-tuple of DataFrames: train, val and test features followed by
        train, val and test DAG info.
    """
    split_dir = './datasets_exp2_3/%s' % split
    suffixes = (
        '/train.csv', '/val.csv', '/test.csv',
        '/train_daginfo.csv', '/val_daginfo.csv', '/test_daginfo.csv',
    )
    return tuple(pd.read_csv(split_dir + suffix) for suffix in suffixes)
def prepare_data_exp23(split):
    """Load the experiment-2/3 train/val/test feature tables.

    The CSVs are read from ``./datasets_exp2_3/<split>``.

    Returns:
        A 3-tuple of DataFrames in the order train, val, test.
    """
    split_dir = './datasets_exp2_3/%s' % split
    frames = [
        pd.read_csv(split_dir + suffix)
        for suffix in ('/train.csv', '/val.csv', '/test.csv')
    ]
    return tuple(frames)
def _exp1_scale_features(feature_frames, num_task, num_feat):
    """Min-max scale the feature columns of all splits jointly and return one
    float32 array of shape (samples, num_task, num_feat) per split."""
    # Every column except the last is a feature; each row packs num_task
    # groups of num_feat values, so unfold rows to one line per task.
    flat_blocks = [
        np.array(frame.iloc[:, :-1]).reshape(frame.shape[0] * num_task, num_feat)
        for frame in feature_frames
    ]
    stacked = np.vstack(flat_blocks)
    # NOTE(review): the min/max statistics are taken over train, val and test
    # together, exactly as before; confirm this is the intended protocol.
    lowest = stacked.min(0)
    scaled = (stacked - lowest) / (stacked.max(0) - lowest + 1e-9)

    per_split = []
    start = 0
    for block in flat_blocks:
        stop = start + block.shape[0]
        per_split.append(
            scaled[start:stop].reshape(-1, num_task, num_feat).astype(np.float32)
        )
        start = stop
    return per_split


def _exp1_dag_position_and_mask(df_dag, num_samples, num_feat, num_task, num_head):
    """Turn one split's DAG table into float32 positional encodings and
    per-head attention masks."""
    # Each DAG row holds 1 leading column (dropped) followed by num_task
    # "out" entries and num_task "in" entries.
    dag = df_dag.to_numpy().reshape(-1, num_task, num_task * 2 + 1)[:, :, 1:]
    dag_out = dag[:, :, :num_task]
    dag_in = dag[:, :, num_task:]

    stages = np.zeros((num_samples, num_task))
    for idx in range(num_samples):
        stages[idx] = find_pos(dag_out[idx], num_task)
    position = np.array(create_position(stages, num_feat), dtype=np.float32)

    mask = create_attn_mask(dag_in + dag_out, num_heads=num_head, num_nodes=num_task)
    return position, mask


def preprocess_data_exp1_dag(pred_task, pred_tgt, pred_mode, num_feat=34, num_task=7, num_head=8):
    """Load and preprocess the experiment-1 data together with its DAG info.

    The feature columns of the three splits are min-max scaled with shared
    statistics and reshaped to ``(num_task, num_feat)`` per sample. The DAG
    table of each split yields a positional encoding (from the node stages
    computed by ``find_pos``) and an attention mask replicated per head.

    ``num_task`` is now honoured throughout; previously the DAG slicing and the
    stage computation were hard-coded to 7 nodes, so any other value produced
    wrong slices or shape errors.

    Args:
        pred_task: First path component selector passed to the loader.
        pred_tgt: Second path component selector passed to the loader.
        pred_mode: Sub-directory selector passed to the loader.
        num_feat: Number of features per task.
        num_task: Number of tasks (DAG nodes) per sample.
        num_head: Number of attention heads the mask is replicated for.

    Returns:
        ``(train_data, val_data, test_data)``; each is a list with one
        ``(features, label, position, mask)`` tuple per sample, where the label
        is the value of the last column of the corresponding feature table.
    """
    loaded = prepare_data_exp1_dag(pred_task, pred_tgt, pred_mode)
    feature_frames, dag_frames = loaded[:3], loaded[3:]

    features_per_split = _exp1_scale_features(feature_frames, num_task, num_feat)

    datasets = []
    for frame, features, dag_frame in zip(feature_frames, features_per_split, dag_frames):
        position, mask = _exp1_dag_position_and_mask(
            dag_frame, frame.shape[0], num_feat, num_task, num_head
        )
        # Pull the label column out once instead of a scalar .iloc per row.
        labels = frame.iloc[:, -1].to_numpy()
        datasets.append([
            (features[idx], labels[idx], position[idx], mask[idx])
            for idx in range(features.shape[0])
        ])

    train_data, val_data, test_data = datasets
    return train_data, val_data, test_data