b75d85315c
Synergy depends on the python tabulate package. However, it is not available as a system package for both CentOS 7 and Ubuntu 14.04. In this commit we opt for the following solution: don't mark tabulate as a dependency when building synergy system packages, instead we provide the tabulate package as part of Synergy. DocImpact Change-Id: I76018e1ba27de8ad47b59c85baad2c2d06f03398 Sem-Ver: bugfix
1088 lines
38 KiB
Python
1088 lines
38 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
"""Pretty-print tabular data."""
|
|
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
from collections import namedtuple
|
|
from platform import python_version_tuple
|
|
import re
|
|
|
|
|
|
# Python 2/3 compatibility shims: the rest of the module only uses the
# names defined here (`izip_longest`, `reduce`, `partial`, `_text_type`, ...).
#
# The major version is compared as a number: comparing the strings would
# order a two-digit major version such as "10" before "3".
if int(python_version_tuple()[0]) < 3:
    from itertools import izip_longest
    from functools import partial
    _none_type = type(None)
    _int_type = int
    _long_type = long
    _float_type = float
    _text_type = unicode
    _binary_type = str

    def _is_file(f):
        """Return True if `f` is a built-in Python 2 `file` object."""
        return isinstance(f, file)

else:
    from itertools import zip_longest as izip_longest
    from functools import reduce, partial
    _none_type = type(None)
    _int_type = int
    _long_type = int
    _float_type = float
    _text_type = str
    _binary_type = bytes

    import io

    def _is_file(f):
        """Return True if `f` is an instance of `io.IOBase`."""
        return isinstance(f, io.IOBase)
|
|
|
|
|
|
# Public names of the module.
__all__ = ["tabulate", "tabulate_formats", "simple_separated_format"]
__version__ = "0.7.5"


# Added to the visible width of a header to get the minimal column width.
MIN_PADDING = 2


# A horizontal line: left edge, fill character, column separator, right edge.
Line = namedtuple("Line", ["begin", "hline", "sep", "end"])


# A row of cells: left edge, column separator, right edge.
DataRow = namedtuple("DataRow", ["begin", "sep", "end"])


# A table structure is supposed to be:
#
#     --- lineabove ---------
#         headerrow
#     --- linebelowheader ---
#         datarow
#     --- linebetweenrows ---
#     ... (more datarows) ...
#     --- linebetweenrows ---
#         last datarow
#     --- linebelow ---------
#
# TableFormat's line* elements can be
#
#   - either None, if the element is not used,
#   - or a Line tuple,
#   - or a function: [col_widths], [col_alignments] -> string.
#
# TableFormat's *row elements can be
#
#   - either None, if the element is not used,
#   - or a DataRow tuple,
#   - or a function: [cell_values], [col_widths], [col_alignments] -> string.
#
# padding (an integer) is the amount of white space around data values.
#
# with_header_hide:
#
#   - either None, to display all table elements unconditionally,
#   - or a list of elements not to be displayed if the table has column headers.
#
TableFormat = namedtuple("TableFormat", ["lineabove", "linebelowheader",
                                         "linebetweenrows", "linebelow",
                                         "headerrow", "datarow",
                                         "padding", "with_header_hide"])
|
|
|
|
|
|
def _pipe_segment_with_colons(align, colwidth):
    """Build one column's piece of the `pipe` format header rule.

    A colon on the left marks "left" alignment, a colon on the right marks
    "right" or "decimal", colons on both sides mark "center"; any other
    alignment gives plain dashes. The colons count towards `colwidth`.
    """
    left_mark = ":" if align in ("left", "center") else ""
    right_mark = ":" if align in ("right", "decimal", "center") else ""
    dashes = "-" * (colwidth - len(left_mark) - len(right_mark))
    return left_mark + dashes + right_mark
|
|
|
|
|
|
def _pipe_line_with_colons(colwidths, colaligns):
    """Build the full `pipe` format rule: one colon-marked segment per
    column, with a "|" between segments and at both ends."""
    pieces = []
    for align, width in zip(colaligns, colwidths):
        pieces.append(_pipe_segment_with_colons(align, width))
    body = "|".join(pieces)
    return "|" + body + "|"
|
|
|
|
|
|
def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns):
    """Render one MediaWiki table row.

    `separator` is the row marker ("!" for header rows, "|" for data rows);
    it starts the row and, doubled, separates the cells. Cells of columns
    that are not left-aligned get an `align="..."|` attribute prefix.
    `colwidths` is accepted for signature compatibility and not used.
    """
    attr_for = {"left": '',
                "right": 'align="right"| ',
                "center": 'align="center"| ',
                "decimal": 'align="right"| '}
    cells = []
    for value, align in zip(cell_values, colaligns):
        # one hard-coded space around attribute and value together; the
        # format's padding parameter would only surround the value
        cells.append(' ' + attr_for.get(align, '') + value + ' ')
    row = separator + (separator * 2).join(cells)
    return row.rstrip()
|
|
|
|
|
|
def _html_row_with_attrs(celltag, cell_values, colwidths, colaligns):
    """Render one HTML table row.

    Every value is wrapped in a `celltag` element ("th" or "td"); columns
    that are not left-aligned get an inline `text-align` style. Cell values
    are inserted verbatim (no HTML escaping). `colwidths` is not used.
    """
    style_for = {"left": '',
                 "right": ' style="text-align: right;"',
                 "center": ' style="text-align: center;"',
                 "decimal": ' style="text-align: right;"'}
    cells = []
    for value, align in zip(cell_values, colaligns):
        cells.append("<{0}{1}>{2}</{0}>".format(celltag,
                                                style_for.get(align, ''),
                                                value))
    return "<tr>" + "".join(cells).rstrip() + "</tr>"
|
|
|
|
|
|
def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False):
    """Return the opening lines of a LaTeX `tabular` environment.

    The column specification has one letter per entry of `colaligns`
    ("l", "r" or "c"; "decimal" maps to "r", unknown alignments to "l").
    The second line is the top rule: `\\toprule` when `booktabs` is true,
    `\\hline` otherwise. `colwidths` is not used.
    """
    alignment = {"left": "l", "right": "r", "center": "c", "decimal": "r"}
    tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns])
    # both rule names spell the backslash as "\\": "\h" is not a valid
    # string escape sequence
    return "\n".join(["\\begin{tabular}{" + tabular_columns_fmt + "}",
                      "\\toprule" if booktabs else "\\hline"])
|
|
|
|
# Single characters that are replaced in LaTeX cells, mapped to their
# replacement text (looked up character by character in _latex_row).
LATEX_ESCAPE_RULES = {r"&": r"\&", r"%": r"\%", r"$": r"\$", r"#": r"\#",
                      r"_": r"\_", r"^": r"\^{}", r"{": r"\{", r"}": r"\}",
                      r"~": r"\textasciitilde{}", "\\": r"\textbackslash{}",
                      r"<": r"\ensuremath{<}", r">": r"\ensuremath{>}"}
|
|
|
|
|
|
def _latex_row(cell_values, colwidths, colaligns):
    """Render one LaTeX table row: cells are escaped character by character
    with LATEX_ESCAPE_RULES, joined with "&" and terminated with a double
    backslash. `colwidths` and `colaligns` are not used."""
    escaped_cells = []
    for cell in cell_values:
        escaped_cells.append(
            "".join([LATEX_ESCAPE_RULES.get(ch, ch) for ch in cell]))
    return _build_simple_row(escaped_cells, DataRow("", "&", "\\\\"))
|
|
|
|
|
|
# Built-in table formats, keyed by the name accepted as `tablefmt`.
#
# The whitespace-only formats ("simple", "plain", "rst") have no padding,
# so their column separator is a double space: a single space could not be
# told apart from a space inside a cell.
_table_formats = {"simple":
                  TableFormat(lineabove=Line("", "-", "  ", ""),
                              linebelowheader=Line("", "-", "  ", ""),
                              linebetweenrows=None,
                              linebelow=Line("", "-", "  ", ""),
                              headerrow=DataRow("", "  ", ""),
                              datarow=DataRow("", "  ", ""),
                              padding=0,
                              with_header_hide=["lineabove", "linebelow"]),
                  "plain":
                  TableFormat(lineabove=None, linebelowheader=None,
                              linebetweenrows=None, linebelow=None,
                              headerrow=DataRow("", "  ", ""),
                              datarow=DataRow("", "  ", ""),
                              padding=0, with_header_hide=None),
                  "grid":
                  TableFormat(lineabove=Line("+", "-", "+", "+"),
                              linebelowheader=Line("+", "=", "+", "+"),
                              linebetweenrows=Line("+", "-", "+", "+"),
                              linebelow=Line("+", "-", "+", "+"),
                              headerrow=DataRow("|", "|", "|"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1, with_header_hide=None),
                  "fancy_grid":
                  TableFormat(lineabove=Line("╒", "═", "╤", "╕"),
                              linebelowheader=Line("╞", "═", "╪", "╡"),
                              linebetweenrows=Line("├", "─", "┼", "┤"),
                              linebelow=Line("╘", "═", "╧", "╛"),
                              headerrow=DataRow("│", "│", "│"),
                              datarow=DataRow("│", "│", "│"),
                              padding=1, with_header_hide=None),
                  "pipe":
                  TableFormat(lineabove=_pipe_line_with_colons,
                              linebelowheader=_pipe_line_with_colons,
                              linebetweenrows=None,
                              linebelow=None,
                              headerrow=DataRow("|", "|", "|"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1,
                              with_header_hide=["lineabove"]),
                  "orgtbl":
                  TableFormat(lineabove=None,
                              linebelowheader=Line("|", "-", "+", "|"),
                              linebetweenrows=None,
                              linebelow=None,
                              headerrow=DataRow("|", "|", "|"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1, with_header_hide=None),
                  "psql":
                  TableFormat(lineabove=Line("+", "-", "+", "+"),
                              linebelowheader=Line("|", "-", "+", "|"),
                              linebetweenrows=None,
                              linebelow=Line("+", "-", "+", "+"),
                              headerrow=DataRow("|", "|", "|"),
                              datarow=DataRow("|", "|", "|"),
                              padding=1, with_header_hide=None),
                  "rst":
                  TableFormat(lineabove=Line("", "=", "  ", ""),
                              linebelowheader=Line("", "=", "  ", ""),
                              linebetweenrows=None,
                              linebelow=Line("", "=", "  ", ""),
                              headerrow=DataRow("", "  ", ""),
                              datarow=DataRow("", "  ", ""),
                              padding=0, with_header_hide=None),
                  "mediawiki":
                  TableFormat(lineabove=Line("{| class=\"wikitable\" style=\"text-align: left;\"",
                                             "", "", "\n|+ <!-- caption -->\n|-"),
                              linebelowheader=Line("|-", "", "", ""),
                              linebetweenrows=Line("|-", "", "", ""),
                              linebelow=Line("|}", "", "", ""),
                              headerrow=partial(_mediawiki_row_with_attrs, "!"),
                              datarow=partial(_mediawiki_row_with_attrs, "|"),
                              padding=0, with_header_hide=None),
                  "html":
                  TableFormat(lineabove=Line("<table>", "", "", ""),
                              linebelowheader=None,
                              linebetweenrows=None,
                              linebelow=Line("</table>", "", "", ""),
                              headerrow=partial(_html_row_with_attrs, "th"),
                              datarow=partial(_html_row_with_attrs, "td"),
                              padding=0, with_header_hide=None),
                  "latex":
                  TableFormat(lineabove=_latex_line_begin_tabular,
                              linebelowheader=Line("\\hline", "", "", ""),
                              linebetweenrows=None,
                              linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
                              headerrow=_latex_row,
                              datarow=_latex_row,
                              padding=1, with_header_hide=None),
                  "latex_booktabs":
                  TableFormat(lineabove=partial(_latex_line_begin_tabular, booktabs=True),
                              linebelowheader=Line("\\midrule", "", "", ""),
                              linebetweenrows=None,
                              linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""),
                              headerrow=_latex_row,
                              datarow=_latex_row,
                              padding=1, with_header_hide=None),
                  "tsv":
                  TableFormat(lineabove=None, linebelowheader=None,
                              linebetweenrows=None, linebelow=None,
                              headerrow=DataRow("", "\t", ""),
                              datarow=DataRow("", "\t", ""),
                              padding=0, with_header_hide=None)}
|
|
|
|
|
|
# Alphabetically sorted names of the built-in table formats.
tabulate_formats = sorted(_table_formats)
|
|
|
|
|
|
# ANSI color (SGR) escape codes: ESC "[" followed by any number of
# ";"-separated numeric parameters and a final "m", e.g. "\x1b[0m",
# "\x1b[1;31m" or "\x1b[38;5;196m".  They occupy no space on screen and
# must be ignored when measuring the width of a cell.
#
# Both patterns are raw literals so that "\[" and "\d" reach the regex
# engine unchanged; the "br" prefix is accepted by Python 2 and 3.
_invisible_codes = re.compile(r"\x1b\[\d*(?:;\d*)*m")  # text strings
_invisible_codes_bytes = re.compile(br"\x1b\[\d*(?:;\d*)*m")  # byte strings
|
|
|
|
|
|
def simple_separated_format(separator):
    """Return a TableFormat that draws no lines and adds no padding; cells
    of header and data rows are simply joined with `separator`.

    >>> tsv = simple_separated_format("\\t") ; \
        tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv) == 'foo \\t 1\\nspam\\t23'
    True

    """
    row = DataRow('', separator, '')
    return TableFormat(lineabove=None,
                       linebelowheader=None,
                       linebetweenrows=None,
                       linebelow=None,
                       headerrow=row,
                       datarow=row,
                       padding=0,
                       with_header_hide=None)
|
|
|
|
|
|
def _isconvertible(conv, string):
    """Return True if `conv(string)` succeeds, False if it raises
    ValueError or TypeError. Other exceptions propagate."""
    try:
        conv(string)
    except (ValueError, TypeError):
        return False
    return True
|
|
|
|
|
|
def _isnumber(string):
    """Tell whether `float()` accepts the value.

    >>> _isnumber("123.45")
    True
    >>> _isnumber("123")
    True
    >>> _isnumber("spam")
    False
    """
    return _isconvertible(float, string)
|
|
|
|
|
|
def _isint(string, inttype=int):
    """Tell whether the value is an integer.

    True when the value's exact type is `inttype`, or when it is a text or
    byte string that `inttype()` can convert. Anything else (a float, for
    example) is not an integer.

    >>> _isint("123")
    True
    >>> _isint("123.45")
    False
    """
    if type(string) is inttype:
        return True
    if not isinstance(string, (_binary_type, _text_type)):
        return False
    return _isconvertible(inttype, string)
|
|
|
|
|
|
def _type(string, has_invisible=True):
    """Return the least generic type a cell value fits
    (type(None), int, float, str, unicode).

    When `has_invisible` is true, ANSI color codes are stripped from
    strings before the check. Objects with an `isoformat` attribute
    (datetime, date, time) count as text.

    >>> _type(None) is type(None)
    True
    >>> _type("foo") is type("")
    True
    >>> _type("1") is type(1)
    True
    >>> _type('\x1b[31m42\x1b[0m') is type(42)
    True

    """
    if has_invisible and isinstance(string, (_text_type, _binary_type)):
        string = _strip_invisible(string)

    if string is None:
        return _none_type
    if hasattr(string, "isoformat"):  # datetime.datetime, date, and time
        return _text_type
    if _isint(string):
        return int
    if _isint(string, _long_type):
        return _long_type
    if _isnumber(string):
        return float
    if isinstance(string, _binary_type):
        return _binary_type
    return _text_type
|
|
|
|
|
|
def _afterpoint(string):
    """Count the symbols after the decimal point of a number string.

    Returns -1 for integers and for strings that are not numbers. If a
    number has no decimal point but has an exponent, the symbols after
    the "e" are counted instead.

    >>> _afterpoint("123.45")
    2
    >>> _afterpoint("1001")
    -1
    >>> _afterpoint("eggs")
    -1
    >>> _afterpoint("123e45")
    2

    """
    if not _isnumber(string) or _isint(string):
        return -1  # not a number, or a number without a fractional part
    marker = string.rfind(".")
    if marker < 0:
        marker = string.lower().rfind("e")
    if marker < 0:
        return -1  # no point
    return len(string) - marker - 1
|
|
|
|
|
|
def _padleft(width, s, has_invisible=True):
    """Flush right: pad `s` with spaces on the left up to `width` visible
    characters. With `has_invisible`, ANSI color codes do not count.

    >>> _padleft(6, '\u044f\u0439\u0446\u0430') == '  \u044f\u0439\u0446\u0430'
    True

    """
    total = width
    if has_invisible:
        # invisible characters need extra room in the format width
        total = width + len(s) - len(_strip_invisible(s))
    return "{0:>{1}s}".format(s, total)
|
|
|
|
|
|
def _padright(width, s, has_invisible=True):
    """Flush left: pad `s` with spaces on the right up to `width` visible
    characters. With `has_invisible`, ANSI color codes do not count.

    >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430  '
    True

    """
    total = width
    if has_invisible:
        # invisible characters need extra room in the format width
        total = width + len(s) - len(_strip_invisible(s))
    return "{0:<{1}s}".format(s, total)
|
|
|
|
|
|
def _padboth(width, s, has_invisible=True):
    """Center `s`: pad it with spaces on both sides up to `width` visible
    characters. With `has_invisible`, ANSI color codes do not count.

    >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 '
    True

    """
    total = width
    if has_invisible:
        # invisible characters need extra room in the format width
        total = width + len(s) - len(_strip_invisible(s))
    return "{0:^{1}s}".format(s, total)
|
|
|
|
|
|
def _strip_invisible(s):
    """Remove invisible ANSI color codes.

    `s` is a text string or a byte string; the result has the same type.
    """
    if isinstance(s, _text_type):
        return re.sub(_invisible_codes, "", s)
    else:  # a bytestring
        # the replacement must be bytes as well: a bytes pattern cannot
        # substitute a text string on Python 3
        return re.sub(_invisible_codes_bytes, b"", s)
|
|
|
|
|
|
def _visible_width(s):
    """Return the printed width of a value, ignoring ANSI color codes.
    Values that are not strings are measured by their text representation.

    >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world")
    (5, 5)

    """
    if isinstance(s, (_text_type, _binary_type)):
        return len(_strip_invisible(s))
    return len(_text_type(s))
|
|
|
|
|
|
def _align_column(strings, alignment, minwidth=0, has_invisible=True):
    """[string] -> [padded_string]

    Pad all cells of one column to a common width, which is the widest
    cell or `minwidth`, whichever is larger. `alignment` is "right",
    "center", "decimal" (decimal points line up) or anything else truthy
    for flush left; a false value returns the cells unchanged.

    >>> list(map(str,_align_column(["12.345", "-1234.5", "1.23", "1234.5", "1e+234", "1.0e234"], "decimal")))
    ['   12.345  ', '-1234.5    ', '    1.23   ', ' 1234.5    ', '    1e+234 ', '    1.0e234']

    >>> list(map(str,_align_column(['123.4', '56.7890'], None)))
    ['123.4', '56.7890']

    """
    if not alignment:
        return strings

    if alignment == "decimal":
        # append spaces so that every cell has as many symbols after the
        # decimal point as the longest one, then flush right
        decimals = []
        for s in strings:
            visible = _strip_invisible(s) if has_invisible else s
            decimals.append(_afterpoint(visible))
        most = max(decimals)
        strings = [s + " " * (most - d) for s, d in zip(strings, decimals)]
        padfn = _padleft
    else:
        strings = [s.strip() for s in strings]
        if alignment == "right":
            padfn = _padleft
        elif alignment == "center":
            padfn = _padboth
        else:
            padfn = _padright

    measure = _visible_width if has_invisible else len
    widest = max([measure(s) for s in strings])
    target = max(widest, minwidth)
    return [padfn(target, s, has_invisible) for s in strings]
|
|
|
|
|
|
def _more_generic(type1, type2):
    """Return the more generic of two cell types.

    Genericity grows in the order: NoneType, int, float, byte string,
    text string. A type outside of this list counts as a text string.
    """
    ladder = [_none_type, int, float, _binary_type, _text_type]
    rank = dict((t, position) for position, t in enumerate(ladder))
    top = len(ladder) - 1
    return ladder[max(rank.get(type1, top), rank.get(type2, top))]
|
|
|
|
|
|
def _column_type(strings, has_invisible=True):
    """Return the least generic type that fits every value of a column.
    An empty column is an int column.

    >>> _column_type(["1", "2"]) is _int_type
    True
    >>> _column_type(["1", "2.3"]) is _float_type
    True
    >>> _column_type(["1", "2.3", "four"]) is _text_type
    True
    >>> _column_type(["four", '\u043f\u044f\u0442\u044c']) is _text_type
    True
    >>> _column_type([None, "brux"]) is _text_type
    True
    >>> _column_type([1, 2, None]) is _int_type
    True
    >>> import datetime as dt
    >>> _column_type([dt.datetime(1991,2,19), dt.time(17,35)]) is _text_type
    True

    """
    column_type = int
    for value in strings:
        column_type = _more_generic(column_type, _type(value, has_invisible))
    return column_type
|
|
|
|
|
|
def _format(val, valtype, floatfmt, missingval="", has_invisible=True):
    """Convert a cell value to text according to the type of its column.

    None becomes `missingval`; values of float columns are formatted with
    the `floatfmt` format specification, and a colored number string keeps
    its color codes around the reformatted number.

    Unicode is supported:

    >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \
        tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \
        good_result = '\\u0431\\u0443\\u043a\\u0432\\u0430      \\u0446\\u0438\\u0444\\u0440\\u0430\\n-------  -------\\n\\u0430\\u0437             2\\n\\u0431\\u0443\\u043a\\u0438           4' ; \
        tabulate(tbl, headers=hrow) == good_result
    True

    """
    if val is None:
        return missingval

    if valtype in (int, _long_type, _text_type):
        return "{0}".format(val)

    if valtype is _binary_type:
        try:
            return _text_type(val, "ascii")
        except TypeError:
            return _text_type(val)

    if valtype is float:
        colored = has_invisible and isinstance(val, (_text_type, _binary_type))
        if not colored:
            return format(float(val), floatfmt)
        # reformat only the number, keep the color codes around it
        raw_val = _strip_invisible(val)
        return val.replace(raw_val, format(float(raw_val), floatfmt))

    return "{0}".format(val)
|
|
|
|
|
|
def _align_header(header, alignment, width):
    """Pad a header to `width`: flush left for "left", centered for
    "center", unchanged for a false alignment, flush right otherwise."""
    if not alignment:
        return "{0}".format(header)
    if alignment == "left":
        padfn = _padright
    elif alignment == "center":
        padfn = _padboth
    else:
        padfn = _padleft
    return padfn(width, header)
|
|
|
|
|
|
def _normalize_tabular_data(tabular_data, headers):
    """Transform a supported data type to a list of lists, and a list of headers.

    Supported tabular data types:

    * list-of-lists or another iterable of iterables

    * list of named tuples (usually used with headers="keys")

    * list of dicts (usually used with headers="keys")

    * list of OrderedDicts (usually used with headers="keys")

    * 2D NumPy arrays

    * NumPy record arrays (usually used with headers="keys")

    * dict of iterables (usually used with headers="keys")

    * pandas.DataFrame (usually used with headers="keys")

    The first row can be used as headers if headers="firstrow",
    column indices can be used as headers if headers="keys".

    Returns a tuple `(rows, headers)`: `rows` is a list of lists and
    `headers` is a list of text strings, empty if the table has no headers.
    If there are fewer headers than columns in the first row, empty headers
    are added in front. A "keys" or "firstrow" keyword that cannot be
    resolved because there are no rows gives an empty header list.

    Raises ValueError if `tabular_data` has `keys` and `values` but looks
    neither like a dict nor like a DataFrame, or if `headers` for a list of
    dicts is neither a dict nor a keyword.

    """

    if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
        # dict-like and pandas.DataFrame?
        if hasattr(tabular_data.values, "__call__"):
            # likely a conventional dict
            keys = tabular_data.keys()
            rows = list(izip_longest(*tabular_data.values()))  # columns have to be transposed
        elif hasattr(tabular_data, "index"):
            # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0)
            keys = tabular_data.keys()
            vals = tabular_data.values  # values matrix doesn't need to be transposed
            names = tabular_data.index
            rows = [[v] + list(row) for v, row in zip(names, vals)]
        else:
            raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

        if headers == "keys":
            headers = list(map(_text_type, keys))  # headers should be strings

    else:  # it's a usual an iterable of iterables, or a NumPy array
        rows = list(tabular_data)

        if (headers == "keys" and
                hasattr(tabular_data, "dtype") and
                getattr(tabular_data.dtype, "names")):
            # numpy record array
            headers = tabular_data.dtype.names
        elif (headers == "keys"
              and len(rows) > 0
              and isinstance(rows[0], tuple)
              and hasattr(rows[0], "_fields")):
            # namedtuple
            headers = list(map(_text_type, rows[0]._fields))
        elif (len(rows) > 0
              and isinstance(rows[0], dict)):
            # dict or OrderedDict
            uniq_keys = set()  # implements hashed lookup
            keys = []  # storage for set
            if headers == "firstrow":
                firstdict = rows[0] if len(rows) > 0 else {}
                keys.extend(firstdict.keys())
                uniq_keys.update(keys)
                rows = rows[1:]
            for row in rows:
                for k in row.keys():
                    # Save unique items in input order
                    if k not in uniq_keys:
                        keys.append(k)
                        uniq_keys.add(k)
            if headers == 'keys':
                headers = keys
            elif isinstance(headers, dict):
                # a dict of headers for a list of dicts
                headers = [headers.get(k, k) for k in keys]
                headers = list(map(_text_type, headers))
            elif headers == "firstrow":
                if len(rows) > 0:
                    headers = [firstdict.get(k, k) for k in keys]
                    headers = list(map(_text_type, headers))
                else:
                    headers = []
            elif headers:
                raise ValueError('headers for a list of dicts is not a dict or a keyword')
            rows = [[row.get(k) for k in keys] for row in rows]
        elif headers == "keys" and len(rows) > 0:
            # keys are column indices
            headers = list(map(_text_type, range(len(rows[0]))))

    # take headers from the first row if necessary
    if headers == "firstrow" and len(rows) > 0:
        headers = list(map(_text_type, rows[0]))  # headers should be strings
        rows = rows[1:]

    # A keyword that is still unresolved here means the table has no rows
    # to take headers from. Without this check the keyword itself would be
    # split into one-character headers ("k", "e", "y", "s") below.
    if headers == "keys" or headers == "firstrow":
        headers = []

    headers = list(map(_text_type, headers))
    rows = list(map(list, rows))

    # pad with empty headers for initial columns if necessary
    if headers and len(rows) > 0:
        nhs = len(headers)
        ncols = len(rows[0])
        if nhs < ncols:
            headers = [""] * (ncols - nhs) + headers

    return rows, headers
|
|
|
|
|
|
def tabulate(tabular_data, headers=(), tablefmt="simple",
             floatfmt="g", numalign="decimal", stralign="left",
             missingval=""):
    """Format a fixed width table for pretty printing.

    The table is returned as a single string, lines joined with "\\n".

    >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]]))
    ---  ---------
      1      2.34
    -56      8.999
      2  10001
    ---  ---------

    The first required argument (`tabular_data`) can be a
    list-of-lists (or another iterable of iterables), a list of named
    tuples, a dictionary of iterables, an iterable of dictionaries,
    a two-dimensional NumPy array, NumPy record array, or a Pandas'
    dataframe.


    Table headers
    -------------

    To print nice column headers, supply the second argument (`headers`):

      - `headers` can be an explicit list of column headers
      - if `headers="firstrow"`, then the first row of data is used
      - if `headers="keys"`, then dictionary keys or column indices are used

    Otherwise a headerless table is produced.

    If the number of headers is less than the number of columns, they
    are supposed to be names of the last columns. This is consistent
    with the plain-text format of R and Pandas' dataframes.

    >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]],
    ...       headers="firstrow"))
           sex      age
    -----  -----  -----
    Alice  F         24
    Bob    M         19


    Column alignment
    ----------------

    `tabulate` tries to detect column types automatically, and aligns
    the values properly. By default it aligns decimal points of the
    numbers (or flushes integer numbers to the right), and flushes
    everything else to the left. Possible column alignments
    (`numalign`, `stralign`) are: "right", "center", "left", "decimal"
    (only for `numalign`), and None (to disable alignment).


    Table formats
    -------------

    `floatfmt` is a format specification used for columns which
    contain numeric data with a decimal point.

    `None` values are replaced with a `missingval` string:

    >>> print(tabulate([["spam", 1, None],
    ...                 ["eggs", 42, 3.14],
    ...                 ["other", None, 2.7]], missingval="?"))
    -----  --  ----
    spam    1  ?
    eggs   42  3.14
    other   ?  2.7
    -----  --  ----

    Various plain-text table formats (`tablefmt`) are supported:
    'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki',
    'latex', and 'latex_booktabs'. Variable `tabulate_formats` contains the list of
    currently supported formats. `tablefmt` may also be a TableFormat
    instance; an unknown format name falls back to "simple".

    "plain" format doesn't use any pseudographics to draw tables,
    it separates columns with a double space:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                 ["strings", "numbers"], "plain"))
    strings      numbers
    spam         41.9999
    eggs        451

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain"))
    spam   41.9999
    eggs  451

    "simple" format is like Pandoc simple_tables:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                 ["strings", "numbers"], "simple"))
    strings      numbers
    ---------  ---------
    spam         41.9999
    eggs        451

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple"))
    ----  --------
    spam   41.9999
    eggs  451
    ----  --------

    "grid" is similar to tables produced by Emacs table.el package or
    Pandoc grid_tables:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "grid"))
    +-----------+-----------+
    | strings   |   numbers |
    +===========+===========+
    | spam      |   41.9999 |
    +-----------+-----------+
    | eggs      |  451      |
    +-----------+-----------+

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid"))
    +------+----------+
    | spam |  41.9999 |
    +------+----------+
    | eggs | 451      |
    +------+----------+

    "fancy_grid" draws a grid using box-drawing characters:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "fancy_grid"))
    ╒═══════════╤═══════════╕
    │ strings   │   numbers │
    ╞═══════════╪═══════════╡
    │ spam      │   41.9999 │
    ├───────────┼───────────┤
    │ eggs      │  451      │
    ╘═══════════╧═══════════╛

    "pipe" is like tables in PHP Markdown Extra extension or Pandoc
    pipe_tables:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "pipe"))
    | strings   |   numbers |
    |:----------|----------:|
    | spam      |   41.9999 |
    | eggs      |  451      |

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe"))
    |:-----|---------:|
    | spam |  41.9999 |
    | eggs | 451      |

    "orgtbl" is like tables in Emacs org-mode and orgtbl-mode. They
    are slightly different from "pipe" format by not using colons to
    define column alignment, and using a "+" sign to indicate line
    intersections:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "orgtbl"))
    | strings   |   numbers |
    |-----------+-----------|
    | spam      |   41.9999 |
    | eggs      |  451      |


    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl"))
    | spam |  41.9999 |
    | eggs | 451      |

    "rst" is like a simple table format from reStructuredText; please
    note that reStructuredText accepts also "grid" tables:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
    ...                ["strings", "numbers"], "rst"))
    =========  =========
    strings      numbers
    =========  =========
    spam         41.9999
    eggs        451
    =========  =========

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst"))
    ====  ========
    spam   41.9999
    eggs  451
    ====  ========

    "mediawiki" produces a table markup used in Wikipedia and on other
    MediaWiki-based sites:

    >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
    ...                headers="firstrow", tablefmt="mediawiki"))
    {| class="wikitable" style="text-align: left;"
    |+ <!-- caption -->
    |-
    ! strings   !! align="right"|   numbers
    |-
    | spam      || align="right"|   41.9999
    |-
    | eggs      || align="right"|  451
    |}

    "html" produces HTML markup:

    >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
    ...                headers="firstrow", tablefmt="html"))
    <table>
    <tr><th>strings  </th><th style="text-align: right;">  numbers</th></tr>
    <tr><td>spam     </td><td style="text-align: right;">  41.9999</td></tr>
    <tr><td>eggs     </td><td style="text-align: right;"> 451     </td></tr>
    </table>

    "latex" produces a tabular environment of LaTeX document markup:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex"))
    \\begin{tabular}{lr}
    \\hline
     spam &  41.9999 \\\\
     eggs & 451      \\\\
    \\hline
    \\end{tabular}

    "latex_booktabs" produces a tabular environment of LaTeX document markup
    using the booktabs.sty package:

    >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs"))
    \\begin{tabular}{lr}
    \\toprule
     spam &  41.9999 \\\\
     eggs & 451      \\\\
    \\bottomrule
    \\end{tabular}
    """
    # None is treated as an empty table
    if tabular_data is None:
        tabular_data = []
    list_of_lists, headers = _normalize_tabular_data(tabular_data, headers)

    # optimization: look for ANSI control codes once,
    # enable smart width functions only if a control code is found
    plain_text = '\n'.join(['\t'.join(map(_text_type, headers))] + \
                            ['\t'.join(map(_text_type, row)) for row in list_of_lists])
    # a match object or None; only its truth value is used below
    has_invisible = re.search(_invisible_codes, plain_text)
    if has_invisible:
        width_fn = _visible_width
    else:
        width_fn = len

    # format rows and columns, convert numeric values to strings
    cols = list(zip(*list_of_lists))
    coltypes = list(map(_column_type, cols))
    cols = [[_format(v, ct, floatfmt, missingval, has_invisible) for v in c]
             for c,ct in zip(cols, coltypes)]

    # align columns
    aligns = [numalign if ct in [int,float] else stralign for ct in coltypes]
    # a column is at least as wide as its header plus MIN_PADDING
    minwidths = [width_fn(h) + MIN_PADDING for h in headers] if headers else [0]*len(cols)
    cols = [_align_column(c, a, minw, has_invisible)
            for c, a, minw in zip(cols, aligns, minwidths)]

    if headers:
        # align headers and add headers
        # (if there are no data columns, one empty cell per header
        # stands in for them)
        t_cols = cols or [['']] * len(headers)
        t_aligns = aligns or [stralign] * len(headers)
        # all cells of an aligned column are measured by the first cell
        minwidths = [max(minw, width_fn(c[0])) for minw, c in zip(minwidths, t_cols)]
        headers = [_align_header(h, a, minw)
                   for h, a, minw in zip(headers, t_aligns, minwidths)]
        rows = list(zip(*cols))
    else:
        minwidths = [width_fn(c[0]) for c in cols]
        rows = list(zip(*cols))

    # a format name is looked up; an unknown name falls back to "simple"
    if not isinstance(tablefmt, TableFormat):
        tablefmt = _table_formats.get(tablefmt, _table_formats["simple"])

    return _format_table(tablefmt, headers, rows, minwidths, aligns)
|
|
|
|
|
|
def _build_simple_row(padded_cells, rowfmt):
    """Format row according to DataRow format without padding.

    `rowfmt` is a (begin, sep, end) triple; the cells are joined with
    `sep`, wrapped in `begin` and `end`, and trailing whitespace of the
    resulting line is removed.
    """
    left_edge, separator, right_edge = rowfmt
    line = left_edge + separator.join(padded_cells) + right_edge
    return line.rstrip()
|
|
|
|
|
|
def _build_row(padded_cells, colwidths, colaligns, rowfmt):
    """Return a string which represents a row of data cells.

    `rowfmt` is either a DataRow-like triple or a function
    (cells, colwidths, colaligns) -> string. A false `rowfmt` gives None.
    """
    if not rowfmt:
        return None
    if not hasattr(rowfmt, "__call__"):
        return _build_simple_row(padded_cells, rowfmt)
    return rowfmt(padded_cells, colwidths, colaligns)
|
|
|
|
|
|
def _build_line(colwidths, colaligns, linefmt):
    """Return a string which represents a horizontal line.

    `linefmt` is either a Line-like (begin, fill, sep, end) tuple or a
    function (colwidths, colaligns) -> string. A false `linefmt` gives None.
    """
    if not linefmt:
        return None
    if hasattr(linefmt, "__call__"):
        return linefmt(colwidths, colaligns)
    left_edge, fill_char, separator, right_edge = linefmt
    segments = [fill_char * width for width in colwidths]
    return _build_simple_row(segments, (left_edge, separator, right_edge))
|
|
|
|
|
|
def _pad_row(cells, padding):
    """Return a list of the cells, each with `padding` spaces added on
    both sides. An empty (false) `cells` is returned as is."""
    if not cells:
        return cells
    margin = " " * padding
    return [margin + cell + margin for cell in cells]
|
|
|
|
|
|
def _format_table(fmt, headers, rows, colwidths, colaligns):
    """Produce a plain-text representation of the table.

    The lines are emitted in the order: line above, header row, line below
    the header, data rows (with a line between them if the format has
    one), line below. Lines named in `fmt.with_header_hide` are left out
    when the table has headers. The result is one string joined with "\\n".
    """
    lines = []
    if headers and fmt.with_header_hide:
        hidden = fmt.with_header_hide
    else:
        hidden = []
    pad = fmt.padding

    padded_widths = [(w + 2*pad) for w in colwidths]
    padded_headers = _pad_row(headers, pad)
    padded_rows = [_pad_row(row, pad) for row in rows]

    def add_rule(name):
        # append the horizontal line called `name`, if the format defines
        # it and it is not hidden
        linefmt = getattr(fmt, name)
        if linefmt and name not in hidden:
            lines.append(_build_line(padded_widths, colaligns, linefmt))

    add_rule("lineabove")

    if padded_headers:
        lines.append(_build_row(padded_headers, padded_widths, colaligns,
                                fmt.headerrow))
        add_rule("linebelowheader")

    for position, row in enumerate(padded_rows):
        if position:
            # a separating line goes between rows only, not after the last
            add_rule("linebetweenrows")
        lines.append(_build_row(row, padded_widths, colaligns, fmt.datarow))

    add_rule("linebelow")

    return "\n".join(lines)
|
|
|
|
|
|
def _main():
    """\
    Usage: tabulate [options] [FILE ...]

    Pretty-print tabular data.
    See also https://bitbucket.org/astanin/python-tabulate

    FILE                      a filename of the file with tabular data;
                              if "-" or missing, read data from stdin.

    Options:

    -h, --help                  show this message
    -1, --header                use the first row of data as a table header
    -o FILE, --output FILE      print table to FILE (default: stdout)
    -s REGEXP, --sep REGEXP     use a custom column separator (default: whitespace)
    -F FPFMT, --float FPFMT     floating point number format (default: g)
    -f FMT, --format FMT        set output table format; supported formats:
                                plain, simple, grid, fancy_grid, pipe, orgtbl,
                                rst, mediawiki, html, latex, latex_booktabs, tsv
                                (default: simple)
    """
    # Command-line entry point. The docstring above doubles as the usage
    # message. Exit codes: 2 for unparsable options, 3 for an unsupported
    # table format, 0 after --help.
    import getopt
    import sys
    import textwrap
    usage = textwrap.dedent(_main.__doc__)
    try:
        # "output=" needs the trailing "=" like the other long options
        # that take a value; without it --output does not consume FILE
        opts, args = getopt.getopt(sys.argv[1:],
                     "h1o:s:F:f:",
                     ["help", "header", "output=", "sep=", "float=", "format="])
    except getopt.GetoptError as e:
        print(e)
        print(usage)
        sys.exit(2)
    headers = []
    floatfmt = "g"
    tablefmt = "simple"
    sep = r"\s+"
    outfile = "-"
    for opt, value in opts:
        if opt in ["-1", "--header"]:
            headers = "firstrow"
        elif opt in ["-o", "--output"]:
            outfile = value
        elif opt in ["-F", "--float"]:
            floatfmt = value
        elif opt in ["-f", "--format"]:
            if value not in tabulate_formats:
                print("%s is not a supported table format" % value)
                print(usage)
                sys.exit(3)
            tablefmt = value
        elif opt in ["-s", "--sep"]:
            sep = value
        elif opt in ["-h", "--help"]:
            print(usage)
            sys.exit(0)
    files = [sys.stdin] if not args else args
    out = sys.stdout if outfile == "-" else open(outfile, "w")
    try:
        for f in files:
            if f == "-":
                f = sys.stdin
            if _is_file(f):
                _pprint_file(f, headers=headers, tablefmt=tablefmt,
                             sep=sep, floatfmt=floatfmt, file=out)
            else:
                with open(f) as fobj:
                    _pprint_file(fobj, headers=headers, tablefmt=tablefmt,
                                 sep=sep, floatfmt=floatfmt, file=out)
    finally:
        # close only a file opened here; sys.stdout belongs to the
        # interpreter and must stay usable
        if out is not sys.stdout:
            out.close()
|
|
|
|
|
|
def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, file):
    """Read all lines of `fobject`, split each one into cells with the
    regular expression `sep` (trailing whitespace is removed first), and
    print the resulting table to `file`."""
    table = []
    for line in fobject.readlines():
        table.append(re.split(sep, line.rstrip()))
    rendered = tabulate(table, headers, tablefmt, floatfmt=floatfmt)
    print(rendered, file=file)
|
|
|
|
|
|
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    _main()
|