-
Notifications
You must be signed in to change notification settings - Fork 95
Expand file tree
/
Copy pathscatterplot.py
More file actions
100 lines (83 loc) · 2.67 KB
/
scatterplot.py
File metadata and controls
100 lines (83 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
# Greg Von Kuster
# flake8: noqa
from __future__ import print_function
import sys
from numpy import array
import rpy2.rpy_classic as rpy
from rpy2.robjects.numpy2ri import numpy2ri
# Select the NO_CONVERSION default mode for the rpy_classic interface.
# NOTE(review): presumably this keeps the results of R calls as R objects
# instead of converting them to Python values -- confirm against the rpy2 docs.
rpy.set_default_mode(rpy.NO_CONVERSION)
# Module-level shorthand for the R interface; main() reaches R's plotting
# functions through it (r.pdf, r.plot, r.dev_off).
r = rpy.r
def stop_err(msg):
    """Write *msg* to standard error and terminate with a failure status.

    Args:
        msg: Text written to ``sys.stderr`` exactly as given (no newline is
            appended).

    Raises:
        SystemExit: Always, with exit code 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() exits with status 0, which reports success to the
    # caller even though an error was just written; use a non-zero status.
    sys.exit(1)
def main():
    """Draw a scatter plot of two columns of a tab-separated file into a PDF.

    Command-line arguments (read from ``sys.argv``):
        1: path of the tab-separated input file
        2: path of the PDF file to write
        3, 4: 1-based numbers of the two columns to plot
        5: plot title
        6, 7: x-axis and y-axis labels

    Blank lines, lines starting with ``#`` and lines whose selected fields
    are missing or not numeric are skipped and counted.  A field equal to
    ``NA`` (in any letter case) is kept as NaN.  A summary of what was
    plotted and skipped is printed to stdout.

    If the column numbers cannot be parsed, no usable row is found, or the
    plotting calls raise, the corresponding message is handed to
    ``stop_err``.
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        # Convert the 1-based column numbers to 0-based indexes.
        columns = int(sys.argv[3]) - 1, int(sys.argv[4]) - 1
    except (IndexError, ValueError):
        stop_err(
            "Columns not specified, your query does not contain a column of numerical data."
        )
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    matrix = []
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ""
    invalid_column = 0

    # The context manager closes the input file even if parsing fails.
    with open(in_fname) as in_file:
        for line_number, line in enumerate(in_file, 1):
            line = line.rstrip("\r\n")
            if not line or line.startswith("#"):
                # Blank and comment lines are counted as skipped lines.
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = line_number
                continue

            fields = line.split("\t")
            row = []
            for column in columns:
                try:
                    val = fields[column]
                    if val.lower() == "na":
                        # float("na") would raise; keep the point as NaN.
                        row.append(float("nan"))
                    else:
                        row.append(float(val))
                except (IndexError, ValueError):
                    skipped_lines += 1
                    # Remember only the first offending line for the report.
                    if not first_invalid_line:
                        first_invalid_line = line_number
                        try:
                            invalid_value = fields[column]
                        except IndexError:
                            # The line has no such column at all.
                            invalid_value = ""
                        invalid_column = column + 1
                    break
            else:
                # Both fields parsed: the row is usable.
                matrix.append(row)

    # Plot whenever at least one usable row exists.  Comparing the skip count
    # with the last line index instead wrongly rejected files that contain
    # exactly one usable row.
    if matrix:
        try:
            a = numpy2ri(array(matrix))
            # NOTE(review): the two 8s are presumably the page width and
            # height that R's pdf() device takes -- confirm against R docs.
            r.pdf(out_fname, 8, 8)
            r.plot(a, type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19)
            r.dev_off()
        except Exception as exc:
            # Top-level boundary: report whatever the R bridge raised.
            stop_err("%s" % str(exc))
    else:
        stop_err(
            "All values in both columns %s and %s are non-numeric or empty."
            % (sys.argv[3], sys.argv[4])
        )

    print("Scatter plot on columns %s, %s. " % (sys.argv[3], sys.argv[4]))
    if skipped_lines > 0:
        print(
            "Skipped %d lines starting with line #%d, value '%s' in column %d is not numeric."
            % (skipped_lines, first_invalid_line, invalid_value, invalid_column)
        )
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()