Python Basic Notes
Basic Types
Tuples
tuples in python are immutable
Control Statement
For Loop
for i in range(3): # goes from i = 0 to i = 2
x += 1
for i in [0, 1, 2]:
x += 1
While Loop
while x > 0:
x -= 1
Functions
def join_name(first_name, last_name):
joined_name = first_name + " " + last_name
return joined_name
Classes
class Pet(object):
def __init__(self, species, color, name):
self.species = species
self.color = color
self.name = name
def __str__(self):
return "{0} {1} named {2}.".format(self.color, self.species, self.name)
def change_name(self, new_name):
self.name = new_name
my_dog = Pet(species="dog", color="orange", name="Guises")
print(my_dog)
print(my_dog.name)
# => output:
# orange dog named Guises.
# Guises
CLI Application
Basic CLI
import click
from caesar_encryption import encrypt
@click.command()
@click.option(
'--input_file',
type=click.File('r'),
help='File in which there is the text you want to encrypt/decrypt.'
'If not provided, a prompt will allow you to type the input text.',
)
@click.option(
'--output_file',
type=click.File('w'),
help='File in which the encrypted / decrypted text will be written.'
'If not provided, the output text will just be printed.',
)
@click.option(
'--decrypt/--encrypt',
'-d/-e',
help='Whether you want to encrypt the input text or decrypt it.'
)
@click.option(
'--key',
'-k',
default=1,
help='The numeric key to use for the caesar encryption / decryption.'
)
def caesar(input_file, output_file, decrypt, key):
if input_file:
text = input_file.read()
else:
text = click.prompt('Enter a text', hide_input=not decrypt)
if decrypt:
key = -key
cypherText = encrypt(text, key)
if output_file:
output_file.write(cypherText)
else:
click.echo(cypherText)
if __name__ == '__main__':
caesar()
Progress Bar
import click
import enchant
from tqdm import tqdm
from caesar_encryption import encrypt
@click.command()
@click.option(
'--input_file',
type=click.File('r'),
required=True,
)
@click.option(
'--output_file',
type=click.File('w'),
required=True,
)
def caesar_breaker(input_file, output_file):
cypherText = input_file.read()
english_dictionary = enchant.Dict("en_US")
best_number_of_english_words = 0
for key in tqdm(range(26)):
plaintext = encrypt(cypherText, -key)
number_of_english_words = 0
for word in plaintext.split(' '):
if word and english_dictionary.check(word):
number_of_english_words += 1
if number_of_english_words > best_number_of_english_words:
best_number_of_english_words = number_of_english_words
best_plaintext = plaintext
best_key = key
click.echo(f'The most likely encryption key is {best_key}. It gives the
following plaintext:\n\n{best_plaintext[:1000]}...')
output_file.write(best_plaintext)
if __name__ == '__main__':
caesar_breaker()
Data Files
CSV File
import csv
filename = "my_data.csv"
fields = []
rows = []
# Reading csv file
with open(filename, 'r') as csvFile:
# Creating a csv reader object
csvReader = csv.reader(csvFile)
# Extracting field names in the first row
fields = csvReader.next()
# Extracting each data row one by one
for row in csvReader:
rows.append(row)
# Printing out the first 5 rows
for row in rows[:5]:
print(row)
import csv
# Field names
fields = ['Name', 'Goals', 'Assists', 'Shots']
# Rows of data in the csv file
rows = [ ['Emily', '12', '18', '112'],
['Katie', '8', '24', '96'],
['John', '16', '9', '101'],
['Mike', '3', '14', '82']]
filename = "soccer.csv"
# Writing to csv file
with open(filename, 'w+') as csvFile:
# Creating a csv writer object
csvWriter = csv.writer(csvFile)
# Writing the fields
csvWriter.writerow(fields)
# Writing the data rows
csvWriter.writerows(rows)
import pandas as pd
filename = "my_data.csv"
# Read in the data
data = pd.read_csv(filename)
# Print the first 5 rows
print(data.head(5))
# Write the data to file
data.to_csv("new_data.csv", sep=",", index=False)
JSON File
import json
import pandas as pd
# Read the data from file
# We now have a Python dictionary
with open('data.json') as f:
data_listOfDict = json.load(f)
# We can do the same thing with pandas
data_df = pd.read_json('data.json', orient='records')
# We can write a dictionary to JSON like so
# Use 'indent' and 'sort_keys' to make the JSON
# file look nice
with open('new_data.json', 'w+') as json_file:
json.dump(data_listOfDict, json_file, indent=4, sort_keys=True)
# And again the same thing with pandas
export = data_df.to_json('new_data.json', orient='records')
XML File
import xml.etree.ElementTree as ET
import xmltodict
import json
tree = ET.parse('output.xml')
xml_data = tree.getroot()
xmlStr = ET.tostring(xml_data, encoding='utf8', method='xml')
data_dict = dict(xmltodict.parse(xmlStr))
print(data_dict)
with open('new_data_2.json', 'w+') as json_file:
json.dump(data_dict, json_file, indent=4, sort_keys=True)
Plain Text File
import numpy as np
x, y = np.loadtxt('input.dat', delimiter=',', unpack=True)
Converter
import pandas as pd
from dicttoxml import dicttoxml
import json
# Building our dataframe
data = {'Name': ['Emily', 'Katie', 'John', 'Mike'],
'Goals': [12, 8, 16, 3],
'Assists': [18, 24, 9, 14],
'Shots': [112, 96, 101, 82]
}
df = pd.DataFrame(data, columns=data.keys())
# Converting the dataframe to a dictionary
# Then save it to file
data_dict = df.to_dict(orient="records")
with open('output.json', "w+") as f:
json.dump(data_dict, f, indent=4)
# Converting the dataframe to XML
# Then save it to file
xml_data = dicttoxml(data_dict).decode()
with open("output.xml", "w+") as f:
f.write(xml_data)
import json
import pandas as pd
import csv
# Read the data from file
# We now have a Python dictionary
with open('data.json') as f:
data_listOfDict = json.load(f)
# Writing a list of dicts to CSV
keys = data_listOfDict[0].keys()
with open('saved_data.csv', 'wb') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(data_listOfDict)
NumPy
import numpy as np
np.random.seed(seed=1234)
NumPy Array Creation
x = np.array(6)
x.ndim
x.shape
x.size
x.dtype
np.zeros((2, 2))
np.ones((2, 2))
np.eye((2))
np.random.random.((2, 2))
NumPy Indexing
x = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
x[:, 1] # [2, 6, 10]
x[0, :] # [1, 2, 3, 4]
x[:3, 1:3] # [[2, 3], [6, 7], [10, 11]]
x[[0, 1, 2], [0, 2, 1]] # [1, 7, 10]
# Boolean array indexing
x = np.array([[1,2], [3, 4], [5, 6]])
print ("x:\n", x)
print ("x > 2:\n", x > 2)
print ("x[x > 2]:\n", x[x > 2])
# x:
# [[1 2]
# [3 4]
# [5 6]]
# x > 2:
# [[False False]
# [ True True]
# [ True True]]
# x[x > 2]:
# [3 4 5 6]
NumPy Matrix Operations
- math:
x+y
/x-y
/x*y
np.add/subtract/multiply
- dot product:
a.dot(b)
- sum:
np.sum(x)
- column sum:
np.sum(x, axis=0)
- row sum:
np.sum(x, axis=1)
- transposing:
x.T
- reshape:
np.reshape(x, (2, 3))
Matplotlib
pip3 install matplotlib
pip3 install ggplot
import matplotlib.pyplot as plt
fig = plt.figure()
ax = plt.subplot2grid((1, 1), (0, 0))
ax.fill_between(x, y, y[0],where=(y > y[0]), facecolor='g', alpha=0.5)
ax.grid(True)
ax.xaxis.label.set_color('c') ax.yaxis.label.set_color('r') ax.set_yticks([0,25,50,75])
ax.xaxis.get_ticklabels()
ax.spines['left'].set_color('c') ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False) ax.spines['left'].set_linewidth(5)
ax.tick_params(axis='x', colors='#f06215')
ax.axhline(20, color='k', linewidth=5)
ax.text(x, y, 'text')
ax.annotate(
'Bad News!',
xytext=(0.8, 0.9),
textcoords='axes fraction',
arrowprops = dict(facecolor='grey', color='grey')
)
plt.plot(x, y, label='label', color='red', linewidth=5)
plt.bar(x, y, label='label', color='#111')
plt.scatter(x, y, label='label', marker='*', size=100)
plt.stackplot(x, y1, y2, ..., yn, colors=[])
plt.pie(slices, labels=[], colors=[], startangle=90, explode=(), autopct='%1.1f%%')
plt.hist(data, bins=10, histtype='bar', rwidth=0.8)
plt.xlabel('xlabel')
plt.ylabel('ylabel')
plt.title('title')
plt.legend()
plt.subplots_adjust()
plt.show()
Plot Type
bar
plot.line
plot.scatter
plot.pie
plot.stack
plot.histogram
plot.interval
plot.box
plot.KDE
plot.candlestick_ohlc
plot.
Plot Style
from matplotlib import style
print(plt.style.available)
print(plt.__file__)
style.use('ggplot')
style.use('fivethirtyeight')
plt.style.use('mystylesheet.mplrc')
Plot Axis Tick
ax3.xaxis.set_major_formatter(mDates.DateFormatter('%Y-%m-%d'))
ax3.xaxis.set_major_locator(mTicker.MaxNLocator(10))
ax1.yaxis.set_major_locator(mTicker.MaxNLocator(nbins=5, prune='lower'))
for label in ax3.xaxis.get_ticklabels():
label.set_rotation(45)
plt.setp(ax1.get_xticklabels(), visible=False)
plt.setp(ax2.get_xticklabels(), visible=False)
Plot Legend
Up middle legend:
ax.legend()
leg = ax.legend(loc=9, ncol=2,prop={'size':11})
leg.get_frame().set_alpha(0.4)
Subplot
Tall and width for grid template:
fig = plt.figure()
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(212)
ax1 = plt.subplot2grid((6,1), (0,0), rowspan=1, colspan=1)
ax2 = plt.subplot2grid((6,1), (1,0), rowspan=4, colspan=1)
ax3 = plt.subplot2grid((6,1), (5,0), rowspan=1, colspan=1)
Double y-axis:
ax2v = ax2.twinx()
3D Plot
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
fig = plt.figure()
ax1 = fig.add_subplot(111, projection='3d')
x = [1,2,3,4,5,6,7,8,9,10]
y = [5,6,7,8,2,5,6,3,7,2]
z = [1,2,6,3,2,7,3,3,7,2]
x2 = [-1,-2,-3,-4,-5,-6,-7,-8,-9,-10]
y2 = [-5,-6,-7,-8,-2,-5,-6,-3,-7,-2]
z2 = [1,2,6,3,2,7,3,3,7,2]
ax1.scatter(x, y, z, c='g', marker='o')
ax1.scatter(x2, y2, z2, c ='r', marker='o')
ax1.set_xlabel('x axis')
ax1.set_ylabel('y axis')
ax1.set_zlabel('z axis')
plt.show()
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import style
style.use('ggplot')
fig = plt.figure()
ax1 = fig.add_subplot(111, projection='3d')
x3 = [1,2,3,4,5,6,7,8,9,10]
y3 = [5,6,7,8,2,5,6,3,7,2]
z3 = np.zeros(10)
dx = np.ones(10)
dy = np.ones(10)
dz = [1,2,3,4,5,6,7,8,9,10]
ax1.bar3d(x3, y3, z3, dx, dy, dz)
ax1.set_xlabel('x axis')
ax1.set_ylabel('y axis')
ax1.set_zlabel('z axis')
plt.show()
Paper Figures Plot
import matplotlib.pyplot as plt
import matplotlib
def latexify(fig_width=None, fig_height=None, columns=1):
'''Set up matplotlib RC params for LaTeX plotting.
Call this before plotting a figure.
Parameters
----------
fig_width : float, optional, inches
fig_height : float, optional, inches
columns : {1, 2}
'''
# code adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples
# Width and max height in inches for IEEE journals taken from
# computer.org/cms/Computer.org/Journal%20templates/transactions_art_guide.pdf
assert(columns in [1,2])
if fig_width is None:
fig_width = 3.39 if columns==1 else 6.9 # width in inches
if fig_height is None:
golden_mean = (sqrt(5)-1.0)/2.0 # Aesthetic ratio
fig_height = fig_width*golden_mean # height in inches
MAX_HEIGHT_INCHES = 8.0
if fig_height > MAX_HEIGHT_INCHES:
print("WARNING: fig_height too large:" + fig_height +
"so will reduce to" + MAX_HEIGHT_INCHES + "inches.")
fig_height = MAX_HEIGHT_INCHES
params = {'backend': 'ps',
'text.latex.preamble': ['\usepackage{gensymb}'],
'axes.labelsize': 8, # fontsize for x and y labels (was 10)
'axes.titlesize': 8,
'text.fontsize': 8, # was 10
'legend.fontsize': 8, # was 10
'xtick.labelsize': 8,
'ytick.labelsize': 8,
'text.usetex': True,
'figure.figsize': [fig_width,fig_height],
'font.family': 'serif'
}
matplotlib.rcParams.update(params)
def format_axes(ax):
for spine in ['top', 'right']:
ax.spines[spine].set_visible(False)
for spine in ['left', 'bottom']:
ax.spines[spine].set_color(SPINE_COLOR)
ax.spines[spine].set_linewidth(0.5)
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
for axis in [ax.xaxis, ax.yaxis]:
axis.set_tick_params(direction='out', color=SPINE_COLOR)
return ax
latexify()
ax.set_xlabel("X label")
ax.set_ylabel("Y label")
ax.set_title("Title")
plt.tight_layout()
format_axes(ax)
plt.savefig("figure.pdf")
import numpy as np
import matplotlib as mpl
mpl.use('pdf')
import matplotlib.pyplot as plt
plt.rc('font', family='serif', serif='Times')
plt.rc('text', usetex=True)
plt.rc('xtick', labelsize=8)
plt.rc('ytick', labelsize=8)
plt.rc('axes', labelsize=8)
# width as measured in ink scape
width = 3.487
height = width / 1.618
fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=.16, right=.99, top=.97)
x = np.arange(0.0, 3*np.pi , 0.1)
plt.plot(x, np.sin(x))
ax.set_ylabel('Some Metric (in unit)')
ax.set_xlabel('Something (in unit)')
ax.set_xlim(0, 3*np.pi)
fig.set_size_inches(width, height)
fig.savefig('plot.pdf')