deanmalmgren · August 29, 2015 14:03
diff --git a/.gitignore b/.gitignore
 sample_a.dat
 sample_b.dat
diff --git a/README.md b/README.md
diff --git a/compare_random_a_b.py b/compare_random_a_b.py
 #!/usr/bin/env python

 """Given two files that contain a list of data values, estimate the
 probability that a randomly chosen value from sample a is greater than
 a randomly chosen value from sample b
 """

 import random

 def read_data(filename):
    """Read whitespace-separated numeric values from `filename`.

    Returns a list of floats (empty when the file holds no values). A
    real list, rather than a lazy `map` object, is returned so callers
    can use `len()` and `random.choice()` on it under Python 3 as well.
    Raises ValueError if a token cannot be parsed as a float.
    """
    with open(filename) as stream:
        return [float(token) for token in stream.read().split()]

 # number of random (a, b) pairs to draw
 m = 10000

 # random.choice needs an indexable sequence, so materialize whatever
 # read_data hands back
 a_data = list(read_data('sample_a.dat'))
 b_data = list(read_data('sample_b.dat'))


 # calculate how frequently a random a is greater than a random b;
 # `range` (not the Python 2-only `xrange`) keeps this runnable on Python 3
 count = 0
 for _ in range(m):
    if random.choice(a_data) > random.choice(b_data):
        count += 1

 p_a_gt_b = float(count) / m
 print("p(a > b) = %(p_a_gt_b)f" % locals())
diff --git a/create_sample.py b/create_sample.py
 #!/usr/bin/env python

 """I don't currently have access to the movie or horse racing data, so
 use this script to create the group a and group b data sets that are
 used in subsequent steps.
 """

 import sys
 import random

 def write_sample(stream, n, lam):
    """Write `n` exponentially distributed random values to `stream`.

    stream -- writable text file object; its `name` is used in the
              progress message
    n      -- number of values to write, one per line
    lam    -- rate parameter passed to `random.expovariate`

    A one-line progress message is written to stderr before sampling.
    """
    # the two literals below are concatenated, so the first one needs a
    # trailing space to keep "distribution" and "with" apart
    sys.stderr.write((
        '%(filename)s with %(n)d values from exponential distribution '
        'with lam=%(lam)s\n'
    ) % {'filename': stream.name, 'n': n, 'lam': lam})
    # `range` instead of the Python 2-only `xrange`
    for _ in range(n):
        stream.write(str(random.expovariate(lam)) + '\n')

 # generate both data sets: (output file, number of values, rate)
 for path, size, rate in (
    ('sample_a.dat', 100000, 1.5),
    ('sample_b.dat', 20000, 1.1),
 ):
    with open(path, 'w') as stream:
        write_sample(stream, size, rate)
	#!/usr/bin/env python

	"""Given two files that contain a list of data values, estimate the
	probability that a randomly chosen value from sample a is greater than
	a randomly chosen value from sample b
	"""

	import random

	def read_data(filename):
	    """Read whitespace-separated numeric values from `filename`.

	    Returns a list of floats (empty when the file holds no values). A
	    real list, rather than a lazy `map` object, is returned so callers
	    can use `len()` and `random.choice()` on it under Python 3 as well.
	    Raises ValueError if a token cannot be parsed as a float.
	    """
	    with open(filename) as stream:
	        return [float(token) for token in stream.read().split()]

	# number of random (a, b) pairs to draw
	m = 10000

	# random.choice needs an indexable sequence, so materialize whatever
	# read_data hands back
	a_data = list(read_data('sample_a.dat'))
	b_data = list(read_data('sample_b.dat'))


	# calculate how frequently a random a is greater than a random b;
	# `range` (not the Python 2-only `xrange`) keeps this runnable on Python 3
	count = 0
	for _ in range(m):
	    if random.choice(a_data) > random.choice(b_data):
	        count += 1

	p_a_gt_b = float(count) / m
	print("p(a > b) = %(p_a_gt_b)f" % locals())
	#!/usr/bin/env python

	"""I don't currently have access to the movie or horse racing data, so
	use this script to create the group a and group b data sets that are
	used in subsequent steps.
	"""

	import sys
	import random

	def write_sample(stream, n, lam):
	    """Write `n` exponentially distributed random values to `stream`.

	    stream -- writable text file object; its `name` is used in the
	              progress message
	    n      -- number of values to write, one per line
	    lam    -- rate parameter passed to `random.expovariate`

	    A one-line progress message is written to stderr before sampling.
	    """
	    # the two literals below are concatenated, so the first one needs a
	    # trailing space to keep "distribution" and "with" apart
	    sys.stderr.write((
	        '%(filename)s with %(n)d values from exponential distribution '
	        'with lam=%(lam)s\n'
	    ) % {'filename': stream.name, 'n': n, 'lam': lam})
	    # `range` instead of the Python 2-only `xrange`
	    for _ in range(n):
	        stream.write(str(random.expovariate(lam)) + '\n')

	# group a: 100000 values with lam=1.5
	with open('sample_a.dat', 'w') as stream:
	    write_sample(stream, 100000, 1.5)

	# group b: 20000 values with lam=1.1
	with open('sample_b.dat', 'w') as stream:
	    write_sample(stream, 20000, 1.1)