Skip to content

Commit 39c3a21

Browse files
committed
Added - ProblemSet6/DNA
1 parent 933e95b commit 39c3a21

File tree

1 file changed

+92
-0
lines changed
  • Week 6 - Python/ProblemSet6/DNA

1 file changed

+92
-0
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""
2+
https://cs50.harvard.edu/x/2023/psets/6/dna/
3+
"""
4+
5+
import csv
6+
import sys
7+
8+
def main():
    """Identify whose DNA profile matches a sequence file.

    Expects two command-line arguments: the path of a CSV database whose
    header row is ``name`` followed by one column per STR, and the path of
    a text file holding the DNA sequence to analyse.

    Prints the name of the first person whose STR counts all equal the
    longest runs found in the sequence, or ``No match`` if nobody does.
    Exits with status 1 (after printing a usage message) when the number
    of arguments is wrong.
    """
    # Check for command-line usage; stop here so the file reads below never
    # index past the end of sys.argv.
    if len(sys.argv) != 3:
        print("Usage: python dna.py DATABASE SEQUENCE")
        sys.exit(1)

    # Read database file into a list of rows (header row first).
    # newline="" lets the csv module handle line endings itself.
    with open(sys.argv[1], newline="") as file:
        database = list(csv.reader(file))

    # Read DNA sequence file into a string.
    with open(sys.argv[2]) as file:
        sequence = file.read()

    # An empty database has no header and no profiles to compare against.
    if not database:
        print("No match")
        return

    header, *profiles = database

    # Longest run of each STR, in header order (column 0 is 'name').
    # A list keeps one entry per column so it always lines up with the
    # per-person counts, even if a header name is repeated.
    sequence_dna = [longest_match(sequence, str_name) for str_name in header[1:]]

    # Check database for matching profiles.
    for person in profiles:
        person_dna = [int(count) for count in person[1:]]

        # Report the first person whose counts match on every STR.
        if person_dna == sequence_dna:
            print(person[0])
            return

    # Nobody in the database matched.
    print("No match")
51+
52+
53+
def longest_match(sequence, subsequence):
    """Return the length of the longest run of subsequence in sequence.

    A "run" is the number of back-to-back, non-overlapping copies of
    ``subsequence`` starting at some position in ``sequence``.

    Args:
        sequence: The string to search.
        subsequence: The repeating unit to look for.

    Returns:
        The largest number of consecutive repeats found, or 0 when
        ``subsequence`` is empty or never occurs.
    """
    subsequence_length = len(subsequence)

    # An empty subsequence matches at every position, so the repeat-counting
    # loop below would never terminate; treat it as "no run".
    if subsequence_length == 0:
        return 0

    longest_run = 0

    # Only start positions that leave room for a full subsequence can match.
    # If the subsequence is longer than the sequence the range is empty.
    for i in range(len(sequence) - subsequence_length + 1):

        # Count consecutive copies of subsequence starting at position i,
        # hopping forward one subsequence length after each match.
        count = 0
        while sequence.startswith(subsequence, i + count * subsequence_length):
            count += 1

        # Update most consecutive matches found.
        longest_run = max(longest_run, count)

    # After checking for runs at each character in sequence, return longest run found.
    return longest_run
89+
90+
91+
# Run the program only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)