"""Find the creature whose sequence profile best matches a letter grid.

The grid is read along every row and every column, in both directions.
Each target sequence is counted (overlapping matches included) over all
of those strings.  Every creature lists one expected count per sequence;
the creature with the smallest sum of absolute differences to the
observed counts is reported as ``<distance>:<name>``.
"""

# Small worked example.  The original script assigned these values and
# immediately overwrote them with the real data below; they are kept under
# their own names so they stay available for manual checks.
SAMPLE_GRID = """CCCGC
ACCCA
TATAC
GAGGC
TGTAA"""
SAMPLE_SEQUENCES = ("AT", "TAC", "CG")
SAMPLE_CREATURES = """Limaçon de Folfanga\t5\t3\t2
Porc Taudanzain vert\t6\t2\t5"""

# Real grid: one row of letters per line.
GRID = """CGTCGGCGCACCCGAAATCGGAGGTTCGCTAGCGAGGTTCTCACAGTCAGAACTTTTCTTCTTATGGGTAGTATGATCAG
AAGGCAACTAGGTCTATTCTCGTATGCTCCCATTCATAAATTGGATTATAATACAAACTACGCGAGCATGGGATGACTAT
GAGATCGAGTCTGTGAAAGTTAAGGGCGGTTAAGACTACAACGGTTATAGGTGCAATATCGTCAAGGCGAAGCCTCGTTA
TTTGTTCTCCGATCGTCTTGTGGTCTACTAGCAATGTAAACCCCGATCACGCAACGGGTCCTACGCCCCTACGCTGGACG
ATGATTAAATTCACCGAATGTTTAACCACGCTGTTGAAGGCACATCGTATGAACTGTGGACAAGGGCTAGTTGCCCGGGG
GGTACATTCGTAGGCCATATGCCGACAAAAATGCTCATAACAAACACAGCGCGCTGGGTAGAGATCTGGAGTACATTAAT
GTGAGTACTTTGATGGGAGCGACTCACGACTATACGTGTCGCGTCGTGATCGGAAACTACTCAGCGGTCTATCTATTTGC
GTAGTAAAATCAAGGGGCAAAAACTCTAATAGTTGAACAAGGTGCGGATCCACATATTCTAGCTCATCCCATGTATCGTA
TAAAGCAAACACACCCGGACTGCTTCGATCAGTAATACCGACAACCGCATGCTGTCTAACAAACCCGATAGTAACGGAAT
GTCGCTCCCAAGATGTATAGAACTAAGCTGCACCACAGGTCTGATACATTTTTTGCCGGCGATAAGAAATTACCGGCAGT
TGGCGAATTTAGTGTCCGGGGTAGCATAAAGTGGGAATGTCGCTAGATTGAATGTACGCAACTAATTCGGGAGGTAGATG
CATATGTGGGAGGCGGGTCAGCAGCTGCAATTGGTAAGTCCCGTTTTACAGGACTGTACGAACACGTCCTACCATGATCG
TCGTATCTACTGCGGAGAGCGTTGATTTTGGGGCGTACGAGACGAATCCCCACCAAAGTAGGAGATGTCGAGCTCGCATG
GAGAGGATCGTGATTACCGTAGCATTGGCTGTTAGGAGCGTTGAAAGGTTGTTTATTGCATTTAAGCGGTAAGCCATCGA
GCGAGTGGATACGTCGGAGGCGTATGCCGCACGCCTATAGAGGGGCAAGGGTGACCATTCAAAGTACTCTAGTTGTTATG
TTCTATCGATATCATCGACTGCATCTGTGTACGGGGACGCGGCACTACCTAGGGGTTTCGAGGACTTGTGCCGCTATCCG
TGCGGGCCGAATCATTGCGATATTGCTGCATTGTATTATGGCACGCACTCGTTAACCATGACCCACGCGATTAGTGTGGG
GTTGATGCGACGAAAAGAGCTATAGACTAGTTCGAGAAGCGCAAAGATTATGCCGTTCACGCAGCCAAGAAACGTTCCGG
AGTTTCCTGACGTCGTCACTAGGAAGCCCTATTGGGACACCGGCTCTGTTCCTCGCACCTCATAGGAGTCCGCGCAACTT
TCCGCACTAGCACCCTTAATTCGTCACCGTTCCGCTACCTGTTCGAATCTGGAGACCTATCTGGTGGAACGCGGTAGGTG
GAGTTTTATTCCGGCGTTCCCGAGTCGCTCCGTGGATTAATTGTCTGCCTGTATTATGTCTATAGCCTGGCCCTGAAAGC
GGCTACCTTAGGCTCCTGCTATAAATCACTGTGATTTTAAACCCCTCGGGCAATTTTACTGTACAGCTAACGGGATCCTT
TCCGTAATCTAGCCGTAGTACTCAAGTGAAGTGCTAAGGTATTTGAGTTTCTGGTCGAAGGCGCTTTGGCACTACAATTG
CTCTAACGAACCGGACTGTCATGAGCTTCCCCGGACCCTTGACGATCATTCTTGGGGCTGGGTGGTTAGACAGCCTTATC
GGGCTTGACCCTCTGACCATTAGTAGATATACTCTGACCTTGAGGATATACGTTGTCCCGCACCTCAAAGGGGGATATTG
GAAATCCCATCCCTTCCAATAAAACGAGCGCACGCCCGTATATAGTACCAACCTTACCAGTCACATATGATGTTTATGGG
ATTAACGCAGGTGACCGCAGGCAAAATAAGGAAATGCACGATTGGTTTGAAGCCGCGATAAGTCGCTGACAGTATAGTGG
TCGTTACTTACACTTCTCGACTCGTAGAAAGGCCAAGGCCTCTCTACGAAGTAAAGCAGACTCGAACATGGAAACTCACG
AGGATCCAGGCGAAGCTGATGTTGACTACAAACGCTGCCGAATGGCTAGTTATGTCTGTGAGCCTACTACTGCTGGAGTG
GACGCCTAATTTGAGTTGGCGTTAAGGCCGTATGGGCAGATAGGGGGGAGAGGCCTACGACGGGTAAGTGATGAAGGTCG
CATCGGTGCGCTTGGACATAGCCCAAGCCGCGAGGATTAGCGTGGTTACTAACGCGATACAGTTACTTTAGTGAAGTTTG
TAGGATTGTGAGCTTTCCTCAAGGGGCCGATCCGTAAATTAAGCGGAATATGGAGATTCACTCGTATGTGGAATGATGCG
CCAGGTCGACGTATGTTGATTTAATGTACTATCGGGATAACTGGACGCATACCGCCGACTCAGTGCGGTTGTGCCTTGGG
CGTCGCCCCCCTGCTTCTGTAGGGTCATGACGCTCCGACATCTGCTGGGCTCTCTAACCGAGACGGGCTCTATGCAAGGT
TTCCGTAAAGGGGGAGTGAAGGTATATACCGAAGCTAATTGAAACCAGGAGGACGACGGTAAAATCCCAGCCAGGACTCT
GGACGCTATTGGGTCTAGGCAAGCATACCGAGAACGCCCGATAGGTTACTCACGTTGTGACGATACGATTGCGGTGGAAC
GTACCAATCTTAAAGGCGTACTGTGGTTAAAGCCTGTGACGGTATCATGGCACAATTTGTCAAGCTGCGCAACACGTCTG
AGGGACAGCCGCCTTCCTGAACCACGTCGTGCCTACGCTGTACGCGCTGGGGTACGGTCACTAGGCTCGGCACGGTAAAT
GTCGAGCGCAATACGCGGAACGGCAGGATCAACCATTACCGTGTAGCTGATACTCTGTGTGTTATCTCTACATTCTGATG
TTTCATTTTAGAGCTTAGGTGCATCCATCTTTGAACCCAGCTAACCGGCGGTGTCCGTTCACCCTAATGGCGCTGAACCT
CCGGTCCGGGTTTATCAAGGAAAAATCCGAACTCGTATGCATCGCGCGTCGTCACAAAGTTCTTTGAGATGCTTCAAGGT
TTGATCCTTGGGCGATTTACTGGCTTTACACTATCGACGGAATTATCCCAACCGGACTTATGTATACATTTCTTACACAG
CTCGTTTCGGCCAGACCCCGTTTCGTATAGCTAATGTTACGATATTTTCCTATGGGGACGATGACGCAATTAAGGAAAAA
GCATGATGCTAGATAGCACCCCCACGTAGGAAACTGTAGGATACAGGTCTGTAGCCGCACAAAGAAACCATCTTACAACT
ACAACAGGGGCAGCCTGTCAAGATGGTCAGCACGTTATTACTTTGGCCCGTGCCGCGAGCAATATAAAGTATTAGGTGCG
ACATAGTAAGAGCTTTCAGTTGCAGATAGTTGTCGGGGAGCCCGGTGAGTGTAACTCGTAGTGATGGCTCAGTTCGACCC
ACATTGGAAACTCGGTTAGTACAAATCACTTGGATTTCGCCTGAGGACCGTACAATGCCTGCCACCTAAACAGTCATGGA
GCTCAGGATCACCTATAGGGTGACATGTTGTCAACGCGCCAATGACAGGAGGGGATCTCTGAACTTCGCGCCAATCAGCT
TATAAACTACTATCTTCCGATAGCACCATCCAGAGACTTCCGCGTATGAGAGAACAACTGCCATTGTCCCTAAGGGACTG
GCCGCATAAGAAGGACCCCATTGTAACTCAACGTAAGTGTTCGACGGCTTGCATTTCTTGATTAGATCGAGCACGTCCCG
TCGGGACTGTATTTGTATTGCTCCTAGTAGAACACGCATGGCAGCACTATGTTCTTTCACCCCATCGTTCGGTGATCGGA
TTATACCCAACGATATCTATTTGACATTACATTCGTAACCTATAACTTCCTTTGCAGTGCTTTGCCTTTGCGAGTACGAA
ACTCGGCACATTTGGCAGCCTACAGACAAACCGACCTGGAGTTTCTCGACAGGCGCTAAACTTGTTTGGTGGCAGTGCGC
AGCGCTCCGTACCACCACGCGTCGGATTGATCCGGGCCAATGTCATGGCCGGGGCTGATACTTCGTTAGCGTGTAAGAAC
AGGCGCTCCTGAAGTACCTACGATACGTTCACGCATGCAAACGAGCCCTTAGAGCCTGCCCGACGTCCGTTAATATGGCG
AGCAGAAGGCATAAATGCTGTTAATCAAGCTTCCTGTACTCCCGCTTATTTACTACTGTGCCAAGTGGAGTTAATGTTGA
GACAAGGACTTGCTTGGTGACCCAGCGGGCTACAACATCCTGAACTTGCCCATGGGTCCGGAGTGTTTCTAGGGGACAAG
GGTTAACAGTATTACCGTGATATTTTCTAACCACGTCAGCCCAATTTAATTCGAAACATGTCGAGTGTCCACTTATGGGC
TACCAAGCTCTAGCGAGTGTAATCTCCGCCTAATGCTCCCCCTAGGAACGTCAGAACGGGACGCGCGTTGCGTACAGAGA
GTTCTGAAATCGGCACTATAAGTGATTAACTGGGTACCGGTCGTTCAGCATCAGAAGCGTCCATTAACCGTGAGCGCGGA
AGATACTGCAGGCTTCATCTCCTCGAGTAATTTTGATGTATGGCATGAATATATACCGTGCATGTGGTTGCTGGCATGAC
CCTGAAAGTCAGAGACTAGTTCCCAGGCCTCGTCAGCTCCTCCCGAGCGCTCACGTCACGTTGATTAATGTCTGCTTGGG
TGGTCTGCCCGTAGCCAGAACTTGGTCGGTATTCCGCACGCACTAAGGCGTGTCATAGAAGCGTCCCTGCTCACCGCGGC
GCAAGGTGAGCGTACATCTTGCCAGGACACAGGTACGGAGAATCCTATCGGGAAGGAGGCCATAACAGTGAGTACAGGTT
CTTAACCCAAACATTCCCTTGGCGGACTACAGAGTAAGACCGGCAACAGCTAAACGAGATTGCCTAGACTTAACATACTC
AGTGATAGTACTCTCCAGGATTTGCCCCGTTAGCAAGTTCGACAAGCTGCGAGCGGATCTCATAGATATCACGACGGGCT
TCTCAATCGTCGTCTAACCAAAGCTACCTATTCGTTCCCCGGGCGACTAGGTTTCTGGGGCCCGGTATGGTAAATTTGTA
TTCGTTATGGCTTTGGGGAGTAAGCACCTAAAAACTGACCTCAGGCCTGTAAATATTCGTAACCCAACACCGCTGGTCTG
TGAGGCGAAAGGTCGCCGGCGCTGCTTGTCGAAGTGGTAACTTGGCTTCTTGCATGACCCATGGATATTTCATAAAGGGA
AGGCTCGACACAATCGCACATCTGACAGAGTAGACTGATTGGTCCGTCGCGTTGTGAGAATTGGTCCAGTAAGCCCAAGT
CTTATACGTCCGTAGTTGATGCGGTGGAACCCATTGAGAGACGAGAATCCCTGGTGATCAAGGCAAGGTGTACCACATCA
AGTAATCCCGTAAAGGTCCTTGCCCTACAGTGAATTTGCCAGCCAGTCTCGGGATTACCATATAACCTCCGTATCCGATC
TTATGGAAACTGGGAGAAGGTAGACCGTGGTAACTACCTAGAAGTCGCAGTCACGGAGTACCCCGTGACATCCATATGGG
ACTTAGGCCGCACTATAACGGCCAGCCATTCAAAGGAGTATTTAGTCACCCGCATCTGATCGGACGTAGTGTCTTCAGCG
CTACGCCTTGTTCGCAAACATTGCTAAGGTGTCCTACTCTCCCCGGATAACCAAAGGAGAGGCCAGATAAGCTTCATCTG
CCGCAAGAACAAGCGCGAACTCAGTCCCGCTGGTTATGGGGTCACTTGCTCTTCACTCCCCCTCCAGGGGGGAACTCGTA
TTGGCCGATCGGGATTGGAGTAGAGGTGATTCAAGGTAAACCGACATTTCGTGCGACTCTATGGAGTATACAGAAATTAC
TGGACTCGGAGAGGCCCTAGCGACCCGAACCGATATAGCAAGGATTAAGGCTTTCAGCAGTTACGTTTCCCATTCGATAC
GCGACTCGATGGCCGAGTAAAGACACAGAGAATTCCGGGTAAACTTAATCTGCATGTTGATCCAATCTACTCTCCGACGT
CCTCGAGAAGTACTGGAAGGCTCCCTGAGATAAACACACGCAACTTGTCCTCACTAAATGTGCAACAACTGTGAAAAAAG"""

# Sequences to count; their order defines the column order in CREATURES.
SEQUENCES = ("AGTCA", "TGCA", "AAGCT", "AGTC", "AGTGG")

# One creature per line: name, then one expected count per sequence.
# NOTE(review): the parser splits on tabs, so the separators are written as
# explicit \t escapes here -- confirm the original data used single tabs.
CREATURES = """Andorien\t31\t100\t36\t97\t20
Bétazoïde\t36\t102\t32\t91\t21
Kazon\t38\t106\t35\t90\t29
Cardassien\t36\t107\t32\t97\t24
Tellarite\t30\t102\t33\t98\t27
Ocampa\t31\t108\t31\t91\t25
Trill\t32\t103\t32\t91\t25
Rémien\t35\t100\t36\t97\t26
Ferengi\t31\t108\t38\t97\t22
Denobulan\t37\t109\t36\t92\t25
Anticanien\t33\t109\t36\t98\t28
Ligonien\t39\t100\t30\t91\t22
Xindis\t36\t108\t38\t93\t21
Talaxien\t36\t108\t39\t90\t22
Suliban\t33\t101\t33\t99\t21
Vorta\t36\t104\t35\t97\t29
Klingon\t38\t100\t32\t90\t27
Dreman\t30\t109\t34\t92\t27
Benzite\t33\t104\t37\t90\t27
Breen\t34\t108\t37\t93\t20"""

# Values reported when there is no creature to compare against.
FALLBACK_CREATURE = "Rognogno"
FALLBACK_DISTANCE = 999

SEPARATOR = "*******"


def build_table(grid):
    """Return every row and column of *grid*, each forwards and backwards.

    *grid* is a newline-separated block of text.  The result lists, for each
    row, the row and its reverse, followed by, for each column, the column
    read top-to-bottom and its reverse.  An empty grid yields an empty list;
    if rows have different lengths, columns stop at the shortest row.
    """
    rows = grid.splitlines()
    table = []
    for row in rows:
        table.append(row)
        table.append(row[::-1])
    # zip(*rows) transposes the grid without needing a separately tracked
    # width, so an empty grid simply produces no columns.
    for column in zip(*rows):
        text = "".join(column)
        table.append(text)
        table.append(text[::-1])
    return table


def count_overlapping(text, pattern):
    """Count occurrences of *pattern* in *text*, overlapping ones included.

    Raises:
        ValueError: if *pattern* is empty (an empty pattern matches at every
            position, so the count would be meaningless).
    """
    if not pattern:
        raise ValueError("sequence must not be empty")
    count = 0
    # Resume one character after each hit so overlapping matches are found;
    # passing the start index to find() avoids re-slicing the string.
    position = text.find(pattern)
    while position != -1:
        count += 1
        position = text.find(pattern, position + 1)
    return count


def count_sequences(table, sequences):
    """Return ``{sequence: total occurrences over all strings in table}``.

    Keys follow the order of *sequences*; duplicates collapse to one key.
    """
    return {
        seq: sum(count_overlapping(item, seq) for item in table)
        for seq in sequences
    }


def parse_creatures(text, sequences):
    """Parse tab-separated creature lines into ``{name: {sequence: value}}``.

    Each non-blank line holds a name followed by one integer per sequence,
    in the order of *sequences*.  Extra trailing fields are ignored.

    Raises:
        ValueError: if a line has fewer values than there are sequences, or
            a value is not an integer.
    """
    # Deduplicate while keeping order so columns line up with the keys
    # produced by count_sequences().
    keys = list(dict.fromkeys(sequences))
    profiles = {}
    for line in text.splitlines():
        if not line.strip():
            continue
        name, *values = line.split("\t")
        if len(values) < len(keys):
            raise ValueError(
                "expected {} values for {!r}, got {}".format(
                    len(keys), name, len(values)
                )
            )
        profiles[name] = {
            seq: int(value.strip()) for seq, value in zip(keys, values)
        }
    return profiles


def creature_distances(profiles, counts):
    """Return ``{name: sum of |expected - observed| over its sequences}``."""
    return {
        name: sum(abs(expected - counts[seq]) for seq, expected in profile.items())
        for name, profile in profiles.items()
    }


def closest_creature(distances):
    """Return ``(distance, name)`` for the smallest distance.

    Ties go to the creature listed first.  With no creatures at all the
    placeholder ``(FALLBACK_DISTANCE, FALLBACK_CREATURE)`` is returned; a
    real creature always wins over the placeholder, however far away it is.
    """
    if not distances:
        return FALLBACK_DISTANCE, FALLBACK_CREATURE
    name = min(distances, key=distances.get)
    return distances[name], name


def main():
    """Run the analysis on the embedded data and print the results."""
    profiles = parse_creatures(CREATURES, SEQUENCES)
    print(profiles)
    print(SEPARATOR)

    counts = count_sequences(build_table(GRID), SEQUENCES)
    print(counts)
    print(SEPARATOR)

    distances = creature_distances(profiles, counts)
    for name, distance in distances.items():
        print(name, distance)
    print(SEPARATOR)

    print("{}:{}".format(*closest_creature(distances)))


if __name__ == "__main__":
    main()