Benutzerspezifische Werkzeuge
Sie sind hier: Startseite Informatik Tutorium WS 2011/12 Lösungen Lösung Dotplot

Lösung Dotplot

15.12.2009, Fabian Schmich

Dotplot.java — Java source code, 5Kb

Dateiinhalt

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;

/**
 * Create a basic dotplot between two sequences.
 * -- An exercise to repeat what we've learned so far:
 * 		+ Conditionals
 * 		+ For and while loops
 * 		+ Constructors
 * 		+ Methods with different signatures
 * 		+ Basic File IO
 * @author Fabian Schmich, Fachschaft Bioinformatik
 * 
 */
public class Dotplot {

	/** Symbol printed where the two sequences carry the same character (a hit). */
	private static final char HIT_SYMBOL = '*';

	/** Symbol printed where the two sequences differ (a miss). */
	private static final char MISS_SYMBOL = ' ';

	/** First input sequence; labels the rows of the plot. */
	private final String sequence1;

	/** Second input sequence; labels the columns of the plot. */
	private final String sequence2;

	/** {@code hitMatrix[i][j]} is true iff {@code sequence1.charAt(i) == sequence2.charAt(j)}. */
	private final boolean[][] hitMatrix;

	/**
	 * Constructor using two Strings as input. The hit matrix is computed
	 * immediately.
	 *
	 * @param seq1 String for Sequence 1 (rows of the plot)
	 * @param seq2 String for Sequence 2 (columns of the plot)
	 * @throws NullPointerException if either sequence is {@code null}
	 */
	public Dotplot(String seq1, String seq2) {
		// Fail fast with a named argument instead of a bare NPE further down
		this.sequence1 = Objects.requireNonNull(seq1, "seq1");
		this.sequence2 = Objects.requireNonNull(seq2, "seq2");
		this.hitMatrix = computeHitMatrix(this.sequence1, this.sequence2);
	}

	/**
	 * Constructor using two Files as input. Each file is read completely
	 * with {@link #readSequence(File)}.
	 *
	 * @param seq1File File object for Sequence 1
	 * @param seq2File File object for Sequence 2
	 */
	public Dotplot(File seq1File, File seq2File) {
		this(Dotplot.readSequence(seq1File), Dotplot.readSequence(seq2File));
	}

	/**
	 * Reads a sequence from file. All lines of the file are concatenated
	 * (line terminators are dropped) into one sequence.
	 * <p>
	 * If the file cannot be found or cannot be read, a message is written to
	 * {@code System.err} and the JVM is terminated with exit code -1.
	 *
	 * @param seq File object containing the sequence
	 * @return String holding the parsed sequence
	 */
	public static final String readSequence(File seq) {
		// Only one thread touches this buffer, so the unsynchronized StringBuilder is enough
		StringBuilder sequence = new StringBuilder();
		/*
		 * try-with-resources closes the reader (and the FileReader it wraps)
		 * on every path, including when readLine() fails half way through.
		 */
		try (BufferedReader sequenceReader = new BufferedReader(new FileReader(seq))) {
			String line; // Helper variable to store lines while we read through the file
			// readLine() returns null once the end of the file is reached
			while ((line = sequenceReader.readLine()) != null) {
				sequence.append(line);
			}
		} catch (FileNotFoundException fnfe) {
			// 1) We don't find the file
			System.err.println("Cannot find file: " + seq.getPath());
			System.exit(-1);
		} catch (IOException ioe) {
			// 2) We have problems reading from (or closing) the file
			System.err.println("Cannot read from file: " + seq.getPath());
			System.exit(-1);
		}
		return sequence.toString();
	}

	/**
	 * Builds the similarity matrix: an entry is true if the two input
	 * sequences show the same symbol at the given pair of positions, and
	 * false otherwise.
	 *
	 * @param seq1 Sequence 1 (first matrix dimension)
	 * @param seq2 Sequence 2 (second matrix dimension)
	 * @return Boolean array of size {@code seq1.length() x seq2.length()} indicating hits
	 */
	private static boolean[][] computeHitMatrix(String seq1, String seq2) {
		boolean[][] matrix = new boolean[seq1.length()][seq2.length()];
		// Compare every character of seq1 with every character of seq2
		for (int s1pos = 0; s1pos < seq1.length(); s1pos++) {
			char rowSymbol = seq1.charAt(s1pos);
			for (int s2pos = 0; s2pos < seq2.length(); s2pos++) {
				// The comparison itself is the value we want to save
				matrix[s1pos][s2pos] = rowSymbol == seq2.charAt(s2pos);
			}
		}
		return matrix;
	}

	/**
	 * Translates an entry of the similarity matrix to the character that
	 * represents it in the printed plot.
	 *
	 * @param pos1 Position relative to sequence1
	 * @param pos2 Position relative to sequence2
	 * @return {@link #HIT_SYMBOL} for a hit, {@link #MISS_SYMBOL} otherwise
	 */
	private char getPlotChar(int pos1, int pos2) {
		return this.hitMatrix[pos1][pos2] ? HIT_SYMBOL : MISS_SYMBOL;
	}

	/**
	 * Prints the computed sequence similarities to the console, using the
	 * sequences as labels: sequence 2 forms the first row, sequence 1 the
	 * first column. Every plot cell is preceded by a single blank.
	 */
	public void printPlot() {
		// Header row: a blank for the top left corner, then sequence 2
		StringBuilder header = new StringBuilder(" ");
		for (int s2pos = 0; s2pos < this.sequence2.length(); s2pos++) {
			header.append(' ').append(this.sequence2.charAt(s2pos));
		}
		System.out.println(header.toString());

		// One row per character of sequence 1: the label, then hit/miss symbols
		for (int s1pos = 0; s1pos < this.sequence1.length(); s1pos++) {
			StringBuilder row = new StringBuilder();
			row.append(this.sequence1.charAt(s1pos));
			for (int s2pos = 0; s2pos < this.sequence2.length(); s2pos++) {
				row.append(' ').append(getPlotChar(s1pos, s2pos));
			}
			System.out.println(row.toString());
		}
	}

	/**
	 * Main function: Initialize dotplot object and print the plot using printPlot()
	 *
	 * @param args command line arguments (unused)
	 */
	public static void main(String[] args) {
		// Locale.ROOT keeps the result independent of the default locale
		// (with a Turkish default locale 'i' would otherwise become 'İ')
		String s1 = "Leben Sie mit im Eisnebel".toUpperCase(Locale.ROOT);
		Dotplot dp = new Dotplot(s1, s1);
		dp.printPlot();
	}
}
Artikelaktionen
« Februar 2012 »
Februar
MoDiMiDoFrSaSo
12345
6789101112
13141516171819
20212223242526
272829