/*
* created: 2010
*
* This file is part of Artemis
*
* Copyright(C) 2010 Genome Research Limited
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or(at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
package uk.ac.sanger.artemis.io;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.Iterator;
import net.sf.picard.reference.FastaSequenceIndex;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
import uk.ac.sanger.artemis.io.Entry;
import uk.ac.sanger.artemis.Options;
import uk.ac.sanger.artemis.components.EntryFileDialog;
import uk.ac.sanger.artemis.util.ReadOnlyException;
/**
 * A StreamSequence whose bases are read on demand from a fasta file
 * through a ".fai" index file located next to it, one contig at a time.
 */
public class IndexFastaStream extends StreamSequence
{
/** Reader for the fasta file; assigned in the constructor. */
private IndexedFastaSequenceFile indexSeqFile;
/** Index built from the ".fai" file; assigned in the constructor. */
private FastaSequenceIndex fastaIndex;
/** Length of the currently selected contig, set by setContigByIndex(). */
private int len;
/** Name of the currently selected contig, set by setContigByIndex(). */
private String contig;
/**
 * Open the fasta file behind the given entry together with its ".fai"
 * index (expected in the same directory, named after the fasta file) and
 * select the first sequence in the index as the current contig.
 * If the fasta file is rejected with an IllegalArgumentException an error
 * dialog is shown and the reader is left unset.
 * @param entry the entry to read from; it is cast to DocumentEntry
 */
public IndexFastaStream(Entry entry)
{
  DocumentEntry doc = (DocumentEntry)entry;
  if(doc instanceof URLDocument)
  {
    // not supported yet
    // NOTE(review): doc is a DocumentEntry, so this test was probably meant
    // to be doc.getDocument() instanceof URLDocument - confirm before changing.
  }
  else
  {
    File fasta = ((FileDocument)doc.getDocument()).getFile();
    // Resolve the directory from the absolute form of the file:
    // getParentFile() is null for a bare relative name such as "ref.fa".
    File fastaIndexFile = new File(fasta.getAbsoluteFile().getParentFile(),
                                   fasta.getName()+".fai");
    fastaIndex = new FastaSequenceIndex(fastaIndexFile);
    try
    {
      indexSeqFile = new IndexedFastaSequenceFile(fasta, fastaIndex);
    }
    catch(IllegalArgumentException ie)
    {
      // showMessageDialog, not showConfirmDialog: the 4-argument confirm
      // variant treats ERROR_MESSAGE as an option type and shows Yes/No buttons.
      JOptionPane.showMessageDialog(null,
          "Expecting fasta extensions:\n"+
          ReferenceSequenceFileFactory.FASTA_EXTENSIONS.toString()+
          "\n"+ie.getMessage(),
          "Error", JOptionPane.ERROR_MESSAGE);
    }
  }
  setContigByIndex(0);
}
/**
 * Test if the entry contains an indexed sequence, i.e. it is a
 * DocumentEntry backed by a FileDocument and a file named
 * "&lt;fasta name&gt;.fai" exists in the same directory as the fasta file.
 * @param entry the entry to test
 * @return true if the index file exists, false otherwise
 */
public static boolean isIndexed(Entry entry)
{
  if( !(entry instanceof DocumentEntry) ||
      !(((DocumentEntry)entry).getDocument() instanceof FileDocument) )
    return false;

  File fasta = ((FileDocument)((DocumentEntry)entry).getDocument()).getFile();
  // Resolve the directory from the absolute form of the file:
  // getParentFile() is null for a bare relative name such as "ref.fa".
  File fastaIndexFile = new File(fasta.getAbsoluteFile().getParentFile(),
                                 fasta.getName()+".fai");
  return fastaIndexFile.exists();
}
/**
 * Make the sequence at the given position in the fasta index the current
 * contig by recording its length and its name.
 * @param seqIndex position of the sequence, counted from 0 in index order
 */
public void setContigByIndex(int seqIndex)
{
  this.len = getLengthByIndex(seqIndex);
  this.contig = getContigByIndex(seqIndex);
}
/**
* Return a the given range of bases as a String. Returns an empty
* sequence if the end position is less than the start position.
* @param start The start base of the range.
* @param end The end base of the range.
**/
public String getSubSequence(int start, int end)
{
byte b[] = indexSeqFile.getSubsequenceAt(contig, start, end).getBases();
return new String(b).toLowerCase();
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
}
/**
 * Same as getSubSequence() but the bases are returned as a char array.
 * @param start The start base of the range.
 * @param end The end base of the range.
 * @return the characters of getSubSequence(start, end)
 */
public char[] getCharSubSequence(int start, int end)
{
  final String bases = getSubSequence(start, end);
  return bases.toCharArray();
}
/**
 * Look up the size of the sequence at the given position in the fasta index.
 * The value is parsed from the index entry's toString() text, which is
 * assumed to have the layout "contig NAME; location N; size N; ..." -
 * TODO confirm against the picard version in use.
 * @param seqIndex position of the sequence, counted from 0 in index order
 * @return the size, or -1 if the index has no entry at that position
 */
private int getLengthByIndex(int seqIndex)
{
  Iterator<?> it = fastaIndex.iterator();
  for(int i = 0; it.hasNext(); i++)
  {
    Object obj = it.next();
    if(i != seqIndex)
      continue;

    final String text = obj.toString();
    // Search for the last "; size " label rather than counting ';' from the
    // front, so a contig name that itself contains ';' does not shift the field.
    final String label = "; size ";
    final int at = text.lastIndexOf(label);
    if(at < 0)
    {
      // label not present: fall back to positional parsing
      return Integer.parseInt(text.split(";")[2].substring(5).trim());
    }

    final int from = at + label.length();
    final int to = text.indexOf(';', from);
    final String size = (to < 0) ? text.substring(from) : text.substring(from, to);
    return Integer.parseInt(size.trim());
  }
  return -1;
}
/**
 * Look up the name of the sequence at the given position in the fasta index.
 * The value is parsed from the index entry's toString() text, which is
 * assumed to have the layout "contig NAME; location N; size N; ..." -
 * TODO confirm against the picard version in use.
 * @param seqIndex position of the sequence, counted from 0 in index order
 * @return the contig name, or null if the index has no entry at that position
 */
private String getContigByIndex(int seqIndex)
{
  Iterator<?> it = fastaIndex.iterator();
  for(int i = 0; it.hasNext(); i++)
  {
    Object obj = it.next();
    if(i != seqIndex)
      continue;

    final String text = obj.toString();
    // The name ends at the last "; location " label; cutting at the first
    // ';' would truncate a contig name that itself contains ';'.
    final int end = text.lastIndexOf("; location ");
    if(end < 6)
    {
      // label not present: fall back to positional parsing
      return text.split(";")[0].substring(6).trim();
    }
    // 6 == length of the leading "contig" label
    return text.substring(6, end).trim();
  }
  return null;
}
/**
 * Reset the fasta reader and step through its sequences until the one at
 * the given position is reached.
 * @param seqIndex position of the wanted sequence, counted from 0
 * @return the sequence at that position, or null if the reader runs out first
 */
public ReferenceSequence getReferenceSequence(int seqIndex)
{
  indexSeqFile.reset();
  ReferenceSequence current = indexSeqFile.nextSequence();
  for(int position = 0; current != null; position++)
  {
    if(position == seqIndex)
      return current;
    current = indexSeqFile.nextSequence();
  }
  return null;
}
/**
 * Returns the length recorded for the current contig by the most recent
 * call to setContigByIndex().
 * @return the length of the current contig in bases
 **/
public int length()
{
  return this.len;
}
/**
 * Not implemented: always returns null.
 * @return null
 */
@Override
public StreamSequence copy()
{
// TODO not implemented yet; callers currently receive null
return null;
}
/**
 * Identify the format of this sequence.
 * @return StreamSequenceFactory.INDEXED_FASTA_FORMAT
 */
@Override
public int getFormatType()
{
return StreamSequenceFactory.INDEXED_FASTA_FORMAT;
}
/**
 * Always refuses the change: a warning dialog is shown and a
 * RuntimeException wrapping a ReadOnlyException is thrown.
 * @param dna ignored
 */
public void setFromChar(final char dna[])
{
  final String message = "Read only sequence.";
  final String title = "Warning";
  JOptionPane.showMessageDialog(null, message, title, JOptionPane.WARNING_MESSAGE);

  final ReadOnlyException cause = new ReadOnlyException();
  throw new RuntimeException(cause);
}
/**
 * Not implemented: nothing is written to the given writer.
 * @param writer unused
 */
@Override
public void writeToStream(Writer writer) throws IOException
{
// TODO not implemented yet; intentionally empty
}
/**
 * Give access to the underlying fasta reader.
 * @return the reader assigned in the constructor; null if it was never assigned
 */
public IndexedFastaSequenceFile getIndexSeqFile()
{
  return this.indexSeqFile;
}
/**
 * Give access to the underlying fasta index.
 * @return the index assigned in the constructor; null if it was never assigned
 */
public FastaSequenceIndex getFastaIndex()
{
  return this.fastaIndex;
}
// Stand-alone entry point for trying this class out from the command line.
/**
 * Manual test driver: load the file named by the first argument as an
 * entry, wrap it in an IndexFastaStream and print the start of the first
 * contig (at most 8000 bases).
 * @param args args[0] is the path of the fasta file; http locations are
 *             not supported
 */
public static void main(String args[])
{
  if(args.length < 1)
  {
    // previously an ArrayIndexOutOfBoundsException stack trace
    System.err.println("Usage: IndexFastaStream <fasta file>");
    return;
  }

  if(args[0].startsWith("http:"))
  {
    // previously fell through with a null entry and failed inside the constructor
    System.err.println("Remote sequences are not supported: "+args[0]);
    return;
  }

  EntryInformation new_entry_information =
    new SimpleEntryInformation(Options.getArtemisEntryInformation());
  try
  {
    final uk.ac.sanger.artemis.Entry entry =
      new uk.ac.sanger.artemis.Entry(EntryFileDialog.getEntryFromFile(
          null, new FileDocument(new File(args[0])),
          new_entry_information, true));
    Entry emblEntry = entry.getEMBLEntry();

    IndexFastaStream istream = new IndexFastaStream(emblEntry);
    // do not ask for more bases than the selected contig has
    final int end = Math.min(8000, istream.length());
    System.out.println(istream.getCharSubSequence(1, end));
  }
  catch (Exception e)
  {
    e.printStackTrace();
  }
}