/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ #include "nsConverterInputStream.h" #include "nsIInputStream.h" #include "nsICharsetConverterManager.h" #include "nsIServiceManager.h" #define CONVERTER_BUFFER_SIZE 8192 NS_IMPL_ISUPPORTS3(nsConverterInputStream, nsIConverterInputStream, nsIUnicharInputStream, nsIUnicharLineInputStream) static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); NS_IMETHODIMP nsConverterInputStream::Init(nsIInputStream* aStream, const char *aCharset, PRInt32 aBufferSize, PRUnichar aReplacementChar) { if (!aCharset) aCharset = "UTF-8"; nsresult rv; if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE; // get the decoder nsCOMPtr ccm = do_GetService(kCharsetConverterManagerCID, &rv); if (NS_FAILED(rv)) return rv; rv = ccm->GetUnicodeDecoder(aCharset ? aCharset : "ISO-8859-1", getter_AddRefs(mConverter)); if (NS_FAILED(rv)) return rv; // set up our buffers rv = NS_NewByteBuffer(getter_AddRefs(mByteData), nsnull, aBufferSize); if (NS_FAILED(rv)) return rv; rv = NS_NewUnicharBuffer(getter_AddRefs(mUnicharData), nsnull, aBufferSize); if (NS_FAILED(rv)) return rv; mInput = aStream; mReplacementChar = aReplacementChar; return NS_OK; } NS_IMETHODIMP nsConverterInputStream::Close() { nsresult rv = mInput ? mInput->Close() : NS_OK; PR_FREEIF(mLineBuffer); mInput = nsnull; mConverter = nsnull; mByteData = nsnull; mUnicharData = nsnull; return rv; } NS_IMETHODIMP nsConverterInputStream::Read(PRUnichar* aBuf, PRUint32 aCount, PRUint32 *aReadCount) { NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset; if (0 == readCount) { // Fill the unichar buffer readCount = Fill(&mLastErrorCode); if (readCount == 0) { *aReadCount = 0; return mLastErrorCode; } } if (readCount > aCount) { readCount = aCount; } memcpy(aBuf, mUnicharData->GetBuffer() + mUnicharDataOffset, readCount * sizeof(PRUnichar)); mUnicharDataOffset += readCount; *aReadCount = readCount; return NS_OK; } NS_IMETHODIMP nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, void* aClosure, PRUint32 aCount, PRUint32 *aReadCount) { NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); PRUint32 bytesToWrite = mUnicharDataLength - mUnicharDataOffset; nsresult rv; if (0 == bytesToWrite) { // Fill the unichar buffer bytesToWrite = Fill(&rv); if (bytesToWrite <= 0) { *aReadCount = 0; return rv; } } if (bytesToWrite > aCount) bytesToWrite = aCount; PRUint32 bytesWritten; PRUint32 totalBytesWritten = 0; while (bytesToWrite) { rv = aWriter(this, aClosure, mUnicharData->GetBuffer() + mUnicharDataOffset, totalBytesWritten, bytesToWrite, &bytesWritten); if (NS_FAILED(rv)) { // don't propagate errors to the caller break; } bytesToWrite -= bytesWritten; totalBytesWritten += bytesWritten; mUnicharDataOffset += bytesWritten; } *aReadCount = totalBytesWritten; return NS_OK; } NS_IMETHODIMP nsConverterInputStream::ReadString(PRUint32 aCount, nsAString& aString, PRUint32* aReadCount) { NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); PRUint32 readCount = mUnicharDataLength - mUnicharDataOffset; if (0 == readCount) { // Fill the unichar buffer readCount = Fill(&mLastErrorCode); if (readCount == 0) { *aReadCount = 0; return mLastErrorCode; } } if (readCount > aCount) { readCount = aCount; } const PRUnichar* buf = reinterpret_cast(mUnicharData->GetBuffer() + mUnicharDataOffset); aString.Assign(buf, readCount); mUnicharDataOffset += readCount; *aReadCount = readCount; return NS_OK; } PRUint32 nsConverterInputStream::Fill(nsresult * aErrorCode) { if (nsnull == mInput) { // We already closed the stream! *aErrorCode = NS_BASE_STREAM_CLOSED; return 0; } if (NS_FAILED(mLastErrorCode)) { // We failed to completely convert last time, and error-recovery // is disabled. We will fare no better this time, so... *aErrorCode = mLastErrorCode; return 0; } // We assume a many to one conversion and are using equal sizes for // the two buffers. However if an error happens at the very start // of a byte buffer we may end up in a situation where n bytes lead // to n+1 unicode chars. Thus we need to keep track of the leftover // bytes as we convert. PRInt32 nb = mByteData->Fill(aErrorCode, mInput, mLeftOverBytes); #if defined(DEBUG_bzbarsky) && 0 for (unsigned int foo = 0; foo < mByteData->GetLength(); ++foo) { fprintf(stderr, "%c", mByteData->GetBuffer()[foo]); } fprintf(stderr, "\n"); #endif if (nb <= 0 && mLeftOverBytes == 0) { // No more data *aErrorCode = NS_OK; return 0; } NS_ASSERTION(PRUint32(nb) + mLeftOverBytes == mByteData->GetLength(), "mByteData is lying to us somewhere"); // Now convert as much of the byte buffer to unicode as possible mUnicharDataOffset = 0; mUnicharDataLength = 0; PRUint32 srcConsumed = 0; do { PRInt32 srcLen = mByteData->GetLength() - srcConsumed; PRInt32 dstLen = mUnicharData->GetBufferSize() - mUnicharDataLength; *aErrorCode = mConverter->Convert(mByteData->GetBuffer()+srcConsumed, &srcLen, mUnicharData->GetBuffer()+mUnicharDataLength, &dstLen); mUnicharDataLength += dstLen; // XXX if srcLen is negative, we want to drop the _first_ byte in // the erroneous byte sequence and try again. This is not quite // possible right now -- see bug 160784 srcConsumed += srcLen; if (NS_FAILED(*aErrorCode) && mReplacementChar) { NS_ASSERTION(0 < mUnicharData->GetBufferSize() - mUnicharDataLength, "Decoder returned an error but filled the output buffer! " "Should not happen."); mUnicharData->GetBuffer()[mUnicharDataLength++] = mReplacementChar; ++srcConsumed; // XXX this is needed to make sure we don't underrun our buffer; // bug 160784 again srcConsumed = PR_MAX(srcConsumed, 0); mConverter->Reset(); } NS_ASSERTION(srcConsumed <= mByteData->GetLength(), "Whoa. The converter should have returned NS_OK_UDEC_MOREINPUT before this point!"); } while (mReplacementChar && NS_FAILED(*aErrorCode)); mLeftOverBytes = mByteData->GetLength() - srcConsumed; return mUnicharDataLength; } NS_IMETHODIMP nsConverterInputStream::ReadLine(nsAString& aLine, PRBool* aResult) { if (!mLineBuffer) { nsresult rv = NS_InitLineBuffer(&mLineBuffer); if (NS_FAILED(rv)) return rv; } return NS_ReadLine(this, mLineBuffer, aLine, aResult); }