d7/d26/scanner-character-streams_8cc_source.html

 // Copyright 2011 the V8 project authors. All rights reserved.

 // Redistribution and use in source and binary forms, with or without

 // modification, are permitted provided that the following conditions are

 // met:

 //

 //     * Redistributions of source code must retain the above copyright

 //       notice, this list of conditions and the following disclaimer.

 //     * Redistributions in binary form must reproduce the above

 //       copyright notice, this list of conditions and the following

 //       disclaimer in the documentation and/or other materials provided

 //       with the distribution.

 //     * Neither the name of Google Inc. nor the names of its

 //       contributors may be used to endorse or promote products derived

 //       from this software without specific prior written permission.

 //

 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


 #include "v8.h"


 #include "scanner-character-streams.h"


 #include "handles.h"

 #include "unicode-inl.h"


 namespace v8 {

 namespace internal {


 // ----------------------------------------------------------------------------

 // BufferedUtf16CharacterStreams


 // Constructs a stream whose internal buffer is empty and which is not in
 // pushback mode (pushback_limit_ is NULL).
 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
     : Utf16CharacterStream(),
       pushback_limit_(NULL) {
   // Cursor and end both point at the start of buffer_, so the buffer holds
   // no characters yet; it is filled by the first read.
   buffer_cursor_ = buffer_end_ = buffer_;
 }


 // Destructor with an empty body; the buffer is a member array, so there is
 // nothing to free here.
 BufferedUtf16CharacterStream::~BufferedUtf16CharacterStream() { }


 // Un-reads one character, moving the stream position back by one.
 //
 // character: the value being returned to the stream. kEndOfInput only
 //     rewinds pos_; nothing is stored in the buffer for it.
 void BufferedUtf16CharacterStream::PushBack(uc32 character) {
   if (character != kEndOfInput) {
     const bool in_pushback_mode = (pushback_limit_ != NULL);
     if (in_pushback_mode || buffer_cursor_ <= buffer_) {
       // No free slot in front of the cursor, or pushback mode is already
       // active: the slow path does the store and the pos_ adjustment itself.
       SlowPushBack(static_cast<uc16>(character));
       return;
     }
     // Fast path. The store goes through buffer_ because buffer_cursor_
     // points to const data.
     --buffer_cursor_;
     buffer_[buffer_cursor_ - buffer_] = static_cast<uc16>(character);
   }
   pos_--;
 }


 // Pushes one code unit back when the fast path in PushBack cannot be used.
 //
 // Pushback mode layout: pushed-back characters live at the end of buffer_
 // (from buffer_cursor_ up to buffer_ + kBufferSize), while the range from
 // the start of buffer_ up to pushback_limit_ still holds valid data that
 // follows the pushback. pushback_limit_ is reset to NULL once the pushback
 // has grown all the way down to the start of the buffer.
 void BufferedUtf16CharacterStream::SlowPushBack(uc16 character) {
   if (pushback_limit_ == NULL) {
     // Enter pushback mode: remember where the valid data ended, then park
     // both the cursor and the end pointer at the far end of the buffer.
     pushback_limit_ = buffer_end_;
     buffer_cursor_ = buffer_end_ = buffer_ + kBufferSize;
   }
   // There must be room for at least one more pushed-back character.
   ASSERT(buffer_cursor_ > buffer_);
   ASSERT(pos_ > 0);
   --buffer_cursor_;
   pos_--;
   buffer_[buffer_cursor_ - buffer_] = character;
   if (buffer_cursor_ == buffer_) {
     // The pushback now covers the whole buffer; no older data survives.
     pushback_limit_ = NULL;
     return;
   }
   if (buffer_cursor_ < pushback_limit_) {
     // The pushback overwrote part of the preserved data; shrink that range.
     pushback_limit_ = buffer_cursor_;
   }
 }


 // Makes the next run of characters available at the start of buffer_.
 //
 // Returns true when at least one character can be read from buffer_cursor_
 // afterwards, false when FillBuffer produced nothing.
 bool BufferedUtf16CharacterStream::ReadBlock() {
   buffer_cursor_ = buffer_;
   if (pushback_limit_ != NULL) {
     // Leave pushback mode: the data preserved at the start of the buffer
     // (up to the old pushback_limit_) becomes the current buffer content.
     buffer_end_ = pushback_limit_;
     pushback_limit_ = NULL;
     if (buffer_end_ > buffer_cursor_) return true;
     // Nothing was preserved; fall through and read a new block.
   }
   const unsigned chars_read = FillBuffer(pos_, kBufferSize);
   buffer_end_ = buffer_ + chars_read;
   return chars_read > 0;
 }


 // Seeks forward by delta via BufferSeekForward and returns whatever that
 // call reports.
 unsigned BufferedUtf16CharacterStream::SlowSeekForward(unsigned delta) {
   // Drop out of pushback mode first; any valid data that might still sit
   // in front of pushback_limit_ is deliberately ignored.
   pushback_limit_ = NULL;
   const unsigned advanced = BufferSeekForward(delta);
   return advanced;
 }


 // ----------------------------------------------------------------------------

 // GenericStringUtf16CharacterStream


 // Creates a stream over the characters of `data`.
 //
 // start_position: initial value of pos_.
 // end_position: stored as length_, the position at which FillBuffer stops
 //     producing characters. Must not be smaller than start_position.
 GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream(
     Handle<String> data,
     unsigned start_position,
     unsigned end_position)
     : string_(data),
       length_(end_position) {
   ASSERT(end_position >= start_position);
   pos_ = start_position;
   // Start with an empty buffer; nothing is copied out of the string yet.
   buffer_cursor_ = buffer_end_ = buffer_;
 }


 // Destructor with an empty body.
 GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { }


 // Advances pos_ by up to delta characters, never moving past length_, and
 // refills the buffer from the new position.
 //
 // Returns the distance between pos_ after the refill and the position at
 // entry.
 unsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) {
   const unsigned start = pos_;
   pos_ = Min(start + delta, length_);
   ReadBlock();
   return pos_ - start;
 }


 // Copies up to `length` characters of string_, starting at from_pos, into
 // buffer_.
 //
 // from_pos: position of the first character to copy.
 // length: maximum number of characters to copy.
 //
 // Returns the number of characters written; 0 when from_pos is at or past
 // length_.
 unsigned GenericStringUtf16CharacterStream::FillBuffer(unsigned from_pos,
                                                        unsigned length) {
   if (from_pos >= length_) return 0;
   // Clamp against the remaining character count instead of testing
   // `from_pos + length > length_`: that sum can wrap around in unsigned
   // arithmetic, whereas `length_ - from_pos` cannot (from_pos < length_).
   const unsigned remaining = length_ - from_pos;
   if (length > remaining) {
     length = remaining;
   }
   String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length);
   return length;
 }


 // ----------------------------------------------------------------------------

 // Utf8ToUtf16CharacterStream

 // Creates a stream that reads `length` bytes of UTF-8 from `data`. Both the
 // byte offset (raw_data_pos_) and the UTF-16 position
 // (raw_character_position_) start at 0.
 Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,

                                                        unsigned length)

     : BufferedUtf16CharacterStream(),

       raw_data_(data),

       raw_data_length_(length),

       raw_data_pos_(0),

       raw_character_position_(0) {

   // Fill the buffer right away.
   ReadBlock();

 }


 // Destructor with an empty body; raw_data_ is not freed here.
 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }


 // Moves the stream forward by up to delta UTF-16 code units and refills the
 // buffer from the new position.
 //
 // pos_ is set to wherever SetRawPosition actually ended up, which is short
 // of the target when the raw data runs out first. Returns the distance
 // between pos_ after the refill and the position at entry.
 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
   const unsigned start = pos_;
   SetRawPosition(start + delta);
   pos_ = raw_character_position_;
   ReadBlock();
   return pos_ - start;
 }


 // Decodes UTF-8 from raw_data_ into UTF-16 code units in buffer_.
 //
 // char_position: UTF-16 position at which decoding should start.
 // length: capacity available in buffer_, in code units.
 //
 // Returns the number of code units written. Returns 0 when char_position
 // cannot be reached because the raw data ends first, and when length is 0.
 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position,
                                                 unsigned length) {
   static const unibrow::uchar kMaxUtf16Character = 0xffff;
   SetRawPosition(char_position);
   if (raw_character_position_ != char_position) {
     // char_position was not a valid position in the stream (hit the end
     // while spooling to it).
     return 0u;
   }
   unsigned i = 0;
   // Stop one slot short of `length` so that a surrogate pair (two code
   // units) always fits. Written as `i + 1 < length` rather than
   // `i < length - 1` so that length == 0 does not wrap around to a huge
   // unsigned bound and run the loop past the end of buffer_.
   while (i + 1 < length) {
     if (raw_data_pos_ == raw_data_length_) break;
     unibrow::uchar c = raw_data_[raw_data_pos_];
     if (c <= unibrow::Utf8::kMaxOneByteChar) {
       // Single-byte character: consume it directly.
       raw_data_pos_++;
     } else {
       // Multi-byte sequence: CalculateValue is handed &raw_data_pos_ as its
       // cursor argument and returns the decoded value.
       c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
                                         raw_data_length_ - raw_data_pos_,
                                         &raw_data_pos_);
     }
     if (c > kMaxUtf16Character) {
       // Does not fit in one code unit; emit a surrogate pair.
       buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);
       buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);
     } else {
       buffer_[i++] = static_cast<uc16>(c);
     }
   }
   raw_character_position_ = char_position + i;
   return i;
 }


 // Mask selecting the two most-significant bits of a UTF-8 byte.
 static const byte kUtf8MultiByteMask = 0xC0;

 // Masked value (top bits 10) that IsUtf8MultiCharacterFollower compares
 // against.
 static const byte kUtf8MultiByteCharFollower = 0x80;


 #ifdef DEBUG
 // Masked value (top bits 11) expected for the first byte of a multi-byte
 // sequence. Compiled only in DEBUG builds, where the ASSERTs below use it.
 static const byte kUtf8MultiByteCharStart = 0xC0;

 // Returns true when the two most-significant bits of first_byte are set.
 static bool IsUtf8MultiCharacterStart(byte first_byte) {
   const int top_bits = first_byte & kUtf8MultiByteMask;
   return top_bits == kUtf8MultiByteCharStart;
 }
 #endif


 // Returns true when later_byte has the bit pattern 10xxxxxx, i.e. its two
 // most-significant bits equal kUtf8MultiByteCharFollower.
 static bool IsUtf8MultiCharacterFollower(byte later_byte) {
   const int top_bits = later_byte & kUtf8MultiByteMask;
   return top_bits == kUtf8MultiByteCharFollower;
 }


 // Move the cursor back to point at the preceding UTF-8 character start

 // in the buffer.

 static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {

   byte character = buffer[--*cursor];

   if (character > unibrow::Utf8::kMaxOneByteChar) {

     ASSERT(IsUtf8MultiCharacterFollower(character));

     // Last byte of a multi-byte character encoding. Step backwards until

     // pointing to the first byte of the encoding, recognized by having the

     // top two bits set.

     while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { }

     ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor]));

   }

 }


 // Move the cursor forward to point at the next following UTF-8 character start

 // in the buffer.

 static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {

   byte character = buffer[(*cursor)++];

   if (character > unibrow::Utf8::kMaxOneByteChar) {

     // First character of a multi-byte character encoding.

     // The number of most-significant one-bits determines the length of the

     // encoding:

     //  110..... - (0xCx, 0xDx) one additional byte (minimum).

     //  1110.... - (0xEx) two additional bytes.

     //  11110... - (0xFx) three additional bytes (maximum).

     ASSERT(IsUtf8MultiCharacterStart(character));

     // Additional bytes is:

     // 1 if value in range 0xC0 .. 0xDF.

     // 2 if value in range 0xE0 .. 0xEF.

     // 3 if value in range 0xF0 .. 0xF7.

     // Encode that in a single value.

     unsigned additional_bytes =

         ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;

     *cursor += additional_bytes;

     ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));

   }

 }


 // Moves raw_data_pos_ / raw_character_position_ towards target_position,
 // which is expressed in UTF-16 code units.
 //
 // A position between the two halves of a surrogate pair cannot be reached,
 // because no byte offset in the UTF-8 data corresponds to it. This assumes
 // the pair is correctly coded as one 4-byte UTF-8 sequence; if it is
 // illegally coded as two 3-byte sequences there is no problem here.
 //
 // When spooling forwards and the raw data runs out, the function returns
 // early with raw_character_position_ still below target_position.
 void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {
   if (raw_character_position_ > target_position) {
     // Spool backwards through the UTF-8 bytes, one character per step.
     do {
       const unsigned byte_pos_before = raw_data_pos_;
       Utf8CharacterBack(raw_data_, &raw_data_pos_);
       const unsigned bytes_stepped = byte_pos_before - raw_data_pos_;
       raw_character_position_--;
       ASSERT(bytes_stepped <= 4);
       // A 4-byte sequence stands for a surrogate pair: two code units.
       if (bytes_stepped == 4) raw_character_position_--;
     } while (raw_character_position_ > target_position);
     // No surrogate pair splitting.
     ASSERT(raw_character_position_ == target_position);
     return;
   }
   // Spool forwards through the UTF-8 bytes, one character per step.
   while (raw_character_position_ < target_position) {
     if (raw_data_pos_ == raw_data_length_) return;
     const unsigned byte_pos_before = raw_data_pos_;
     Utf8CharacterForward(raw_data_, &raw_data_pos_);
     const unsigned bytes_stepped = raw_data_pos_ - byte_pos_before;
     raw_character_position_++;
     ASSERT(bytes_stepped <= 4);
     // A 4-byte sequence stands for a surrogate pair: two code units.
     if (bytes_stepped == 4) raw_character_position_++;
   }
   // No surrogate pair splitting.
   ASSERT(raw_character_position_ == target_position);
 }


 // ----------------------------------------------------------------------------

 // ExternalTwoByteStringUtf16CharacterStream


 // Destructor with an empty body; raw_data_ is not freed here.
 ExternalTwoByteStringUtf16CharacterStream::

     ~ExternalTwoByteStringUtf16CharacterStream() { }


 // Creates a stream that reads directly from the two-byte data of `data`,
 // without copying it into an intermediate buffer.
 //
 // data: the external string; a handle to it is kept in source_.
 // start_position: initial value of pos_, and the argument passed to
 //     GetTwoByteData to obtain raw_data_.
 // end_position: the readable range spans end_position - start_position code
 //     units starting at raw_data_. Presumably end_position must not be
 //     smaller than start_position; this is not checked here.
 ExternalTwoByteStringUtf16CharacterStream
     ::ExternalTwoByteStringUtf16CharacterStream(
         Handle<ExternalTwoByteString> data,
         int start_position,
         int end_position)
     : Utf16CharacterStream(),
       source_(data),
       raw_data_(data->GetTwoByteData(start_position)) {
   // These were previously joined by a stray comma operator; they are two
   // independent statements.
   buffer_cursor_ = raw_data_;
   buffer_end_ = raw_data_ + (end_position - start_position);
   pos_ = start_position;
 }


 } }  // namespace v8::internal

v8::internal::NULL
enable upcoming ES6 features enable harmony block scoping enable harmony enable harmony proxies enable harmony generators enable harmony numeric enable harmony string enable harmony math functions harmony_scoping harmony_symbols harmony_collections harmony_iteration harmony_strings harmony_scoping harmony_maths tracks arrays with only smi values Optimize object Array DOM strings and string pretenure call new trace pretenuring decisions of HAllocate instructions track fields with only smi values track fields with heap values track_fields track_fields Enables optimizations which favor memory size over execution speed use string slices optimization filter maximum number of GVN fix point iterations use function inlining use allocation folding eliminate write barriers targeting allocations in optimized code maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining crankshaft harvests type feedback from stub cache trace check elimination phase hydrogen tracing filter NULL
Definition: flags.cc:269

v8::internal::Utf16CharacterStream::buffer_cursor_
const uint16_t * buffer_cursor_
Definition: scanner.h:123

v8::internal::Handle< String >

v8::internal::BufferedUtf16CharacterStream::FillBuffer
virtual unsigned FillBuffer(unsigned position, unsigned length)=0

v8::internal::Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream
virtual ~Utf8ToUtf16CharacterStream()
Definition: scanner-character-streams.cc:170

v8::internal::Utf8ToUtf16CharacterStream::raw_character_position_
unsigned raw_character_position_
Definition: scanner-character-streams.h:96

v8::internal::BufferedUtf16CharacterStream::SlowPushBack
virtual void SlowPushBack(uc16 character)
Definition: scanner-character-streams.cc:67

v8::internal::BufferedUtf16CharacterStream::kBufferSize
static const unsigned kBufferSize
Definition: scanner-character-streams.h:47

v8::internal::Utf16CharacterStream::kEndOfInput
static const uc32 kEndOfInput
Definition: scanner.h:114

v8::internal::uc32
int32_t uc32
Definition: globals.h:310

unibrow::Utf16::TrailSurrogate
static uint16_t TrailSurrogate(uint32_t char_code)
Definition: unicode.h:134

v8::internal::Utf16CharacterStream::buffer_end_
const uint16_t * buffer_end_
Definition: scanner.h:124

v8::internal::Utf8ToUtf16CharacterStream::SetRawPosition
void SetRawPosition(unsigned char_position)
Definition: scanner-character-streams.cc:276

v8::internal::Utf16CharacterStream::pos_
unsigned pos_
Definition: scanner.h:125

v8::internal::BufferedUtf16CharacterStream::ReadBlock
virtual bool ReadBlock()
Definition: scanner-character-streams.cc:93

handles.h

v8::internal::BufferedUtf16CharacterStream::~BufferedUtf16CharacterStream
virtual ~BufferedUtf16CharacterStream()
Definition: scanner-character-streams.cc:50

ASSERT
#define ASSERT(condition)
Definition: checks.h:329

v8::internal::BufferedUtf16CharacterStream
Definition: scanner-character-streams.h:39

scanner-character-streams.h

unibrow::Utf8::CalculateValue
static uchar CalculateValue(const byte *str, unsigned length, unsigned *cursor)
Definition: unicode.cc:214

unicode-inl.h

v8::internal::Utf8ToUtf16CharacterStream::raw_data_pos_
unsigned raw_data_pos_
Definition: scanner-character-streams.h:93

v8::internal::Utf16CharacterStream
Definition: scanner.h:66

v8::internal::byte
uint8_t byte
Definition: globals.h:185

v8::internal::BufferedUtf16CharacterStream::SlowSeekForward
virtual unsigned SlowSeekForward(unsigned delta)
Definition: scanner-character-streams.cc:110

v8::internal::Utf8ToUtf16CharacterStream::raw_data_length_
unsigned raw_data_length_
Definition: scanner-character-streams.h:92

v8.h

unibrow::Utf16::LeadSurrogate
static uint16_t LeadSurrogate(uint32_t char_code)
Definition: unicode.h:131

v8::internal::Utf8ToUtf16CharacterStream::raw_data_
const byte * raw_data_
Definition: scanner-character-streams.h:91

v8::internal::BufferedUtf16CharacterStream::BufferSeekForward
virtual unsigned BufferSeekForward(unsigned delta)=0

v8::internal::Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream
Utf8ToUtf16CharacterStream(const byte *data, unsigned length)
Definition: scanner-character-streams.cc:159

v8::internal::GenericStringUtf16CharacterStream::string_
Handle< String > string_
Definition: scanner-character-streams.h:74

v8::internal::GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream
GenericStringUtf16CharacterStream(Handle< String > data, unsigned start_position, unsigned end_position)
Definition: scanner-character-streams.cc:122

v8::internal::uc16
uint16_t uc16
Definition: globals.h:309

v8::internal::BufferedUtf16CharacterStream::BufferedUtf16CharacterStream
BufferedUtf16CharacterStream()
Definition: scanner-character-streams.cc:41

v8::internal::GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream
virtual ~GenericStringUtf16CharacterStream()
Definition: scanner-character-streams.cc:135

v8::internal::ExternalTwoByteStringUtf16CharacterStream::~ExternalTwoByteStringUtf16CharacterStream
virtual ~ExternalTwoByteStringUtf16CharacterStream()
Definition: scanner-character-streams.cc:309

unibrow::Utf8::kMaxOneByteChar
static const unsigned kMaxOneByteChar
Definition: unicode.h:164

v8::internal::ExternalTwoByteStringUtf16CharacterStream::ExternalTwoByteStringUtf16CharacterStream
ExternalTwoByteStringUtf16CharacterStream(Handle< ExternalTwoByteString > data, int start_position, int end_position)
Definition: scanner-character-streams.cc:313

v8::internal::BufferedUtf16CharacterStream::buffer_
uc16 buffer_[kBufferSize]
Definition: scanner-character-streams.h:58

v8::internal::Utf8ToUtf16CharacterStream::BufferSeekForward
virtual unsigned BufferSeekForward(unsigned delta)
Definition: scanner-character-streams.cc:173

v8::internal::Utf8ToUtf16CharacterStream::FillBuffer
virtual unsigned FillBuffer(unsigned char_position, unsigned length)
Definition: scanner-character-streams.cc:183

v8::internal::Min
T Min(T a, T b)
Definition: utils.h:234

v8::internal::GenericStringUtf16CharacterStream::length_
unsigned length_
Definition: scanner-character-streams.h:76

v8::internal::GenericStringUtf16CharacterStream::FillBuffer
virtual unsigned FillBuffer(unsigned position, unsigned length)
Definition: scanner-character-streams.cc:146

unibrow::uchar
unsigned int uchar
Definition: unicode.h:40

v8::internal::GenericStringUtf16CharacterStream::BufferSeekForward
virtual unsigned BufferSeekForward(unsigned delta)
Definition: scanner-character-streams.cc:138

v8::internal::BufferedUtf16CharacterStream::PushBack
virtual void PushBack(uc32 character)
Definition: scanner-character-streams.cc:52

v8::internal::BufferedUtf16CharacterStream::pushback_limit_
const uc16 * pushback_limit_
Definition: scanner-character-streams.h:57