Files
rust/src/libsyntax/parse/lexer/comments.rs

429 lines
14 KiB
Rust
Raw Normal View History

// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use ast;
2013-03-26 16:38:07 -04:00
use codemap::{BytePos, CharPos, CodeMap, Pos};
use diagnostic;
2014-05-21 16:57:31 -07:00
use parse::lexer::{is_whitespace, Reader};
use parse::lexer::{StringReader, TokenAndSpan};
use parse::lexer::is_block_doc_comment;
use parse::lexer;
use print::pprust;
2013-11-10 22:46:32 -08:00
use std::io;
2013-06-24 20:40:33 -04:00
use std::str;
use std::string::String;
2013-06-24 20:40:33 -04:00
use std::uint;
#[deriving(Clone, PartialEq)]
pub enum CommentStyle {
2014-06-09 13:12:30 -07:00
/// No code on either side of each line of the comment
Isolated,
/// Code exists to the left of the comment
Trailing,
/// Code before /* foo */ and after the comment
Mixed,
/// Just a manual blank line "\n\n", for layout
BlankLine,
}
2013-07-02 12:47:32 -07:00
/// A (non-doc) comment, or a blank-line marker, gathered from the source.
#[deriving(Clone)]
pub struct Comment {
    /// Layout of the comment relative to surrounding code.
    pub style: CommentStyle,
    /// Text of the comment, one entry per line; empty for a `BlankLine`.
    pub lines: Vec<String>,
    /// Reader position (`last_pos`) recorded when the comment was met.
    pub pos: BytePos,
}
/// Tells whether the comment text `s` is a doc comment of any of the four
/// forms: `///`, `//!`, `/**` or `/*!`.
///
/// The `///` and `/**` forms are additionally vetted by the lexer's own
/// `is_doc_comment` / `is_block_doc_comment` checks.
pub fn is_doc_comment(s: &str) -> bool {
    // inner doc comments need no further checking
    if s.starts_with("//!") || s.starts_with("/*!") {
        return true;
    }
    // the prefixes are mutually exclusive, so at most one branch applies
    if s.starts_with("///") {
        return super::is_doc_comment(s);
    }
    if s.starts_with("/**") {
        return is_block_doc_comment(s);
    }
    false
}
pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
2013-03-28 18:39:09 -07:00
assert!(is_doc_comment(comment));
if comment.starts_with("//!") || comment.starts_with("/*!") {
ast::AttrInner
} else {
ast::AttrOuter
}
}
/// Strips the comment markers and decoration from a comment, returning only
/// its text.
///
/// Line comments lose their leading `///!`, `///`, `//!` or `//`. Block
/// comments lose their first three characters and the closing `*/`; then
/// blank or all-star lines are dropped from both ends, and finally a common
/// `[ \t]*\*` prefix is removed if every remaining line has its `*` in the
/// same column.
///
/// Panics if `comment` starts with neither `//` nor `/*`.
pub fn strip_doc_comment_decoration(comment: &str) -> String {
    /// remove whitespace-only lines from the start/end of lines
    fn vertical_trim(lines: Vec<String> ) -> Vec<String> {
        let mut i = 0u;
        let mut j = lines.len();
        // first line of all-stars should be omitted
        // (an empty first line qualifies too: `all` is vacuously true)
        if lines.len() > 0 &&
                lines[0].as_slice().chars().all(|c| c == '*') {
            i += 1;
        }
        while i < j && lines[i].as_slice().trim().is_empty() {
            i += 1;
        }
        // like the first, a last line of all stars should be omitted
        // NOTE(review): `skip(1)` ignores the first character whatever it
        // is, so a last line such as "x" or "x**" is dropped as well --
        // presumably it is meant to skip one leading space; confirm.
        if j > i && lines[j - 1]
                         .as_slice()
                         .chars()
                         .skip(1)
                         .all(|c| c == '*') {
            j -= 1;
        }
        while j > i && lines[j - 1].as_slice().trim().is_empty() {
            j -= 1;
        }
        return lines.slice(i, j).iter().map(|x| (*x).clone()).collect();
    }

    /// remove a "[ \t]*\*" block from each line, if possible
    fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
        // column of the leading `*`, fixed by the first line that has one
        let mut i = uint::MAX;
        let mut can_trim = true;
        let mut first = true;
        for line in lines.iter() {
            for (j, c) in line.as_slice().chars().enumerate() {
                // only blanks and stars may precede the `*`, and the `*`
                // must not lie to the right of the established column
                if j > i || !"* \t".contains_char(c) {
                    can_trim = false;
                    break;
                }
                if c == '*' {
                    if first {
                        i = j;
                        first = false;
                    } else if i != j {
                        can_trim = false;
                    }
                    break;
                }
            }
            // A line with no character at column `i` has no `*` there.
            // This must be `>=`: with `>` a whitespace-only line exactly
            // `i` bytes long was accepted and the slice below panicked.
            if i >= line.len() {
                can_trim = false;
            }
            if !can_trim {
                break;
            }
        }

        if can_trim {
            // everything up to the `*` is ASCII, so the char index `i` is
            // also a valid byte index
            lines.iter().map(|line| {
                line.as_slice().slice(i + 1, line.len()).to_string()
            }).collect()
        } else {
            lines
        }
    }

    // one-line comments lose their prefix
    // (longest prefixes first, so that "///!" is not matched as "///")
    static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
    for prefix in ONLINERS.iter() {
        if comment.starts_with(*prefix) {
            return comment.slice_from(prefix.len()).to_string();
        }
    }

    if comment.starts_with("/*") {
        // drop the three opening characters and the closing "*/"
        let lines = comment.slice(3u, comment.len() - 2u)
            .lines_any()
            .map(|s| s.to_string())
            .collect::<Vec<String> >();

        let lines = vertical_trim(lines);
        let lines = horizontal_trim(lines);

        return lines.connect("\n");
    }

    panic!("not a doc-comment: {}", comment);
}
/// Appends a `BlankLine` marker (no text) at the reader's `last_pos`.
fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
    debug!(">>> blank-line comment");
    let blank = Comment {
        style: BlankLine,
        lines: Vec::new(),
        pos: rdr.last_pos,
    };
    comments.push(blank);
}
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
comments: &mut Vec<Comment>) {
2014-05-21 16:57:31 -07:00
while is_whitespace(rdr.curr) && !rdr.is_eof() {
if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
push_blank_line_comment(rdr, &mut *comments);
}
2014-05-21 16:57:31 -07:00
rdr.bump();
}
}
/// Reads a `#!` line as a one-line comment and appends it to `comments`.
///
/// The comment is `Trailing` if `code_to_the_left`, `Isolated` otherwise.
fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
                        comments: &mut Vec<Comment>) {
    debug!(">>> shebang comment");
    // position must be taken before the line is read
    let p = rdr.last_pos;
    let line = rdr.read_one_line_comment();
    // closing trace marker goes after the read, as in the other readers
    debug!("<<< shebang comment");
    comments.push(Comment {
        style: if code_to_the_left { Trailing } else { Isolated },
        lines: vec!(line),
        pos: p
    });
}
fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
comments: &mut Vec<Comment>) {
debug!(">>> line comments");
let p = rdr.last_pos;
let mut lines: Vec<String> = Vec::new();
2014-05-21 16:57:31 -07:00
while rdr.curr_is('/') && rdr.nextch_is('/') {
let line = rdr.read_one_line_comment();
debug!("{}", line);
// Doc comments are not put in comments.
if is_doc_comment(line.as_slice()) {
break;
}
lines.push(line);
2014-05-21 16:57:31 -07:00
rdr.consume_non_eol_whitespace();
}
debug!("<<< line comments");
if !lines.is_empty() {
comments.push(Comment {
style: if code_to_the_left { Trailing } else { Isolated },
lines: lines,
pos: p
});
}
}
2014-06-09 13:12:30 -07:00
/// Returns None if the first col chars of s contain a non-whitespace char.
/// Otherwise returns Some(k) where k is first char offset after that leading
/// whitespace. Note k may be outside bounds of s.
///
/// If s has fewer than col chars, only the chars it does have are examined.
fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
    let len = s.len();
    // chars still to be checked
    let mut remaining = col.to_uint();
    // byte offset of the next char to look at
    let mut cursor: uint = 0;
    while remaining > 0 && cursor < len {
        let r: str::CharRange = s.char_range_at(cursor);
        if !r.ch.is_whitespace() {
            return None;
        }
        cursor = r.next;
        remaining -= 1;
    }
    Some(cursor)
}
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
s: String, col: CharPos) {
2013-05-09 13:27:24 -07:00
let len = s.len();
let s1 = match all_whitespace(s.as_slice(), col) {
Some(col) => {
if col < len {
s.as_slice().slice(col, len).to_string()
} else {
"".to_string()
}
}
None => s,
};
debug!("pushing line: {}", s1);
lines.push(s1);
}
fn read_block_comment(rdr: &mut StringReader,
code_to_the_left: bool,
comments: &mut Vec<Comment> ) {
debug!(">>> block comment");
let p = rdr.last_pos;
let mut lines: Vec<String> = Vec::new();
let col = rdr.col;
2014-05-21 16:57:31 -07:00
rdr.bump();
rdr.bump();
let mut curr_line = String::from_str("/*");
// doc-comments are not really comments, they are attributes
2014-05-21 16:57:31 -07:00
if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
curr_line.push(rdr.curr.unwrap());
2014-05-21 16:57:31 -07:00
rdr.bump();
}
2014-05-21 16:57:31 -07:00
if !rdr.is_eof() {
curr_line.push_str("*/");
2014-05-21 16:57:31 -07:00
rdr.bump();
rdr.bump();
}
if is_block_doc_comment(curr_line.as_slice()) {
return
}
assert!(!curr_line.as_slice().contains_char('\n'));
lines.push(curr_line);
} else {
let mut level: int = 1;
while level > 0 {
debug!("=== block comment level {}", level);
2014-05-21 16:57:31 -07:00
if rdr.is_eof() {
rdr.fatal("unterminated block comment");
}
if rdr.curr_is('\n') {
trim_whitespace_prefix_and_push_line(&mut lines,
curr_line,
col);
curr_line = String::new();
2014-05-21 16:57:31 -07:00
rdr.bump();
} else {
curr_line.push(rdr.curr.unwrap());
2014-05-21 16:57:31 -07:00
if rdr.curr_is('/') && rdr.nextch_is('*') {
rdr.bump();
rdr.bump();
curr_line.push('*');
level += 1;
} else {
2014-05-21 16:57:31 -07:00
if rdr.curr_is('*') && rdr.nextch_is('/') {
rdr.bump();
rdr.bump();
curr_line.push('/');
level -= 1;
2014-05-21 16:57:31 -07:00
} else { rdr.bump(); }
}
}
}
if curr_line.len() != 0 {
trim_whitespace_prefix_and_push_line(&mut lines,
curr_line,
col);
}
}
let mut style = if code_to_the_left { Trailing } else { Isolated };
2014-05-21 16:57:31 -07:00
rdr.consume_non_eol_whitespace();
if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1u {
style = Mixed;
}
debug!("<<< block comment");
comments.push(Comment {style: style, lines: lines, pos: p});
}
fn consume_comment(rdr: &mut StringReader,
code_to_the_left: bool,
comments: &mut Vec<Comment> ) {
debug!(">>> consume comment");
2014-05-21 16:57:31 -07:00
if rdr.curr_is('/') && rdr.nextch_is('/') {
read_line_comments(rdr, code_to_the_left, comments);
2014-05-21 16:57:31 -07:00
} else if rdr.curr_is('/') && rdr.nextch_is('*') {
read_block_comment(rdr, code_to_the_left, comments);
2014-05-21 16:57:31 -07:00
} else if rdr.curr_is('#') && rdr.nextch_is('!') {
read_shebang_comment(rdr, code_to_the_left, comments);
} else { panic!(); }
debug!("<<< consume comment");
}
2013-07-02 12:47:32 -07:00
/// A literal token gathered from the source.
#[deriving(Clone)]
pub struct Literal {
    /// Source text of the literal.
    pub lit: String,
    /// Low end (`lo`) of the literal token's span.
    pub pos: BytePos,
}
2013-04-02 16:44:01 -07:00
// it appears this function is called only from pprust... that's
// probably not a good thing.
2014-05-21 16:57:31 -07:00
pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
path: String,
srdr: &mut io::Reader)
2014-03-16 20:56:24 +02:00
-> (Vec<Comment>, Vec<Literal>) {
2014-01-29 17:39:21 -08:00
let src = srdr.read_to_end().unwrap();
let src = String::from_utf8(src).unwrap();
let cm = CodeMap::new();
let filemap = cm.new_filemap(path, src);
2014-05-21 16:57:31 -07:00
let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
2012-06-15 09:32:17 -07:00
let mut comments: Vec<Comment> = Vec::new();
let mut literals: Vec<Literal> = Vec::new();
let mut first_read: bool = true;
2014-05-21 16:57:31 -07:00
while !rdr.is_eof() {
loop {
let mut code_to_the_left = !first_read;
2014-05-21 16:57:31 -07:00
rdr.consume_non_eol_whitespace();
if rdr.curr_is('\n') {
code_to_the_left = false;
consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
}
2014-05-21 16:57:31 -07:00
while rdr.peeking_at_comment() {
consume_comment(&mut rdr, code_to_the_left, &mut comments);
consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
}
break;
}
2012-06-15 09:32:17 -07:00
let bstart = rdr.last_pos;
2012-06-12 10:59:50 -07:00
rdr.next_token();
2012-06-15 09:32:17 -07:00
//discard, and look ahead; we're working with internal state
let TokenAndSpan { tok, sp } = rdr.peek();
if tok.is_lit() {
2014-05-21 16:57:31 -07:00
rdr.with_str_from(bstart, |s| {
debug!("tok lit: {}", s);
literals.push(Literal {lit: s.to_string(), pos: sp.lo});
})
} else {
debug!("tok: {}", pprust::token_to_string(&tok));
}
first_read = false;
}
(comments, literals)
}
#[cfg(test)]
mod test {
    use super::*;

    // A common " *" prefix is removed from every line.
    #[test] fn test_block_doc_comment_1() {
        let comment = "/**\n * Test \n ** Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test \n* Test\n Test".to_string());
    }

    #[test] fn test_block_doc_comment_2() {
        let comment = "/**\n * Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test\n Test".to_string());
    }

    // Lines without a leading star prefix are left alone.
    #[test] fn test_block_doc_comment_3() {
        let comment = "/**\n let a: *int;\n *a = 5;\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " let a: *int;\n *a = 5;".to_string());
    }

    // All-star first and last lines are dropped.
    #[test] fn test_block_doc_comment_4() {
        let comment = "/*******************\n test\n *********************/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " test".to_string());
    }

    // Every one-line prefix is stripped, with and without a following space.
    #[test] fn test_line_doc_comment() {
        let stripped = strip_doc_comment_decoration("/// test");
        assert_eq!(stripped, " test".to_string());
        let stripped = strip_doc_comment_decoration("///! test");
        assert_eq!(stripped, " test".to_string());
        let stripped = strip_doc_comment_decoration("//! test");
        assert_eq!(stripped, " test".to_string());
        let stripped = strip_doc_comment_decoration("// test");
        assert_eq!(stripped, " test".to_string());
        let stripped = strip_doc_comment_decoration("///test");
        assert_eq!(stripped, "test".to_string());
        let stripped = strip_doc_comment_decoration("///!test");
        assert_eq!(stripped, "test".to_string());
        let stripped = strip_doc_comment_decoration("//test");
        assert_eq!(stripped, "test".to_string());
    }
}