Bump samples/versions - reduce image sizes through multi-stage builds

Signed-off-by: Alex Ellis <alexellis2@gmail.com>
This commit is contained in:
Alex Ellis 2017-11-09 10:39:00 +00:00
parent f62309f388
commit a077dd7627
736 changed files with 177333 additions and 185 deletions

View File

@ -120,7 +120,7 @@ services:
# Pass a username as an argument to find how many images user has pushed to Docker Hub.
hubstats:
image: alexellis2/faas-dockerhubstats:latest
image: functions/hubstats:latest
labels:
function: "true"
depends_on:
@ -131,6 +131,11 @@ services:
no_proxy: "gateway"
https_proxy: $https_proxy
deploy:
resources:
limits:
memory: 50M
reservations:
memory: 20M
placement:
constraints:
- 'node.platform.os == linux'
@ -148,6 +153,11 @@ services:
no_proxy: "gateway"
https_proxy: $https_proxy
deploy:
resources:
limits:
memory: 50M
reservations:
memory: 20M
placement:
constraints:
- 'node.platform.os == linux'
@ -166,6 +176,11 @@ services:
no_proxy: "gateway"
https_proxy: $https_proxy
deploy:
resources:
limits:
memory: 50M
reservations:
memory: 20M
placement:
constraints:
- 'node.platform.os == linux'

View File

@ -206,7 +206,7 @@ services:
# Converts body in (markdown format) -> (html)
markdown:
image: alexellis2/faas-markdownrender:latest
image: functions/markdown-render:latest
labels:
function: "true"
depends_on:

View File

@ -1,6 +1,6 @@
FROM alpine:latest
FROM alpine:3.6
ADD https://github.com/openfaas/faas/releases/download/0.6.5/fwatchdog /usr/bin
ADD https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog /usr/bin
# COPY ./fwatchdog /usr/bin/
RUN chmod +x /usr/bin/fwatchdog

View File

@ -1,6 +1,6 @@
FROM arm64v8/alpine:3.6
ADD https://github.com/alexellis/faas/releases/download/0.6.5/fwatchdog-arm64 /usr/bin/fwatchdog
ADD https://github.com/alexellis/faas/releases/download/0.6.9/fwatchdog-arm64 /usr/bin/fwatchdog
# COPY ./fwatchdog /usr/bin/
RUN chmod +x /usr/bin/fwatchdog

View File

@ -1,16 +1,26 @@
FROM alpine:latest
FROM golang:1.9.2-alpine as builder
WORKDIR /root/
MAINTAINER alex@openfaas.com
ENTRYPOINT []
EXPOSE 8080
ENV http_proxy ""
ENV https_proxy ""
RUN apk --no-cache add make curl \
&& curl -sL https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog > /usr/bin/fwatchdog \
&& chmod +x /usr/bin/fwatchdog
ADD https://github.com/openfaas/faas/releases/download/0.5.1-alpha/fwatchdog /usr/bin
# COPY fwatchdog /usr/bin/
RUN chmod +x /usr/bin/fwatchdog
WORKDIR /go/src/github.com/openfaas/faas/sample-functions/ApiKeyProtected
COPY app .
COPY handler.go .
# COPY vendor vendor
ENV fprocess="/root/app"
CMD ["fwatchdog"]
RUN go install
FROM alpine:3.6
# Needed to reach the hub
RUN apk --no-cache add ca-certificates
COPY --from=builder /usr/bin/fwatchdog /usr/bin/fwatchdog
COPY --from=builder /go/bin/ApiKeyProtected /usr/bin/ApiKeyProtected
ENV fprocess "/usr/bin/ApiKeyProtected"
CMD ["/usr/bin/fwatchdog"]

View File

@ -1,9 +0,0 @@
FROM golang:1.7.5
RUN mkdir -p /go/src/app
COPY handler.go /go/src/app
WORKDIR /go/src/app
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o app .
CMD ["echo"]

View File

@ -1,12 +0,0 @@
#!/bin/sh
echo Building functions/api-key-protected:build
docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \
-t functions/api-key-protected . -f Dockerfile.build
docker create --name render_extract functions/api-key-protected
docker cp render_extract:/go/src/app/app ./app
docker rm -f render_extract
echo Building functions/api-key-protected:latest
docker build --no-cache -t functions/api-key-protected:latest .

View File

@ -1,6 +1,6 @@
FROM microsoft/dotnet:sdk
ADD https://github.com/openfaas/faas/releases/download/v0.5-alpha/fwatchdog /usr/bin
ADD https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog /usr/bin
RUN chmod +x /usr/bin/fwatchdog
ENV DOTNET_CLI_TELEMETRY_OPTOUT 1

View File

@ -8,7 +8,7 @@ COPY . /go/src/github.com/openfaas/faas/sample-functions/golang
RUN go install
ADD https://github.com/openfaas/faas/releases/download/v0.5-alpha/fwatchdog /usr/bin
ADD https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog /usr/bin
RUN chmod +x /usr/bin/fwatchdog
ENV fprocess "/go/bin/golang"

View File

@ -6,7 +6,7 @@ WORKDIR /go/src/github.com/openfaas/faas/sample-functions/golang
COPY . /go/src/github.com/openfaas/faas/sample-functions/golang
RUN go build
ADD ./watchdog.exe /
ADD https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog.exe /watchdog.exe
EXPOSE 8080
ENV fprocess="golang.exe"

View File

@ -1,7 +1,7 @@
FROM alpine:latest
FROM alpine:3.6
RUN apk --update add nodejs nodejs-npm
ADD https://github.com/openfaas/faas/releases/download/v0.5-alpha/fwatchdog /usr/bin
ADD https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog /usr/bin
RUN chmod +x /usr/bin/fwatchdog
COPY package.json .

View File

@ -1,7 +1,7 @@
FROM alpine:latest
FROM alpine:3.6
RUN apk --update add nodejs nodejs-npm
ADD https://github.com/openfaas/faas/releases/download/v0.5-alpha/fwatchdog /usr/bin
ADD https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog /usr/bin
#COPY ./fwatchdog /usr/bin/
RUN chmod +x /usr/bin/fwatchdog

View File

@ -1,17 +1,26 @@
FROM golang:1.7.5-alpine
MAINTAINER alexellis2@gmail.com
FROM golang:1.9.2-alpine as builder
MAINTAINER alex@openfaas.com
ENTRYPOINT []
RUN apk --no-cache add make
RUN apk --no-cache add make curl \
&& curl -sL https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog > /usr/bin/fwatchdog \
&& chmod +x /usr/bin/fwatchdog
WORKDIR /go/src/github.com/openfaas/faas/sample-functions/DockerHubStats
COPY . /go/src/github.com/openfaas/faas/sample-functions/DockerHubStats
RUN make
RUN make install
ADD https://github.com/openfaas/faas/releases/download/0.6.0/fwatchdog /usr/bin
RUN chmod +x /usr/bin/fwatchdog
FROM alpine:3.6
# Needed to reach the hub
RUN apk --no-cache add ca-certificates
COPY --from=builder /usr/bin/fwatchdog /usr/bin/fwatchdog
COPY --from=builder /go/bin/DockerHubStats /usr/bin/DockerHubStats
ENV fprocess "/usr/bin/DockerHubStats"
ENV fprocess "/go/bin/DockerHubStats"
HEALTHCHECK --interval=5s CMD [ -e /tmp/.lock ] || exit 1
CMD [ "/usr/bin/fwatchdog"]
CMD ["/usr/bin/fwatchdog"]

View File

@ -1,17 +1,25 @@
FROM alexellis2/go-armhf:1.7.4
FROM golang:1.9.2-alpine as builder
MAINTAINER alexellis2@gmail.com
MAINTAINER alex@openfaas.com
ENTRYPOINT []
RUN apk --no-cache add make
RUN apk --no-cache add make curl \
&& curl -sL https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog-armhf > /usr/bin/fwatchdog \
&& chmod +x /usr/bin/fwatchdog
WORKDIR /go/src/github.com/openfaas/faas/sample-functions/DockerHubStats
COPY . /go/src/github.com/openfaas/faas/sample-functions/DockerHubStats
RUN make
RUN make install
COPY ./fwatchdog /usr/bin
RUN chmod +x /usr/bin/fwatchdog
FROM alpine:3.6
ENV fprocess "/go/bin/DockerHubStats"
# Needed to reach the hub
RUN apk --no-cache add ca-certificates
CMD [ "/usr/bin/fwatchdog"]
COPY --from=builder /usr/bin/fwatchdog /usr/bin/fwatchdog
COPY --from=builder /go/bin/DockerHubStats /usr/bin/DockerHubStats
ENV fprocess "/usr/bin/DockerHubStats"
CMD ["/usr/bin/fwatchdog"]

View File

@ -1,9 +1,5 @@
.PHONY: install image
.PHONY: install
install:
@go install .
image:
@docker build -t alexellis2/dockerhub-stats .

View File

@ -1,3 +0,0 @@
#!/bin/bash
docker build -t functions/hubstats:latest-armhf .

View File

@ -1,4 +0,0 @@
#!/bin/bash
docker build -t functions/hubstats:latest .

View File

@ -1,7 +1,7 @@
FROM alpine:latest
FROM alpine:3.6
RUN apk --update add nodejs nodejs-npm
ADD https://github.com/openfaas/faas/releases/download/v0.5-alpha/fwatchdog /usr/bin
ADD https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog /usr/bin
RUN chmod +x /usr/bin/fwatchdog
COPY package.json .

View File

@ -1,16 +1,26 @@
FROM alpine:latest
FROM golang:1.9.2-alpine as builder
WORKDIR /root/
MAINTAINER alex@openfaas.com
ENTRYPOINT []
EXPOSE 8080
ENV http_proxy ""
ENV https_proxy ""
RUN apk --no-cache add make curl \
&& curl -sL https://github.com/openfaas/faas/releases/download/0.6.9/fwatchdog > /usr/bin/fwatchdog \
&& chmod +x /usr/bin/fwatchdog
ADD https://github.com/openfaas/faas/releases/download/v0.5-alpha/fwatchdog /usr/bin
RUN chmod +x /usr/bin/fwatchdog
# COPY fwatchdog /usr/bin/
WORKDIR /go/src/github.com/openfaas/faas/sample-functions/MarkdownRender
COPY app .
COPY handler.go .
COPY vendor vendor
ENV fprocess="/root/app"
CMD ["fwatchdog"]
RUN go install
FROM alpine:3.6
# Needed to reach the hub
RUN apk --no-cache add ca-certificates
COPY --from=builder /usr/bin/fwatchdog /usr/bin/fwatchdog
COPY --from=builder /go/bin/MarkdownRender /usr/bin/MarkdownRender
ENV fprocess "/usr/bin/MarkdownRender"
CMD ["/usr/bin/fwatchdog"]

View File

@ -1,17 +0,0 @@
FROM arm32v6/alpine:3.6
ADD https://github.com/openfaas/faas/releases/download/0.6.0/fwatchdog-armhf /usr/bin/fwatchdog
RUN chmod +x /usr/bin/fwatchdog
WORKDIR /root/
EXPOSE 8080
ENV http_proxy ""
ENV https_proxy ""
COPY app .
ENV fprocess="/root/app"
HEALTHCHECK --interval=5s CMD [ -e /tmp/.lock ] || exit 1
CMD ["fwatchdog"]

View File

@ -1,10 +0,0 @@
FROM golang:1.7.5
RUN mkdir -p /go/src/app
COPY handler.go /go/src/app
WORKDIR /go/src/app
RUN go get github.com/microcosm-cc/bluemonday && \
go get github.com/russross/blackfriday
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o app .
CMD ["echo"]

View File

@ -1,11 +0,0 @@
FROM alexellis2/go-armhf:1.7.4
RUN mkdir -p /go/src/app
COPY handler.go /go/src/app
WORKDIR /go/src/app
RUN go get github.com/microcosm-cc/bluemonday && \
go get github.com/russross/blackfriday
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o app .
CMD ["echo"]

View File

@ -0,0 +1,33 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
branch = "master"
name = "github.com/microcosm-cc/bluemonday"
packages = ["."]
revision = "68fecaef60268522d2ac3f0123cec9d3bcab7b6e"
[[projects]]
name = "github.com/russross/blackfriday"
packages = ["."]
revision = "cadec560ec52d93835bf2f15bd794700d3a2473b"
version = "v2.0.0"
[[projects]]
branch = "master"
name = "github.com/shurcooL/sanitized_anchor_name"
packages = ["."]
revision = "86672fcb3f950f35f2e675df2240550f2a50762f"
[[projects]]
branch = "master"
name = "golang.org/x/net"
packages = ["html","html/atom"]
revision = "a337091b0525af65de94df2eb7e98bd9962dcbe2"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "501ddd966c3f040b4158a459f36eeda2818e57897613b965188a4f4b15579034"
solver-name = "gps-cdcl"
solver-version = 1

View File

@ -0,0 +1,30 @@
# Gopkg.toml example
#
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
# for detailed Gopkg.toml documentation.
#
# required = ["github.com/user/thing/cmd/thing"]
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
#
# [[constraint]]
# name = "github.com/user/project"
# version = "1.0.0"
#
# [[constraint]]
# name = "github.com/user/project2"
# branch = "dev"
# source = "github.com/myfork/project2"
#
# [[override]]
# name = "github.com/x/y"
# version = "2.4.0"
[[constraint]]
branch = "master"
name = "github.com/microcosm-cc/bluemonday"
[[constraint]]
name = "github.com/russross/blackfriday"
version = "2.0.0"

Binary file not shown.

View File

@ -1,14 +0,0 @@
#!/bin/sh
echo Building functions/markdownrender:build-armhf
docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \
-t functions/markdownrender:build-armhf \
. -f Dockerfile.build.armhf
docker create --name render_extract functions/markdownrender:build-armhf
docker cp render_extract:/go/src/app/app ./app
docker rm -f render_extract
echo Building functions/markdownrender:latest-armhf
docker build --no-cache -t functions/markdownrender:latest-armhf .\
-f Dockerfile.armhf

View File

@ -1,12 +0,0 @@
#!/bin/sh
echo Building functions/markdownrender:build
docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \
-t functions/markdownrender . -f Dockerfile.build
docker create --name render_extract functions/markdownrender
docker cp render_extract:/go/src/app/app ./app
docker rm -f render_extract
echo Building functions/markdownrender:latest
docker build --no-cache -t functions/markdownrender:latest .

View File

@ -11,7 +11,7 @@ import (
func main() {
input, _ := ioutil.ReadAll(os.Stdin)
unsafe := blackfriday.MarkdownCommon([]byte(input))
unsafe := blackfriday.Run([]byte(input), blackfriday.WithNoExtensions())
html := bluemonday.UGCPolicy().SanitizeBytes(unsafe)
fmt.Println(string(html))
}

View File

@ -0,0 +1 @@
repo_token: x2wlA1x0X8CK45ybWpZRCVRB4g7vtkhaw

View File

@ -0,0 +1,20 @@
language: go
go:
- 1.1
- 1.2
- 1.3
- 1.4
- 1.5
- 1.6
- 1.7
- 1.8
- 1.9
- tip
matrix:
allow_failures:
- go: tip
fast_finish: true
install:
- go get golang.org/x/net/html
script:
- go test -v ./...

View File

@ -0,0 +1,51 @@
# Contributing to bluemonday
Third-party patches are essential for keeping bluemonday secure and offering the features developers want. However there are a few guidelines that we need contributors to follow so that we can maintain the quality of work that developers who use bluemonday expect.
## Getting Started
* Make sure you have a [Github account](https://github.com/signup/free)
## Guidelines
1. Do not vendor dependencies. As a security package, were we to vendor dependencies the projects that then vendor bluemonday may not receive the latest security updates to the dependencies. By not vendoring dependencies the project that implements bluemonday will vendor the latest version of any dependent packages. Vendoring is a project problem, not a package problem. bluemonday will be tested against the latest version of dependencies periodically and during any PR/merge.
## Submitting an Issue
* Submit a ticket for your issue, assuming one does not already exist
* Clearly describe the issue including the steps to reproduce (with sample input and output) if it is a bug
If you are reporting a security flaw, you may expect that we will provide the code to fix it for you. Otherwise you may want to submit a pull request to ensure the resolution is applied sooner rather than later:
* Fork the repository on Github
* Issue a pull request containing code to resolve the issue
## Submitting a Pull Request
* Submit a ticket for your issue, assuming one does not already exist
* Describe the reason for the pull request and if applicable show some example inputs and outputs to demonstrate what the patch does
* Fork the repository on Github
* Before submitting the pull request you should
1. Include tests for your patch, 1 test should encapsulate the entire patch and should refer to the Github issue
1. If you have added new exposed/public functionality, you should ensure it is documented appropriately
1. If you have added new exposed/public functionality, you should consider demonstrating how to use it within one of the helpers or shipped policies if appropriate or within a test if modifying a helper or policy is not appropriate
1. Run all of the tests `go test -v ./...` or `make test` and ensure all tests pass
1. Run gofmt `gofmt -w ./$*` or `make fmt`
1. Run vet `go tool vet *.go` or `make vet` and resolve any issues
1. Install golint using `go get -u github.com/golang/lint/golint` and run vet `golint *.go` or `make lint` and resolve every warning
* When submitting the pull request you should
1. Note the issue(s) it resolves, i.e. `Closes #6` in the pull request comment to close issue #6 when the pull request is accepted
Once you have submitted a pull request, we *may* merge it without changes. If we have any comments or feedback, or need you to make changes to your pull request we will update the Github pull request or the associated issue. We expect responses from you within two weeks, and we may close the pull request is there is no activity.
### Contributor Licence Agreement
We haven't gone for the formal "Sign a Contributor Licence Agreement" thing that projects like [puppet](https://cla.puppetlabs.com/), [Mojito](https://developer.yahoo.com/cocktails/mojito/cla/) and companies like [Google](http://code.google.com/legal/individual-cla-v1.0.html) are using.
But we do need to know that we can accept and merge your contributions, so for now the act of contributing a pull request should be considered equivalent to agreeing to a contributor licence agreement, specifically:
You accept that the act of submitting code to the bluemonday project is to grant a copyright licence to the project that is perpetual, worldwide, non-exclusive, no-charge, royalty free and irrevocable.
You accept that all who comply with the licence of the project (BSD 3-clause) are permitted to use your contributions to the project.
You accept, and by submitting code do declare, that you have the legal right to grant such a licence to the project and that each of the contributions is your own original creation.

View File

@ -0,0 +1,6 @@
1. John Graham-Cumming http://jgc.org/
1. Mike Samuel mikesamuel@gmail.com
1. Dmitri Shuralyov shurcooL@gmail.com
1. https://github.com/opennota
1. https://github.com/Gufran

View File

@ -0,0 +1,28 @@
Copyright (c) 2014, David Kitchen <david@buro9.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the organisation (Microcosm) nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,42 @@
# Targets:
#
# all: Builds the code locally after testing
#
# fmt: Formats the source files
# build: Builds the code locally
# vet: Vets the code
# lint: Runs lint over the code (you do not need to fix everything)
# test: Runs the tests
# cover: Gives you the URL to a nice test coverage report
#
# install: Builds, tests and installs the code locally
.PHONY: all fmt build vet lint test cover install
# The first target is always the default action if `make` is called without
# args we build and install into $GOPATH so that it can just be run
all: fmt vet test install
fmt:
@gofmt -s -w ./$*
build:
@go build
vet:
@go vet *.go
lint:
@golint *.go
test:
@go test -v ./...
cover: COVERAGE_FILE := coverage.out
cover:
@go test -coverprofile=$(COVERAGE_FILE) && \
cover -html=$(COVERAGE_FILE) && rm $(COVERAGE_FILE)
install:
@go install ./...

View File

@ -0,0 +1,346 @@
# bluemonday [![Build Status](https://travis-ci.org/microcosm-cc/bluemonday.svg?branch=master)](https://travis-ci.org/microcosm-cc/bluemonday) [![GoDoc](https://godoc.org/github.com/microcosm-cc/bluemonday?status.png)](https://godoc.org/github.com/microcosm-cc/bluemonday) [![Sourcegraph](https://sourcegraph.com/github.com/microcosm-cc/bluemonday/-/badge.svg)](https://sourcegraph.com/github.com/microcosm-cc/bluemonday?badge)
bluemonday is a HTML sanitizer implemented in Go. It is fast and highly configurable.
bluemonday takes untrusted user generated content as an input, and will return HTML that has been sanitised against a whitelist of approved HTML elements and attributes so that you can safely include the content in your web page.
If you accept user generated content, and your server uses Go, you **need** bluemonday.
The default policy for user generated content (`bluemonday.UGCPolicy().Sanitize()`) turns this:
```html
Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World
```
Into a harmless:
```html
Hello World
```
And it turns this:
```html
<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>
```
Into this:
```html
XSS
```
Whilst still allowing this:
```html
<a href="http://www.google.com/">
<img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/>
</a>
```
To pass through mostly unaltered (it gained a rel="nofollow" which is a good thing for user generated content):
```html
<a href="http://www.google.com/" rel="nofollow">
<img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/>
</a>
```
It protects sites from [XSS](http://en.wikipedia.org/wiki/Cross-site_scripting) attacks. There are many [vectors for an XSS attack](https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet) and the best way to mitigate the risk is to sanitize user input against a known safe list of HTML elements and attributes.
You should **always** run bluemonday **after** any other processing.
If you use [blackfriday](https://github.com/russross/blackfriday) or [Pandoc](http://johnmacfarlane.net/pandoc/) then bluemonday should be run after these steps. This ensures that no insecure HTML is introduced later in your process.
bluemonday is heavily inspired by both the [OWASP Java HTML Sanitizer](https://code.google.com/p/owasp-java-html-sanitizer/) and the [HTML Purifier](http://htmlpurifier.org/).
## Technical Summary
Whitelist based, you need to either build a policy describing the HTML elements and attributes to permit (and the `regexp` patterns of attributes), or use one of the supplied policies representing good defaults.
The policy containing the whitelist is applied using a fast non-validating, forward only, token-based parser implemented in the [Go net/html library](https://godoc.org/golang.org/x/net/html) by the core Go team.
We expect to be supplied with well-formatted HTML (closing elements for every applicable open element, nested correctly) and so we do not focus on repairing badly nested or incomplete HTML. We focus on simply ensuring that whatever elements do exist are described in the policy whitelist and that attributes and links are safe for use on your web page. [GIGO](http://en.wikipedia.org/wiki/Garbage_in,_garbage_out) does apply and if you feed it bad HTML bluemonday is not tasked with figuring out how to make it good again.
### Supported Go Versions
bluemonday is tested against Go 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, and tip.
We do not support Go 1.0 as we depend on `golang.org/x/net/html` which includes a reference to `io.ErrNoProgress` which did not exist in Go 1.0.
## Is it production ready?
*Yes*
We are using bluemonday in production having migrated from the widely used and heavily field tested OWASP Java HTML Sanitizer.
We are passing our extensive test suite (including AntiSamy tests as well as tests for any issues raised). Check for any [unresolved issues](https://github.com/microcosm-cc/bluemonday/issues?page=1&state=open) to see whether anything may be a blocker for you.
We invite pull requests and issues to help us ensure we are offering comprehensive protection against various attacks via user generated content.
## Usage
Install in your `${GOPATH}` using `go get -u github.com/microcosm-cc/bluemonday`
Then call it:
```go
package main
import (
"fmt"
"github.com/microcosm-cc/bluemonday"
)
func main() {
p := bluemonday.UGCPolicy()
html := p.Sanitize(
`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`,
)
// Output:
// <a href="http://www.google.com" rel="nofollow">Google</a>
fmt.Println(html)
}
```
We offer three ways to call Sanitize:
```go
p.Sanitize(string) string
p.SanitizeBytes([]byte) []byte
p.SanitizeReader(io.Reader) bytes.Buffer
```
If you are obsessed about performance, `p.SanitizeReader(r).Bytes()` will return a `[]byte` without performing any unnecessary casting of the inputs or outputs. Though the difference is so negligible you should never need to care.
You can build your own policies:
```go
package main
import (
"fmt"
"github.com/microcosm-cc/bluemonday"
)
func main() {
p := bluemonday.NewPolicy()
// Require URLs to be parseable by net/url.Parse and either:
// mailto: http:// or https://
p.AllowStandardURLs()
// We only allow <p> and <a href="">
p.AllowAttrs("href").OnElements("a")
p.AllowElements("p")
html := p.Sanitize(
`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`,
)
// Output:
// <a href="http://www.google.com">Google</a>
fmt.Println(html)
}
```
We ship two default policies:
1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on it's whitelist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy.
2. `bluemonday.UGCPolicy()` which allows a broad selection of HTML elements and attributes that are safe for user generated content. Note that this policy does *not* whitelist iframes, object, embed, styles, script, etc. An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.
## Policy Building
The essence of building a policy is to determine which HTML elements and attributes are considered safe for your scenario. OWASP provide an [XSS prevention cheat sheet](https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet) to help explain the risks, but essentially:
1. Avoid anything other than the standard HTML elements
1. Avoid `script`, `style`, `iframe`, `object`, `embed`, `base` elements that allow code to be executed by the client or third party content to be included that can execute code
1. Avoid anything other than plain HTML attributes with values matched to a regexp
Basically, you should be able to describe what HTML is fine for your scenario. If you do not have confidence that you can describe your policy please consider using one of the shipped policies such as `bluemonday.UGCPolicy()`.
To create a new policy:
```go
p := bluemonday.NewPolicy()
```
To add elements to a policy either add just the elements:
```go
p.AllowElements("b", "strong")
```
Or add elements as a virtue of adding an attribute:
```go
// Not the recommended pattern, see the recommendation on using .Matching() below
p.AllowAttrs("nowrap").OnElements("td", "th")
```
Attributes can either be added to all elements:
```go
p.AllowAttrs("dir").Matching(regexp.MustCompile("(?i)rtl|ltr")).Globally()
```
Or attributes can be added to specific elements:
```go
// Not the recommended pattern, see the recommendation on using .Matching() below
p.AllowAttrs("value").OnElements("li")
```
It is **always** recommended that an attribute be made to match a pattern. XSS in HTML attributes is very easy otherwise:
```go
// \p{L} matches unicode letters, \p{N} matches unicode numbers
p.AllowAttrs("title").Matching(regexp.MustCompile(`[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&]*`)).Globally()
```
You can stop at any time and call .Sanitize():
```go
// string htmlIn passed in from a HTTP POST
htmlOut := p.Sanitize(htmlIn)
```
And you can take any existing policy and extend it:
```go
p := bluemonday.UGCPolicy()
p.AllowElements("fieldset", "select", "option")
```
### Links
Links are difficult beasts to sanitise safely and also one of the biggest attack vectors for malicious content.
It is possible to do this:
```go
p.AllowAttrs("href").Matching(regexp.MustCompile(`(?i)mailto|https?`)).OnElements("a")
```
But that will not protect you as the regular expression is insufficient in this case to have prevented a malformed value doing something unexpected.
We provide some additional global options for safely working with links.
`RequireParseableURLs` will ensure that URLs are parseable by Go's `net/url` package:
```go
p.RequireParseableURLs(true)
```
If you have enabled parseable URLs then the following option will `AllowRelativeURLs`. By default this is disabled (bluemonday is a whitelist tool... you need to explicitly tell us to permit things) and when disabled it will prevent all local and scheme relative URLs (i.e. `href="localpage.html"`, `href="../home.html"` and even `href="//www.google.com"` are relative):
```go
p.AllowRelativeURLs(true)
```
If you have enabled parseable URLs then you can whitelist the schemes (commonly called protocol when thinking of `http` and `https`) that are permitted. Bear in mind that allowing relative URLs in the above option will allow for a blank scheme:
```go
p.AllowURLSchemes("mailto", "http", "https")
```
Regardless of whether you have enabled parseable URLs, you can force all URLs to have a rel="nofollow" attribute. This will be added if it does not exist, but only when the `href` is valid:
```go
// This applies to "a" "area" "link" elements that have a "href" attribute
p.RequireNoFollowOnLinks(true)
```
We provide a convenience method that applies all of the above, but you will still need to whitelist the linkable elements for the URL rules to be applied to:
```go
p.AllowStandardURLs()
p.AllowAttrs("cite").OnElements("blockquote", "q")
p.AllowAttrs("href").OnElements("a", "area")
p.AllowAttrs("src").OnElements("img")
```
An additional complexity regarding links is the data URI as defined in [RFC2397](http://tools.ietf.org/html/rfc2397). The data URI allows for images to be served inline using this format:
```html
<img src="data:image/webp;base64,UklGRh4AAABXRUJQVlA4TBEAAAAvAAAAAAfQ//73v/+BiOh/AAA=">
```
We have provided a helper to verify the mimetype followed by base64 content of data URIs links:
```go
p.AllowDataURIImages()
```
That helper will enable GIF, JPEG, PNG and WEBP images.
It should be noted that there is a potential [security](http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/) [risk](https://capec.mitre.org/data/definitions/244.html) with the use of data URI links. You should only enable data URI links if you already trust the content.
We also have some features to help deal with user generated content:
```go
p.AddTargetBlankToFullyQualifiedLinks(true)
```
This will ensure that anchor `<a href="" />` links that are fully qualified (the href destination includes a host name) will get `target="_blank"` added to them.
Additionally any link that has `target="_blank"` after the policy has been applied will also have the `rel` attribute adjusted to add `noopener`. This means a link may start like `<a href="//host/path"/>` and will end up as `<a href="//host/path" rel="noopener" target="_blank">`. It is important to note that the addition of `noopener` is a security feature and not an issue. There is an unfortunate feature to browsers that a browser window opened as a result of `target="_blank"` can still control the opener (your web page) and this protects against that. The background to this can be found here: [https://dev.to/ben/the-targetblank-vulnerability-by-example](https://dev.to/ben/the-targetblank-vulnerability-by-example)
### Policy Building Helpers
We also bundle some helpers to simplify policy building:
```go
// Permits the "dir", "id", "lang", "title" attributes globally
p.AllowStandardAttributes()
// Permits the "img" element and it's standard attributes
p.AllowImages()
// Permits ordered and unordered lists, and also definition lists
p.AllowLists()
// Permits HTML tables and all applicable elements and non-styling attributes
p.AllowTables()
```
### Invalid Instructions
The following are invalid:
```go
// This does not say where the attributes are allowed, you need to add
// .Globally() or .OnElements(...)
// This will be ignored without error.
p.AllowAttrs("value")
// This does not say where the attributes are allowed, you need to add
// .Globally() or .OnElements(...)
// This will be ignored without error.
p.AllowAttrs(
"type",
).Matching(
regexp.MustCompile("(?i)^(circle|disc|square|a|A|i|I|1)$"),
)
```
Both examples exhibit the same issue, they declare attributes but do not then specify whether they are whitelisted globally or only on specific elements (and which elements). Attributes belong to one or more elements, and the policy needs to declare this.
## Limitations
We are not yet including any tools to help whitelist and sanitize CSS. Which means that unless you wish to do the heavy lifting in a single regular expression (inadvisable), **you should not allow the "style" attribute anywhere**.
It is not the job of bluemonday to fix your bad HTML, it is merely the job of bluemonday to prevent malicious HTML getting through. If you have mismatched HTML elements, or non-conforming nesting of elements, those will remain. But if you have well-structured HTML bluemonday will not break it.
## TODO
* Add support for CSS sanitisation to allow some CSS properties based on a whitelist, possibly using the [Gorilla CSS3 scanner](http://www.gorillatoolkit.org/pkg/css/scanner)
* Investigate whether devs want to blacklist elements and attributes. This would allow devs to take an existing policy (such as the `bluemonday.UGCPolicy()` ) that encapsulates 90% of what they're looking for but does more than they need, and to remove the extra things they do not want to make it 100% what they want
* Investigate whether devs want a validating HTML mode, in which the HTML elements are not just transformed into a balanced tree (every start tag has a closing tag at the correct depth) but also that elements and character data appear only in their allowed context (i.e. that a `table` element isn't a descendent of a `caption`, that `colgroup`, `thead`, `tbody`, `tfoot` and `tr` are permitted, and that character data is not permitted)
## Development
If you have cloned this repo you will probably need the dependency:
`go get golang.org/x/net/html`
Gophers can use their familiar tools:
`go build`
`go test`
I personally use a Makefile as it spares typing the same args over and over whilst providing consistency for those of us who jump from language to language and enjoy just typing `make` in a project directory and watch magic happen.
`make` will build, vet, test and install the library.
`make clean` will remove the library from a *single* `${GOPATH}/pkg` directory tree
`make test` will run the tests
`make cover` will run the tests and *open a browser window* with the coverage report
`make lint` will run golint (install via `go get github.com/golang/lint/golint`)
## Long term goals
1. Open the code to adversarial peer review similar to the [Attack Review Ground Rules](https://code.google.com/p/owasp-java-html-sanitizer/wiki/AttackReviewGroundRules)
1. Raise funds and pay for an external security review

View File

@ -0,0 +1,12 @@
/*
Package main demonstrates a HTML email cleaner.
It should be noted that this uses bluemonday to sanitize the HTML but as it
preserves the styling of the email this should not be considered a safe or XSS
secure approach.
It does function as a basic demonstration of how to take HTML emails, which are
notorious for having inconsistent, obselete and poorly formatted HTML, and to
use bluemonday to normalise the output.
*/
package main

View File

@ -0,0 +1,77 @@
package main
import (
"fmt"
"io/ioutil"
"log"
"os"
"regexp"
"github.com/microcosm-cc/bluemonday"
)
var (
// Color is a valid hex color or name of a web safe color
Color = regexp.MustCompile(`(?i)^(#[0-9a-fA-F]{1,6}|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua|orange|aliceblue|antiquewhite|aquamarine|azure|beige|bisque|blanchedalmond|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkgrey|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkslategrey|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dimgrey|dodgerblue|firebrick|floralwhite|forestgreen|gainsboro|ghostwhite|gold|goldenrod|greenyellow|grey|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgray|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightslategrey|lightsteelblue|lightyellow|limegreen|linen|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|oldlace|olivedrab|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|skyblue|slateblue|slategray|slategrey|snow|springgreen|steelblue|tan|thistle|tomato|turquoise|violet|wheat|whitesmoke|yellowgreen|rebeccapurple)$`)
// ButtonType is a button type, or a style type, i.e. "submit"
ButtonType = regexp.MustCompile(`(?i)^[a-zA-Z][a-zA-Z-]{1,30}[a-zA-Z]$`)
// StyleType is the valid type attribute on a style tag in the <head>
StyleType = regexp.MustCompile(`(?i)^text\/css$`)
)
func main() {
// Define a policy, we are using the UGC policy as a base.
p := bluemonday.UGCPolicy()
// HTML email is often displayed in iframes and needs to preserve core
// structure
p.AllowDocType(true)
p.AllowElements("html", "head", "body", "title")
// There are not safe, and is only being done here to demonstrate how to
// process HTML emails where styling has to be preserved. This is at the
// expense of security.
p.AllowAttrs("type").Matching(StyleType).OnElements("style")
p.AllowAttrs("style").Globally()
// HTML email frequently contains obselete and basic HTML
p.AllowElements("font", "main", "nav", "header", "footer", "kbd", "legend")
// Need to permit the style tag, and buttons are often found in emails (why?)
p.AllowAttrs("type").Matching(ButtonType).OnElements("button")
// HTML email tends to see the use of obselete spacing and styling attributes
p.AllowAttrs("bgcolor", "color").Matching(Color).OnElements("basefont", "font", "hr")
p.AllowAttrs("border").Matching(bluemonday.Integer).OnElements("img", "table")
p.AllowAttrs("cellpadding", "cellspacing").Matching(bluemonday.Integer).OnElements("table")
// Allow "class" attributes on all elements
p.AllowStyling()
// Allow images to be embedded via data-uri
p.AllowDataURIImages()
// Add "rel=nofollow" to links
p.RequireNoFollowOnLinks(true)
p.RequireNoFollowOnFullyQualifiedLinks(true)
// Open external links in a new window/tab
p.AddTargetBlankToFullyQualifiedLinks(true)
// Read input from stdin so that this is a nice unix utility and can receive
// piped input
dirty, err := ioutil.ReadAll(os.Stdin)
if err != nil {
log.Fatal(err)
}
// Apply the policy and write to stdout
fmt.Fprint(
os.Stdout,
p.Sanitize(
string(dirty),
),
)
}

View File

@ -0,0 +1,13 @@
/*
Package main demonstrates a simple user generated content sanitizer.
This is the configuration I use on the sites that I run, it allows a lot of safe
HTML that in my case comes from the blackfriday markdown package. As markdown
itself allows HTML the UGCPolicy includes most common HTML.
CSS and JavaScript is excluded (not white-listed), as are form elements and most
embedded media that isn't just an image or image map.
As I'm paranoid, I also do not allow data-uri images and embeds.
*/
package main

View File

@ -0,0 +1,37 @@
package main
import (
"fmt"
"io/ioutil"
"log"
"os"
"github.com/microcosm-cc/bluemonday"
)
func main() {
// Define a policy, we are using the UGC policy as a base.
p := bluemonday.UGCPolicy()
// Add "rel=nofollow" to links
p.RequireNoFollowOnLinks(true)
p.RequireNoFollowOnFullyQualifiedLinks(true)
// Open external links in a new window/tab
p.AddTargetBlankToFullyQualifiedLinks(true)
// Read input from stdin so that this is a nice unix utility and can receive
// piped input
dirty, err := ioutil.ReadAll(os.Stdin)
if err != nil {
log.Fatal(err)
}
// Apply the policy and write to stdout
fmt.Fprint(
os.Stdout,
p.Sanitize(
string(dirty),
),
)
}

View File

@ -0,0 +1,104 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
Package bluemonday provides a way of describing a whitelist of HTML elements
and attributes as a policy, and for that policy to be applied to untrusted
strings from users that may contain markup. All elements and attributes not on
the whitelist will be stripped.
The default bluemonday.UGCPolicy().Sanitize() turns this:
Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World
Into the more harmless:
Hello World
And it turns this:
<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>
Into this:
XSS
Whilst still allowing this:
<a href="http://www.google.com/">
<img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/>
</a>
To pass through mostly unaltered (it gained a rel="nofollow"):
<a href="http://www.google.com/" rel="nofollow">
<img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/>
</a>
The primary purpose of bluemonday is to take potentially unsafe user generated
content (from things like Markdown, HTML WYSIWYG tools, etc) and make it safe
for you to put on your website.
It protects sites against XSS (http://en.wikipedia.org/wiki/Cross-site_scripting)
and other malicious content that a user interface may deliver. There are many
vectors for an XSS attack (https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet)
and the safest thing to do is to sanitize user input against a known safe list
of HTML elements and attributes.
Note: You should always run bluemonday after any other processing.
If you use blackfriday (https://github.com/russross/blackfriday) or
Pandoc (http://johnmacfarlane.net/pandoc/) then bluemonday should be run after
these steps. This ensures that no insecure HTML is introduced later in your
process.
bluemonday is heavily inspired by both the OWASP Java HTML Sanitizer
(https://code.google.com/p/owasp-java-html-sanitizer/) and the HTML Purifier
(http://htmlpurifier.org/).
We ship two default policies, one is bluemonday.StrictPolicy() and can be
thought of as equivalent to stripping all HTML elements and their attributes as
it has nothing on it's whitelist.
The other is bluemonday.UGCPolicy() and allows a broad selection of HTML
elements and attributes that are safe for user generated content. Note that
this policy does not whitelist iframes, object, embed, styles, script, etc.
The essence of building a policy is to determine which HTML elements and
attributes are considered safe for your scenario. OWASP provide an XSS
prevention cheat sheet ( https://www.google.com/search?q=xss+prevention+cheat+sheet )
to help explain the risks, but essentially:
1. Avoid whitelisting anything other than plain HTML elements
2. Avoid whitelisting `script`, `style`, `iframe`, `object`, `embed`, `base`
elements
3. Avoid whitelisting anything other than plain HTML elements with simple
values that you can match to a regexp
*/
package bluemonday

View File

@ -0,0 +1,240 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday_test
import (
"fmt"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday"
)
func Example() {
// Create a new policy
p := bluemonday.NewPolicy()
// Add elements to a policy without attributes
p.AllowElements("b", "strong")
// Add elements as a virtue of adding an attribute
p.AllowAttrs("nowrap").OnElements("td", "th")
// Attributes can either be added to all elements
p.AllowAttrs("dir").Globally()
//Or attributes can be added to specific elements
p.AllowAttrs("value").OnElements("li")
// It is ALWAYS recommended that an attribute be made to match a pattern
// XSS in HTML attributes is a very easy attack vector
// \p{L} matches unicode letters, \p{N} matches unicode numbers
p.AllowAttrs("title").Matching(regexp.MustCompile(`[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&]*`)).Globally()
// You can stop at any time and call .Sanitize()
// Assumes that string htmlIn was passed in from a HTTP POST and contains
// untrusted user generated content
htmlIn := `untrusted user generated content <body onload="alert('XSS')">`
fmt.Println(p.Sanitize(htmlIn))
// And you can take any existing policy and extend it
p = bluemonday.UGCPolicy()
p.AllowElements("fieldset", "select", "option")
// Links are complex beasts and one of the biggest attack vectors for
// malicious content so we have included features specifically to help here.
// This is not recommended:
p = bluemonday.NewPolicy()
p.AllowAttrs("href").Matching(regexp.MustCompile(`(?i)mailto|https?`)).OnElements("a")
// The regexp is insufficient in this case to have prevented a malformed
// value doing something unexpected.
// This will ensure that URLs are not considered invalid by Go's net/url
// package.
p.RequireParseableURLs(true)
// If you have enabled parseable URLs then the following option will allow
// relative URLs. By default this is disabled and will prevent all local and
// schema relative URLs (i.e. `href="//www.google.com"` is schema relative).
p.AllowRelativeURLs(true)
// If you have enabled parseable URLs then you can whitelist the schemas
// that are permitted. Bear in mind that allowing relative URLs in the above
// option allows for blank schemas.
p.AllowURLSchemes("mailto", "http", "https")
// Regardless of whether you have enabled parseable URLs, you can force all
// URLs to have a rel="nofollow" attribute. This will be added if it does
// not exist.
// This applies to "a" "area" "link" elements that have a "href" attribute
p.RequireNoFollowOnLinks(true)
// We provide a convenience function that applies all of the above, but you
// will still need to whitelist the linkable elements:
p = bluemonday.NewPolicy()
p.AllowStandardURLs()
p.AllowAttrs("cite").OnElements("blockquote")
p.AllowAttrs("href").OnElements("a", "area")
p.AllowAttrs("src").OnElements("img")
// Policy Building Helpers
// If you've got this far and you're bored already, we also bundle some
// other convenience functions
p = bluemonday.NewPolicy()
p.AllowStandardAttributes()
p.AllowImages()
p.AllowLists()
p.AllowTables()
}
func ExampleNewPolicy() {
// NewPolicy is a blank policy and we need to explicitly whitelist anything
// that we wish to allow through
p := bluemonday.NewPolicy()
// We ensure any URLs are parseable and have rel="nofollow" where applicable
p.AllowStandardURLs()
// AllowStandardURLs already ensures that the href will be valid, and so we
// can skip the .Matching()
p.AllowAttrs("href").OnElements("a")
// We allow paragraphs too
p.AllowElements("p")
html := p.Sanitize(
`<p><a onblur="alert(secret)" href="http://www.google.com">Google</a></p>`,
)
fmt.Println(html)
// Output:
//<p><a href="http://www.google.com" rel="nofollow">Google</a></p>
}
func ExampleStrictPolicy() {
// StrictPolicy is equivalent to NewPolicy and as nothing else is declared
// we are stripping all elements (and their attributes)
p := bluemonday.StrictPolicy()
html := p.Sanitize(
`Goodbye <a onblur="alert(secret)" href="http://en.wikipedia.org/wiki/Goodbye_Cruel_World_(Pink_Floyd_song)">Cruel</a> World`,
)
fmt.Println(html)
// Output:
//Goodbye Cruel World
}
func ExampleUGCPolicy() {
// UGCPolicy is a convenience policy for user generated content.
p := bluemonday.UGCPolicy()
html := p.Sanitize(
`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`,
)
fmt.Println(html)
// Output:
//<a href="http://www.google.com" rel="nofollow">Google</a>
}
func ExamplePolicy_AllowAttrs() {
p := bluemonday.NewPolicy()
// Allow the 'title' attribute on every HTML element that has been
// whitelisted
p.AllowAttrs("title").Matching(bluemonday.Paragraph).Globally()
// Allow the 'abbr' attribute on only the 'td' and 'th' elements.
p.AllowAttrs("abbr").Matching(bluemonday.Paragraph).OnElements("td", "th")
// Allow the 'colspan' and 'rowspan' attributes, matching a positive integer
// pattern, on only the 'td' and 'th' elements.
p.AllowAttrs("colspan", "rowspan").Matching(
bluemonday.Integer,
).OnElements("td", "th")
}
func ExamplePolicy_AllowElements() {
p := bluemonday.NewPolicy()
// Allow styling elements without attributes
p.AllowElements("br", "div", "hr", "p", "span")
}
func ExamplePolicy_Sanitize() {
// UGCPolicy is a convenience policy for user generated content.
p := bluemonday.UGCPolicy()
// string in, string out
html := p.Sanitize(`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`)
fmt.Println(html)
// Output:
//<a href="http://www.google.com" rel="nofollow">Google</a>
}
func ExamplePolicy_SanitizeBytes() {
// UGCPolicy is a convenience policy for user generated content.
p := bluemonday.UGCPolicy()
// []byte in, []byte out
b := []byte(`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`)
b = p.SanitizeBytes(b)
fmt.Println(string(b))
// Output:
//<a href="http://www.google.com" rel="nofollow">Google</a>
}
func ExamplePolicy_SanitizeReader() {
// UGCPolicy is a convenience policy for user generated content.
p := bluemonday.UGCPolicy()
// io.Reader in, bytes.Buffer out
r := strings.NewReader(`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`)
buf := p.SanitizeReader(r)
fmt.Println(buf.String())
// Output:
//<a href="http://www.google.com" rel="nofollow">Google</a>
}

View File

@ -0,0 +1,297 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
import (
"encoding/base64"
"net/url"
"regexp"
)
// A selection of regular expressions that can be used as .Matching() rules on
// HTML attributes.
var (
// CellAlign handles the `align` attribute
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align
CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)
// CellVerticalAlign handles the `valign` attribute
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign
CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)
// Direction handles the `dir` attribute
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir
Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)
// ImageAlign handles the `align` attribute on the `image` tag
// http://www.w3.org/MarkUp/Test/Img/imgtest.html
ImageAlign = regexp.MustCompile(
`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,
)
// Integer describes whole positive integers (including 0) used in places
// like td.colspan
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan
Integer = regexp.MustCompile(`^[0-9]+$`)
// ISO8601 according to the W3 group is only a subset of the ISO8601
// standard: http://www.w3.org/TR/NOTE-datetime
//
// Used in places like time.datetime
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime
//
// Matches patterns:
// Year:
// YYYY (eg 1997)
// Year and month:
// YYYY-MM (eg 1997-07)
// Complete date:
// YYYY-MM-DD (eg 1997-07-16)
// Complete date plus hours and minutes:
// YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
// Complete date plus hours, minutes and seconds:
// YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
// Complete date plus hours, minutes, seconds and a decimal fraction of a
// second
// YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
ISO8601 = regexp.MustCompile(
`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` +
`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,
)
// ListType encapsulates the common value as well as the latest spec
// values for lists
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)
// SpaceSeparatedTokens is used in places like `a.rel` and the common attribute
// `class` which both contain space delimited lists of data tokens
// http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def
// Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers
SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)
// Number is a double value used on HTML5 meter and progress elements
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element
Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)
// NumberOrPercent is used predominantly as units of measurement in width
// and height attributes
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height
NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)
// Paragraph of text in an attribute such as *.'title', img.alt, etc
// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title
// Note that we are not allowing chars that could close tags like '>'
Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)
// dataURIImagePrefix is used by AllowDataURIImages to define the acceptable
// prefix of data URIs that contain common web image formats.
//
// This is not exported as it's not useful by itself, and only has value
// within the AllowDataURIImages func
dataURIImagePrefix = regexp.MustCompile(
`^image/(gif|jpeg|png|webp);base64,`,
)
)
// AllowStandardURLs is a convenience function that will enable rel="nofollow"
// on "a", "area" and "link" (if you have allowed those elements) and will
// ensure that the URL values are parseable and either relative or belong to the
// "mailto", "http", or "https" schemes
func (p *Policy) AllowStandardURLs() {
// URLs must be parseable by net/url.Parse()
p.RequireParseableURLs(true)
// !url.IsAbs() is permitted
p.AllowRelativeURLs(true)
// Most common URL schemes only
p.AllowURLSchemes("mailto", "http", "https")
// For all anchors we will add rel="nofollow" if it does not already exist
// This applies to "a" "area" "link"
p.RequireNoFollowOnLinks(true)
}
// AllowStandardAttributes will enable "id", "title" and the language specific
// attributes "dir" and "lang" on all elements that are whitelisted
func (p *Policy) AllowStandardAttributes() {
// "dir" "lang" are permitted as both language attributes affect charsets
// and direction of text.
p.AllowAttrs("dir").Matching(Direction).Globally()
p.AllowAttrs(
"lang",
).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally()
// "id" is permitted. This is pretty much as some HTML elements require this
// to work well ("dfn" is an example of a "id" being value)
// This does create a risk that JavaScript and CSS within your web page
// might identify the wrong elements. Ensure that you select things
// accurately
p.AllowAttrs("id").Matching(
regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`),
).Globally()
// "title" is permitted as it improves accessibility.
p.AllowAttrs("title").Matching(Paragraph).Globally()
}
// AllowStyling presently enables the class attribute globally.
//
// Note: When bluemonday ships a CSS parser and we can safely sanitise that,
// this will also allow sanitized styling of elements via the style attribute.
func (p *Policy) AllowStyling() {
// "class" is permitted globally
p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally()
}
// AllowImages enables the img element and some popular attributes. It will also
// ensure that URL values are parseable. This helper does not enable data URI
// images, for that you should also use the AllowDataURIImages() helper.
func (p *Policy) AllowImages() {
// "img" is permitted
p.AllowAttrs("align").Matching(ImageAlign).OnElements("img")
p.AllowAttrs("alt").Matching(Paragraph).OnElements("img")
p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img")
// Standard URLs enabled
p.AllowStandardURLs()
p.AllowAttrs("src").OnElements("img")
}
// AllowDataURIImages permits the use of inline images defined in RFC2397
// http://tools.ietf.org/html/rfc2397
// http://en.wikipedia.org/wiki/Data_URI_scheme
//
// Images must have a mimetype matching:
// image/gif
// image/jpeg
// image/png
// image/webp
//
// NOTE: There is a potential security risk to allowing data URIs and you should
// only permit them on content you already trust.
// http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/
// https://capec.mitre.org/data/definitions/244.html
func (p *Policy) AllowDataURIImages() {
// URLs must be parseable by net/url.Parse()
p.RequireParseableURLs(true)
// Supply a function to validate images contained within data URI
p.AllowURLSchemeWithCustomPolicy(
"data",
func(url *url.URL) (allowUrl bool) {
if url.RawQuery != "" || url.Fragment != "" {
return false
}
matched := dataURIImagePrefix.FindString(url.Opaque)
if matched == "" {
return false
}
_, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):])
if err != nil {
return false
}
return true
},
)
}
// AllowLists will enabled ordered and unordered lists, as well as definition
// lists
func (p *Policy) AllowLists() {
// "ol" "ul" are permitted
p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul")
// "li" is permitted
p.AllowAttrs("type").Matching(ListType).OnElements("li")
p.AllowAttrs("value").Matching(Integer).OnElements("li")
// "dl" "dt" "dd" are permitted
p.AllowElements("dl", "dt", "dd")
}
// AllowTables will enable a rich set of elements and attributes to describe
// HTML tables
func (p *Policy) AllowTables() {
// "table" is permitted
p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table")
p.AllowAttrs("summary").Matching(Paragraph).OnElements("table")
// "caption" is permitted
p.AllowElements("caption")
// "col" "colgroup" are permitted
p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup")
p.AllowAttrs("height", "width").Matching(
NumberOrPercent,
).OnElements("col", "colgroup")
p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col")
p.AllowAttrs("valign").Matching(
CellVerticalAlign,
).OnElements("col", "colgroup")
// "thead" "tr" are permitted
p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr")
p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr")
// "td" "th" are permitted
p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th")
p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th")
p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th")
p.AllowAttrs("headers").Matching(
SpaceSeparatedTokens,
).OnElements("td", "th")
p.AllowAttrs("height", "width").Matching(
NumberOrPercent,
).OnElements("td", "th")
p.AllowAttrs(
"scope",
).Matching(
regexp.MustCompile(`(?i)(?:row|col)(?:group)?`),
).OnElements("td", "th")
p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th")
p.AllowAttrs("nowrap").Matching(
regexp.MustCompile(`(?i)|nowrap`),
).OnElements("td", "th")
// "tbody" "tfoot"
p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot")
p.AllowAttrs("valign").Matching(
CellVerticalAlign,
).OnElements("tbody", "tfoot")
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,253 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
import (
"regexp"
)
// StrictPolicy returns an empty policy, which will effectively strip all HTML
// elements and their attributes from a document.
func StrictPolicy() *Policy {
return NewPolicy()
}
// StripTagsPolicy is DEPRECATED. Use StrictPolicy instead.
func StripTagsPolicy() *Policy {
return StrictPolicy()
}
// UGCPolicy returns a policy aimed at user generated content that is a result
// of HTML WYSIWYG tools and Markdown conversions.
//
// This is expected to be a fairly rich document where as much markup as
// possible should be retained. Markdown permits raw HTML so we are basically
// providing a policy to sanitise HTML5 documents safely but with the
// least intrusion on the formatting expectations of the user.
func UGCPolicy() *Policy {
p := NewPolicy()
///////////////////////
// Global attributes //
///////////////////////
// "class" is not permitted as we are not allowing users to style their own
// content
p.AllowStandardAttributes()
//////////////////////////////
// Global URL format policy //
//////////////////////////////
p.AllowStandardURLs()
////////////////////////////////
// Declarations and structure //
////////////////////////////////
// "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
// expecting user generated content to be a fragment of HTML and not a full
// document.
//////////////////////////
// Sectioning root tags //
//////////////////////////
// "article" and "aside" are permitted and takes no attributes
p.AllowElements("article", "aside")
// "body" is not permitted as we are expecting user generated content to be a fragment
// of HTML and not a full document.
// "details" is permitted, including the "open" attribute which can either
// be blank or the value "open".
p.AllowAttrs(
"open",
).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
// "fieldset" is not permitted as we are not allowing forms to be created.
// "figure" is permitted and takes no attributes
p.AllowElements("figure")
// "nav" is not permitted as it is assumed that the site (and not the user)
// has defined navigation elements
// "section" is permitted and takes no attributes
p.AllowElements("section")
// "summary" is permitted and takes no attributes
p.AllowElements("summary")
//////////////////////////
// Headings and footers //
//////////////////////////
// "footer" is not permitted as we expect user content to be a fragment and
// not structural to this extent
// "h1" through "h6" are permitted and take no attributes
p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
// "header" is not permitted as we expect user content to be a fragment and
// not structural to this extent
// "hgroup" is permitted and takes no attributes
p.AllowElements("hgroup")
/////////////////////////////////////
// Content grouping and separating //
/////////////////////////////////////
// "blockquote" is permitted, including the "cite" attribute which must be
// a standard URL.
p.AllowAttrs("cite").OnElements("blockquote")
// "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
p.AllowElements("br", "div", "hr", "p", "span", "wbr")
///////////
// Links //
///////////
// "a" is permitted
p.AllowAttrs("href").OnElements("a")
// "area" is permitted along with the attributes that map image maps work
p.AllowAttrs("name").Matching(
regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
).OnElements("map")
p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
p.AllowAttrs("coords").Matching(
regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
).OnElements("area")
p.AllowAttrs("href").OnElements("area")
p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
p.AllowAttrs("shape").Matching(
regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
).OnElements("area")
p.AllowAttrs("usemap").Matching(
regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
).OnElements("img")
// "link" is not permitted
/////////////////////
// Phrase elements //
/////////////////////
// The following are all inline phrasing elements
p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
"figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
// "q" is permitted and "cite" is a URL and handled by URL policies
p.AllowAttrs("cite").OnElements("q")
// "time" is permitted
p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
////////////////////
// Style elements //
////////////////////
// block and inline elements that impart no semantic meaning but style the
// document
p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
// "style" is not permitted as we are not yet sanitising CSS and it is an
// XSS attack vector
//////////////////////
// HTML5 Formatting //
//////////////////////
// "bdi" "bdo" are permitted
p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
// "rp" "rt" "ruby" are permitted
p.AllowElements("rp", "rt", "ruby")
///////////////////////////
// HTML5 Change tracking //
///////////////////////////
// "del" "ins" are permitted
p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
///////////
// Lists //
///////////
p.AllowLists()
////////////
// Tables //
////////////
p.AllowTables()
///////////
// Forms //
///////////
// By and large, forms are not permitted. However there are some form
// elements that can be used to present data, and we do permit those
//
// "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
// "textarea" "optgroup" "option" are all not permitted
// "meter" is permitted
p.AllowAttrs(
"value",
"min",
"max",
"low",
"high",
"optimum",
).Matching(Number).OnElements("meter")
// "progress" is permitted
p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
//////////////////////
// Embedded content //
//////////////////////
// Vast majority not permitted
// "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
// "video" are all not permitted
p.AllowImages()
return p
}

View File

@ -0,0 +1,203 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
import "testing"
func TestStrictPolicy(t *testing.T) {
p := StrictPolicy()
tests := []test{
{
in: "Hello, <b>World</b>!",
expected: "Hello, World!",
},
{
in: "<blockquote>Hello, <b>World</b>!",
expected: "Hello, World!",
},
{ // Real world example from a message board
in: `<quietly>email me - addy in profile</quiet>`,
expected: `email me - addy in profile`,
},
{},
}
for ii, test := range tests {
out := p.Sanitize(test.in)
if out != test.expected {
t.Errorf(
"test %d failed;\ninput : %s\noutput : %s\nexpected: %s",
ii,
test.in,
out,
test.expected,
)
}
}
}
func TestUGCPolicy(t *testing.T) {
tests := []test{
// Simple formatting
{in: "Hello, World!", expected: "Hello, World!"},
{in: "Hello, <b>World</b>!", expected: "Hello, <b>World</b>!"},
// Blocks and formatting
{
in: "<p>Hello, <b onclick=alert(1337)>World</b>!</p>",
expected: "<p>Hello, <b>World</b>!</p>",
},
{
in: "<p onclick=alert(1337)>Hello, <b>World</b>!</p>",
expected: "<p>Hello, <b>World</b>!</p>",
},
// Inline tags featuring globals
{
in: `<a href="http://example.org/" rel="nofollow">Hello, <b>World</b></a><a href="https://example.org/#!" rel="nofollow">!</a>`,
expected: `<a href="http://example.org/" rel="nofollow">Hello, <b>World</b></a><a href="https://example.org/#%21" rel="nofollow">!</a>`,
},
{
in: `Hello, <b>World</b><a title="!" href="https://example.org/#!" rel="nofollow">!</a>`,
expected: `Hello, <b>World</b><a title="!" href="https://example.org/#%21" rel="nofollow">!</a>`,
},
// Images
{
in: `<a href="javascript:alert(1337)">foo</a>`,
expected: `foo`,
},
{
in: `<img src="http://example.org/foo.gif">`,
expected: `<img src="http://example.org/foo.gif">`,
},
{
in: `<img src="http://example.org/x.gif" alt="y" width=96 height=64 border=0>`,
expected: `<img src="http://example.org/x.gif" alt="y" width="96" height="64">`,
},
{
in: `<img src="http://example.org/x.png" alt="y" width="widgy" height=64 border=0>`,
expected: `<img src="http://example.org/x.png" alt="y" height="64">`,
},
// Anchors
{
in: `<a href="foo.html">Link text</a>`,
expected: `<a href="foo.html" rel="nofollow">Link text</a>`,
},
{
in: `<a href="foo.html" onclick="alert(1337)">Link text</a>`,
expected: `<a href="foo.html" rel="nofollow">Link text</a>`,
},
{
in: `<a href="http://example.org/x.html" onclick="alert(1337)">Link text</a>`,
expected: `<a href="http://example.org/x.html" rel="nofollow">Link text</a>`,
},
{
in: `<a href="https://example.org/x.html" onclick="alert(1337)">Link text</a>`,
expected: `<a href="https://example.org/x.html" rel="nofollow">Link text</a>`,
},
{
in: `<a href="HTTPS://example.org/x.html" onclick="alert(1337)">Link text</a>`,
expected: `<a href="https://example.org/x.html" rel="nofollow">Link text</a>`,
},
{
in: `<a href="//example.org/x.html" onclick="alert(1337)">Link text</a>`,
expected: `<a href="//example.org/x.html" rel="nofollow">Link text</a>`,
},
{
in: `<a href="javascript:alert(1337).html" onclick="alert(1337)">Link text</a>`,
expected: `Link text`,
},
{
in: `<a name="header" id="header">Header text</a>`,
expected: `<a id="header">Header text</a>`,
},
// Image map and links
{
in: `<img src="planets.gif" width="145" height="126" alt="" usemap="#demomap"><map name="demomap"><area shape="rect" coords="0,0,82,126" href="demo.htm" alt="1"><area shape="circle" coords="90,58,3" href="demo.htm" alt="2"><area shape="circle" coords="124,58,8" href="demo.htm" alt="3"></map>`,
expected: `<img src="planets.gif" width="145" height="126" alt="" usemap="#demomap"><map name="demomap"><area shape="rect" coords="0,0,82,126" href="demo.htm" alt="1" rel="nofollow"><area shape="circle" coords="90,58,3" href="demo.htm" alt="2" rel="nofollow"><area shape="circle" coords="124,58,8" href="demo.htm" alt="3" rel="nofollow"></map>`,
},
// Tables
{
in: `<table style="color: rgb(0, 0, 0);">` +
`<tbody>` +
`<tr>` +
`<th>Column One</th><th>Column Two</th>` +
`</tr>` +
`<tr>` +
`<td align="center"` +
` style="background-color: rgb(255, 255, 254);">` +
`<font size="2">Size 2</font></td>` +
`<td align="center"` +
` style="background-color: rgb(255, 255, 254);">` +
`<font size="7">Size 7</font></td>` +
`</tr>` +
`</tbody>` +
`</table>`,
expected: "" +
`<table>` +
`<tbody>` +
`<tr>` +
`<th>Column One</th><th>Column Two</th>` +
`</tr>` +
`<tr>` +
`<td align="center">Size 2</td>` +
`<td align="center">Size 7</td>` +
`</tr>` +
`</tbody>` +
`</table>`,
},
// Ordering
{
in: `xss<a href="http://www.google.de" style="color:red;" onmouseover=alert(1) onmousemove="alert(2)" onclick=alert(3)>g<img src="http://example.org"/>oogle</a>`,
expected: `xss<a href="http://www.google.de" rel="nofollow">g<img src="http://example.org"/>oogle</a>`,
},
// OWASP 25 June 2014 09:15 Strange behaviour
{
in: "<table>Hallo\r\n<script>SCRIPT</script>\nEnde\n\r",
expected: "<table>Hallo\n\nEnde\n\n",
},
}
p := UGCPolicy()
for ii, test := range tests {
out := p.Sanitize(test.in)
if out != test.expected {
t.Errorf(
"test %d failed;\ninput : %s\noutput : %s\nexpected: %s",
ii,
test.in,
out,
test.expected,
)
}
}
}

View File

@ -0,0 +1,550 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
import (
"net/url"
"regexp"
"strings"
)
// Policy encapsulates the whitelist of HTML elements and attributes that will
// be applied to the sanitised HTML.
//
// You should use bluemonday.NewPolicy() to create a blank policy as the
// unexported fields contain maps that need to be initialized.
type Policy struct {
// Declares whether the maps have been initialized, used as a cheap check to
// ensure that those using Policy{} directly won't cause nil pointer
// exceptions
initialized bool
// Allows the <!DOCTYPE > tag to exist in the sanitized document
allowDocType bool
// If true then we add spaces when stripping tags, specifically the closing
// tag is replaced by a space character.
addSpaces bool
// When true, add rel="nofollow" to HTML anchors
requireNoFollow bool
// When true, add rel="nofollow" to HTML anchors
// Will add for href="http://foo"
// Will skip for href="/foo" or href="foo"
requireNoFollowFullyQualifiedLinks bool
// When true add target="_blank" to fully qualified links
// Will add for href="http://foo"
// Will skip for href="/foo" or href="foo"
addTargetBlankToFullyQualifiedLinks bool
// When true, URLs must be parseable by "net/url" url.Parse()
requireParseableURLs bool
// When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
allowRelativeURLs bool
// map[htmlElementName]map[htmlAttributeName]attrPolicy
elsAndAttrs map[string]map[string]attrPolicy
// map[htmlAttributeName]attrPolicy
globalAttrs map[string]attrPolicy
// If urlPolicy is nil, all URLs with matching schema are allowed.
// Otherwise, only the URLs with matching schema and urlPolicy(url)
// returning true are allowed.
allowURLSchemes map[string]urlPolicy
// If an element has had all attributes removed as a result of a policy
// being applied, then the element would be removed from the output.
//
// However some elements are valid and have strong layout meaning without
// any attributes, i.e. <table>. To prevent those being removed we maintain
// a list of elements that are allowed to have no attributes and that will
// be maintained in the output HTML.
setOfElementsAllowedWithoutAttrs map[string]struct{}
setOfElementsToSkipContent map[string]struct{}
}
type attrPolicy struct {
// optional pattern to match, when not nil the regexp needs to match
// otherwise the attribute is removed
regexp *regexp.Regexp
}
type attrPolicyBuilder struct {
p *Policy
attrNames []string
regexp *regexp.Regexp
allowEmpty bool
}
type urlPolicy func(url *url.URL) (allowUrl bool)
// init initializes the maps if this has not been done already
func (p *Policy) init() {
if !p.initialized {
p.elsAndAttrs = make(map[string]map[string]attrPolicy)
p.globalAttrs = make(map[string]attrPolicy)
p.allowURLSchemes = make(map[string]urlPolicy)
p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
p.setOfElementsToSkipContent = make(map[string]struct{})
p.initialized = true
}
}
// NewPolicy returns a blank policy with nothing whitelisted or permitted. This
// is the recommended way to start building a policy and you should now use
// AllowAttrs() and/or AllowElements() to construct the whitelist of HTML
// elements and attributes.
func NewPolicy() *Policy {
p := Policy{}
p.addDefaultElementsWithoutAttrs()
p.addDefaultSkipElementContent()
return &p
}
// AllowAttrs takes a range of HTML attribute names and returns an
// attribute policy builder that allows you to specify the pattern and scope of
// the whitelisted attribute.
//
// The attribute policy is only added to the core policy when either Globally()
// or OnElements(...) are called.
func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
p.init()
abp := attrPolicyBuilder{
p: p,
allowEmpty: false,
}
for _, attrName := range attrNames {
abp.attrNames = append(abp.attrNames, strings.ToLower(attrName))
}
return &abp
}
// AllowNoAttrs says that attributes on element are optional.
//
// The attribute policy is only added to the core policy when OnElements(...)
// are called.
func (p *Policy) AllowNoAttrs() *attrPolicyBuilder {
p.init()
abp := attrPolicyBuilder{
p: p,
allowEmpty: true,
}
return &abp
}
// AllowNoAttrs says that attributes on element are optional.
//
// The attribute policy is only added to the core policy when OnElements(...)
// are called.
func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {
abp.allowEmpty = true
return abp
}
// Matching allows a regular expression to be applied to a nascent attribute
// policy, and returns the attribute policy. Calling this more than once will
// replace the existing regexp.
func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {
abp.regexp = regex
return abp
}
// OnElements will bind an attribute policy to a given range of HTML elements
// and return the updated policy
func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
for _, element := range elements {
element = strings.ToLower(element)
for _, attr := range abp.attrNames {
if _, ok := abp.p.elsAndAttrs[element]; !ok {
abp.p.elsAndAttrs[element] = make(map[string]attrPolicy)
}
ap := attrPolicy{}
if abp.regexp != nil {
ap.regexp = abp.regexp
}
abp.p.elsAndAttrs[element][attr] = ap
}
if abp.allowEmpty {
abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{}
if _, ok := abp.p.elsAndAttrs[element]; !ok {
abp.p.elsAndAttrs[element] = make(map[string]attrPolicy)
}
}
}
return abp.p
}
// Globally will bind an attribute policy to all HTML elements and return the
// updated policy
func (abp *attrPolicyBuilder) Globally() *Policy {
for _, attr := range abp.attrNames {
if _, ok := abp.p.globalAttrs[attr]; !ok {
abp.p.globalAttrs[attr] = attrPolicy{}
}
ap := attrPolicy{}
if abp.regexp != nil {
ap.regexp = abp.regexp
}
abp.p.globalAttrs[attr] = ap
}
return abp.p
}
// AllowElements will append HTML elements to the whitelist without applying an
// attribute policy to those elements (the elements are permitted
// sans-attributes)
func (p *Policy) AllowElements(names ...string) *Policy {
p.init()
for _, element := range names {
element = strings.ToLower(element)
if _, ok := p.elsAndAttrs[element]; !ok {
p.elsAndAttrs[element] = make(map[string]attrPolicy)
}
}
return p
}
// RequireNoFollowOnLinks will result in all <a> tags having a rel="nofollow"
// added to them if one does not already exist
//
// Note: This requires p.RequireParseableURLs(true) and will enable it.
func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {
p.requireNoFollow = require
p.requireParseableURLs = true
return p
}
// RequireNoFollowOnFullyQualifiedLinks will result in all <a> tags that point
// to a non-local destination (i.e. starts with a protocol and has a host)
// having a rel="nofollow" added to them if one does not already exist
//
// Note: This requires p.RequireParseableURLs(true) and will enable it.
func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {
p.requireNoFollowFullyQualifiedLinks = require
p.requireParseableURLs = true
return p
}
// AddTargetBlankToFullyQualifiedLinks will result in all <a> tags that point
// to a non-local destination (i.e. starts with a protocol and has a host)
// having a target="_blank" added to them if one does not already exist
//
// Note: This requires p.RequireParseableURLs(true) and will enable it.
func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy {
p.addTargetBlankToFullyQualifiedLinks = require
p.requireParseableURLs = true
return p
}
// RequireParseableURLs will result in all URLs requiring that they be parseable
// by "net/url" url.Parse()
// This applies to:
// - a.href
// - area.href
// - blockquote.cite
// - img.src
// - link.href
// - script.src
func (p *Policy) RequireParseableURLs(require bool) *Policy {
p.requireParseableURLs = require
return p
}
// AllowRelativeURLs enables RequireParseableURLs and then permits URLs that
// are parseable, have no schema information and url.IsAbs() returns false
// This permits local URLs
func (p *Policy) AllowRelativeURLs(require bool) *Policy {
p.RequireParseableURLs(true)
p.allowRelativeURLs = require
return p
}
// AllowURLSchemes will append URL schemes to the whitelist
// Example: p.AllowURLSchemes("mailto", "http", "https")
func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
p.init()
p.RequireParseableURLs(true)
for _, scheme := range schemes {
scheme = strings.ToLower(scheme)
// Allow all URLs with matching scheme.
p.allowURLSchemes[scheme] = nil
}
return p
}
// AllowURLSchemeWithCustomPolicy will append URL schemes with
// a custom URL policy to the whitelist.
// Only the URLs with matching schema and urlPolicy(url)
// returning true will be allowed.
func (p *Policy) AllowURLSchemeWithCustomPolicy(
scheme string,
urlPolicy func(url *url.URL) (allowUrl bool),
) *Policy {
p.init()
p.RequireParseableURLs(true)
scheme = strings.ToLower(scheme)
p.allowURLSchemes[scheme] = urlPolicy
return p
}
// AllowDocType states whether the HTML sanitised by the sanitizer is allowed to
// contain the HTML DocType tag: <!DOCTYPE HTML> or one of it's variants.
//
// The HTML spec only permits one doctype per document, and as you know how you
// are using the output of this, you know best as to whether we should ignore it
// (default) or not.
//
// If you are sanitizing a HTML fragment the default (false) is fine.
func (p *Policy) AllowDocType(allow bool) *Policy {
p.allowDocType = allow
return p
}
// AddSpaceWhenStrippingTag states whether to add a single space " " when
// removing tags that are not whitelisted by the policy.
//
// This is useful if you expect to strip tags in dense markup and may lose the
// value of whitespace.
//
// For example: "<p>Hello</p><p>World</p>"" would be sanitized to "HelloWorld"
// with the default value of false, but you may wish to sanitize this to
// " Hello World " by setting AddSpaceWhenStrippingTag to true as this would
// retain the intent of the text.
func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy {
p.addSpaces = allow
return p
}
// SkipElementsContent adds the HTML elements whose tags is needed to be removed
// with it's content.
func (p *Policy) SkipElementsContent(names ...string) *Policy {
p.init()
for _, element := range names {
element = strings.ToLower(element)
if _, ok := p.setOfElementsToSkipContent[element]; !ok {
p.setOfElementsToSkipContent[element] = struct{}{}
}
}
return p
}
// AllowElementsContent marks the HTML elements whose content should be
// retained after removing the tag.
func (p *Policy) AllowElementsContent(names ...string) *Policy {
p.init()
for _, element := range names {
delete(p.setOfElementsToSkipContent, strings.ToLower(element))
}
return p
}
// addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid
// without any attributes to an internal map.
// i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr
// is mandatory
func (p *Policy) addDefaultElementsWithoutAttrs() {
p.init()
p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["center"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["marquee"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["title"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{}
}
// addDefaultSkipElementContent adds the HTML elements that we should skip
// rendering the character content of, if the element itself is not allowed.
// This is all character data that the end user would not normally see.
// i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or
// anything else until we encounter the closing </script> tag.
func (p *Policy) addDefaultSkipElementContent() {
p.init()
p.setOfElementsToSkipContent["frame"] = struct{}{}
p.setOfElementsToSkipContent["frameset"] = struct{}{}
p.setOfElementsToSkipContent["iframe"] = struct{}{}
p.setOfElementsToSkipContent["noembed"] = struct{}{}
p.setOfElementsToSkipContent["noframes"] = struct{}{}
p.setOfElementsToSkipContent["noscript"] = struct{}{}
p.setOfElementsToSkipContent["nostyle"] = struct{}{}
p.setOfElementsToSkipContent["object"] = struct{}{}
p.setOfElementsToSkipContent["script"] = struct{}{}
p.setOfElementsToSkipContent["style"] = struct{}{}
p.setOfElementsToSkipContent["title"] = struct{}{}
}

View File

@ -0,0 +1,60 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
import "testing"
func TestAllowElementsContent(t *testing.T) {
policy := NewPolicy().AllowElementsContent("iframe", "script")
tests := []test{
{
in: "<iframe src='http://url.com/test'>this is fallback content</iframe>",
expected: "this is fallback content",
},
{
in: "<script>var a = 10; alert(a);</script>",
expected: "var a = 10; alert(a);",
},
}
for ii, test := range tests {
out := policy.Sanitize(test.in)
if out != test.expected {
t.Errorf(
"test %d failed;\ninput : %s\noutput : %s\nexpected: %s",
ii,
test.in,
out,
test.expected,
)
}
}
}

View File

@ -0,0 +1,539 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
import (
"bytes"
"io"
"net/url"
"strings"
"golang.org/x/net/html"
)
// Sanitize takes a string that contains a HTML fragment or document and applies
// the given policy whitelist.
//
// It returns a HTML string that has been sanitized by the policy or an empty
// string if an error has occurred (most likely as a consequence of extremely
// malformed input)
func (p *Policy) Sanitize(s string) string {
if strings.TrimSpace(s) == "" {
return s
}
return p.sanitize(strings.NewReader(s)).String()
}
// SanitizeBytes takes a []byte that contains a HTML fragment or document and applies
// the given policy whitelist.
//
// It returns a []byte containing the HTML that has been sanitized by the policy
// or an empty []byte if an error has occurred (most likely as a consequence of
// extremely malformed input)
func (p *Policy) SanitizeBytes(b []byte) []byte {
if len(bytes.TrimSpace(b)) == 0 {
return b
}
return p.sanitize(bytes.NewReader(b)).Bytes()
}
// SanitizeReader takes an io.Reader that contains a HTML fragment or document
// and applies the given policy whitelist.
//
// It returns a bytes.Buffer containing the HTML that has been sanitized by the
// policy. Errors during sanitization will merely return an empty result.
func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
return p.sanitize(r)
}
// Performs the actual sanitization process.
func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
// It is possible that the developer has created the policy via:
// p := bluemonday.Policy{}
// rather than:
// p := bluemonday.NewPolicy()
// If this is the case, and if they haven't yet triggered an action that
// would initiliaze the maps, then we need to do that.
p.init()
var (
buff bytes.Buffer
skipElementContent bool
skippingElementsCount int64
skipClosingTag bool
closingTagToSkipStack []string
mostRecentlyStartedToken string
)
tokenizer := html.NewTokenizer(r)
for {
if tokenizer.Next() == html.ErrorToken {
err := tokenizer.Err()
if err == io.EOF {
// End of input means end of processing
return &buff
}
// Raw tokenizer error
return &bytes.Buffer{}
}
token := tokenizer.Token()
switch token.Type {
case html.DoctypeToken:
if p.allowDocType {
buff.WriteString(token.String())
}
case html.CommentToken:
// Comments are ignored by default
case html.StartTagToken:
mostRecentlyStartedToken = token.Data
aps, ok := p.elsAndAttrs[token.Data]
if !ok {
if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
skipElementContent = true
skippingElementsCount++
}
if p.addSpaces {
buff.WriteString(" ")
}
break
}
if len(token.Attr) != 0 {
token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
}
if len(token.Attr) == 0 {
if !p.allowNoAttrs(token.Data) {
skipClosingTag = true
closingTagToSkipStack = append(closingTagToSkipStack, token.Data)
if p.addSpaces {
buff.WriteString(" ")
}
break
}
}
if !skipElementContent {
buff.WriteString(token.String())
}
case html.EndTagToken:
if mostRecentlyStartedToken == token.Data {
mostRecentlyStartedToken = ""
}
if skipClosingTag && closingTagToSkipStack[len(closingTagToSkipStack)-1] == token.Data {
closingTagToSkipStack = closingTagToSkipStack[:len(closingTagToSkipStack)-1]
if len(closingTagToSkipStack) == 0 {
skipClosingTag = false
}
if p.addSpaces {
buff.WriteString(" ")
}
break
}
if _, ok := p.elsAndAttrs[token.Data]; !ok {
if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
skippingElementsCount--
if skippingElementsCount == 0 {
skipElementContent = false
}
}
if p.addSpaces {
buff.WriteString(" ")
}
break
}
if !skipElementContent {
buff.WriteString(token.String())
}
case html.SelfClosingTagToken:
aps, ok := p.elsAndAttrs[token.Data]
if !ok {
if p.addSpaces {
buff.WriteString(" ")
}
break
}
if len(token.Attr) != 0 {
token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
}
if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
if p.addSpaces {
buff.WriteString(" ")
}
break
}
if !skipElementContent {
buff.WriteString(token.String())
}
case html.TextToken:
if !skipElementContent {
switch strings.ToLower(mostRecentlyStartedToken) {
case "script":
// not encouraged, but if a policy allows JavaScript we
// should not HTML escape it as that would break the output
buff.WriteString(token.Data)
case "style":
// not encouraged, but if a policy allows CSS styles we
// should not HTML escape it as that would break the output
buff.WriteString(token.Data)
default:
// HTML escape the text
buff.WriteString(token.String())
}
}
default:
// A token that didn't exist in the html package when we wrote this
return &bytes.Buffer{}
}
}
}
// sanitizeAttrs takes a set of element attribute policies and the global
// attribute policies and applies them to the []html.Attribute returning a set
// of html.Attributes that match the policies
func (p *Policy) sanitizeAttrs(
elementName string,
attrs []html.Attribute,
aps map[string]attrPolicy,
) []html.Attribute {
if len(attrs) == 0 {
return attrs
}
// Builds a new attribute slice based on the whether the attribute has been
// whitelisted explicitly or globally.
cleanAttrs := []html.Attribute{}
for _, htmlAttr := range attrs {
// Is there an element specific attribute policy that applies?
if ap, ok := aps[htmlAttr.Key]; ok {
if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
cleanAttrs = append(cleanAttrs, htmlAttr)
continue
}
} else {
cleanAttrs = append(cleanAttrs, htmlAttr)
continue
}
}
// Is there a global attribute policy that applies?
if ap, ok := p.globalAttrs[htmlAttr.Key]; ok {
if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
cleanAttrs = append(cleanAttrs, htmlAttr)
}
} else {
cleanAttrs = append(cleanAttrs, htmlAttr)
}
}
}
if len(cleanAttrs) == 0 {
// If nothing was allowed, let's get out of here
return cleanAttrs
}
// cleanAttrs now contains the attributes that are permitted
if linkable(elementName) {
if p.requireParseableURLs {
// Ensure URLs are parseable:
// - a.href
// - area.href
// - link.href
// - blockquote.cite
// - q.cite
// - img.src
// - script.src
tmpAttrs := []html.Attribute{}
for _, htmlAttr := range cleanAttrs {
switch elementName {
case "a", "area", "link":
if htmlAttr.Key == "href" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
tmpAttrs = append(tmpAttrs, htmlAttr)
}
break
}
tmpAttrs = append(tmpAttrs, htmlAttr)
case "blockquote", "q":
if htmlAttr.Key == "cite" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
tmpAttrs = append(tmpAttrs, htmlAttr)
}
break
}
tmpAttrs = append(tmpAttrs, htmlAttr)
case "img", "script":
if htmlAttr.Key == "src" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
tmpAttrs = append(tmpAttrs, htmlAttr)
}
break
}
tmpAttrs = append(tmpAttrs, htmlAttr)
default:
tmpAttrs = append(tmpAttrs, htmlAttr)
}
}
cleanAttrs = tmpAttrs
}
if (p.requireNoFollow ||
p.requireNoFollowFullyQualifiedLinks ||
p.addTargetBlankToFullyQualifiedLinks) &&
len(cleanAttrs) > 0 {
// Add rel="nofollow" if a "href" exists
switch elementName {
case "a", "area", "link":
var hrefFound bool
var externalLink bool
for _, htmlAttr := range cleanAttrs {
if htmlAttr.Key == "href" {
hrefFound = true
u, err := url.Parse(htmlAttr.Val)
if err != nil {
continue
}
if u.Host != "" {
externalLink = true
}
continue
}
}
if hrefFound {
var (
noFollowFound bool
targetBlankFound bool
)
addNoFollow := (p.requireNoFollow ||
externalLink && p.requireNoFollowFullyQualifiedLinks)
addTargetBlank := (externalLink &&
p.addTargetBlankToFullyQualifiedLinks)
tmpAttrs := []html.Attribute{}
for _, htmlAttr := range cleanAttrs {
var appended bool
if htmlAttr.Key == "rel" && addNoFollow {
if strings.Contains(htmlAttr.Val, "nofollow") {
noFollowFound = true
tmpAttrs = append(tmpAttrs, htmlAttr)
appended = true
} else {
htmlAttr.Val += " nofollow"
noFollowFound = true
tmpAttrs = append(tmpAttrs, htmlAttr)
appended = true
}
}
if elementName == "a" && htmlAttr.Key == "target" {
if htmlAttr.Val == "_blank" {
targetBlankFound = true
}
if addTargetBlank && !targetBlankFound {
htmlAttr.Val = "_blank"
targetBlankFound = true
tmpAttrs = append(tmpAttrs, htmlAttr)
appended = true
}
}
if !appended {
tmpAttrs = append(tmpAttrs, htmlAttr)
}
}
if noFollowFound || targetBlankFound {
cleanAttrs = tmpAttrs
}
if addNoFollow && !noFollowFound {
rel := html.Attribute{}
rel.Key = "rel"
rel.Val = "nofollow"
cleanAttrs = append(cleanAttrs, rel)
}
if elementName == "a" && addTargetBlank && !targetBlankFound {
rel := html.Attribute{}
rel.Key = "target"
rel.Val = "_blank"
targetBlankFound = true
cleanAttrs = append(cleanAttrs, rel)
}
if targetBlankFound {
// target="_blank" has a security risk that allows the
// opened window/tab to issue JavaScript calls against
// window.opener, which in effect allow the destination
// of the link to control the source:
// https://dev.to/ben/the-targetblank-vulnerability-by-example
//
// To mitigate this risk, we need to add a specific rel
// attribute if it is not already present.
// rel="noopener"
//
// Unfortunately this is processing the rel twice (we
// already looked at it earlier ^^) as we cannot be sure
// of the ordering of the href and rel, and whether we
// have fully satisfied that we need to do this. This
// double processing only happens *if* target="_blank"
// is true.
var noOpenerAdded bool
tmpAttrs := []html.Attribute{}
for _, htmlAttr := range cleanAttrs {
var appended bool
if htmlAttr.Key == "rel" {
if strings.Contains(htmlAttr.Val, "noopener") {
noOpenerAdded = true
tmpAttrs = append(tmpAttrs, htmlAttr)
} else {
htmlAttr.Val += " noopener"
noOpenerAdded = true
tmpAttrs = append(tmpAttrs, htmlAttr)
}
appended = true
}
if !appended {
tmpAttrs = append(tmpAttrs, htmlAttr)
}
}
if noOpenerAdded {
cleanAttrs = tmpAttrs
} else {
// rel attr was not found, or else noopener would
// have been added already
rel := html.Attribute{}
rel.Key = "rel"
rel.Val = "noopener"
cleanAttrs = append(cleanAttrs, rel)
}
}
}
default:
}
}
}
return cleanAttrs
}
func (p *Policy) allowNoAttrs(elementName string) bool {
_, ok := p.setOfElementsAllowedWithoutAttrs[elementName]
return ok
}
func (p *Policy) validURL(rawurl string) (string, bool) {
if p.requireParseableURLs {
// URLs do not contain whitespace
if strings.Contains(rawurl, " ") ||
strings.Contains(rawurl, "\t") ||
strings.Contains(rawurl, "\n") {
return "", false
}
u, err := url.Parse(rawurl)
if err != nil {
return "", false
}
if u.Scheme != "" {
urlPolicy, ok := p.allowURLSchemes[u.Scheme]
if !ok {
return "", false
}
if urlPolicy == nil || urlPolicy(u) == true {
return u.String(), true
}
return "", false
}
if p.allowRelativeURLs {
if u.String() != "" {
return u.String(), true
}
}
return "", false
}
return rawurl, true
}
func linkable(elementName string) bool {
switch elementName {
case "a", "area", "blockquote", "img", "link", "script":
return true
default:
return false
}
}

View File

@ -0,0 +1,74 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// +build go1.8
package bluemonday
import (
"sync"
"testing"
)
func TestXSSGo18(t *testing.T) {
p := UGCPolicy()
tests := []test{
{
in: `<IMG SRC="jav&#x0D;ascript:alert('XSS');">`,
expected: ``,
},
{
in: "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
expected: ``,
},
}
// These tests are run concurrently to enable the race detector to pick up
// potential issues
wg := sync.WaitGroup{}
wg.Add(len(tests))
for ii, tt := range tests {
go func(ii int, tt test) {
out := p.Sanitize(tt.in)
if out != tt.expected {
t.Errorf(
"test %d failed;\ninput : %s\noutput : %s\nexpected: %s",
ii,
tt.in,
out,
tt.expected,
)
}
wg.Done()
}(ii, tt)
}
wg.Wait()
}

View File

@ -0,0 +1,74 @@
// Copyright (c) 2014, David Kitchen <david@buro9.com>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the organisation (Microcosm) nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// +build go1.1,!go1.8
package bluemonday
import (
"sync"
"testing"
)
func TestXSSLTGo18(t *testing.T) {
p := UGCPolicy()
tests := []test{
{
in: `<IMG SRC="jav&#x0D;ascript:alert('XSS');">`,
expected: `<img src="jav%0Dascript:alert%28%27XSS%27%29;">`,
},
{
in: "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
expected: `<img src="%60javascript:alert%28%22RSnake">`,
},
}
// These tests are run concurrently to enable the race detector to pick up
// potential issues
wg := sync.WaitGroup{}
wg.Add(len(tests))
for ii, tt := range tests {
go func(ii int, tt test) {
out := p.Sanitize(tt.in)
if out != tt.expected {
t.Errorf(
"test %d failed;\ninput : %s\noutput : %s\nexpected: %s",
ii,
tt.in,
out,
tt.expected,
)
}
wg.Done()
}(ii, tt)
}
wg.Wait()
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
*.out
*.swp
*.8
*.6
_obj
_test*
markdown
tags

View File

@ -0,0 +1,18 @@
# Travis CI (http://travis-ci.org/) is a continuous integration service for
# open source projects. This file configures it to run unit tests for
# blackfriday.
language: go
go:
- 1.5
- 1.6
- 1.7
install:
- go get -d -t -v ./...
- go build -v ./...
script:
- go test -v ./...
- go test -run=^$ -bench=BenchmarkReference -benchmem

View File

@ -0,0 +1,29 @@
Blackfriday is distributed under the Simplified BSD License:
> Copyright © 2011 Russ Ross
> All rights reserved.
>
> Redistribution and use in source and binary forms, with or without
> modification, are permitted provided that the following conditions
> are met:
>
> 1. Redistributions of source code must retain the above copyright
> notice, this list of conditions and the following disclaimer.
>
> 2. Redistributions in binary form must reproduce the above
> copyright notice, this list of conditions and the following
> disclaimer in the documentation and/or other materials provided with
> the distribution.
>
> THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
> FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
> CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
> ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,283 @@
Blackfriday [![Build Status](https://travis-ci.org/russross/blackfriday.svg?branch=master)](https://travis-ci.org/russross/blackfriday)
===========
Blackfriday is a [Markdown][1] processor implemented in [Go][2]. It
is paranoid about its input (so you can safely feed it user-supplied
data), it is fast, it supports common extensions (tables, smart
punctuation substitutions, etc.), and it is safe for all utf-8
(unicode) input.
HTML output is currently supported, along with Smartypants
extensions.
It started as a translation from C of [Sundown][3].
Installation
------------
Blackfriday is compatible with any modern Go release. With Go 1.7 and git
installed:
go get gopkg.in/russross/blackfriday.v2
will download, compile, and install the package into your `$GOPATH`
directory hierarchy. Alternatively, you can achieve the same if you
import it into a project:
import "gopkg.in/russross/blackfriday.v2"
and `go get` without parameters.
Versions
--------
Currently maintained and recommended version of Blackfriday is `v2`. It's being
developed on its own branch: https://github.com/russross/blackfriday/v2. You
should install and import it via [gopkg.in][6] at
`gopkg.in/russross/blackfriday.v2`.
Version 2 offers a number of improvements over v1:
* Cleaned up API
* A separate call to [`Parse`][4], which produces an abstract syntax tree for
the document
* Latest bug fixes
* Flexibility to easily add your own rendering extensions
Potential drawbacks:
* Our benchmarks show v2 to be slightly slower than v1. Currently in the
ballpark of around 15%.
* API breakage. If you can't afford modifying your code to adhere to the new API
and don't care too much about the new features, v2 is probably not for you.
* Several bug fixes are trailing behind and still need to be forward-ported to
v2. See issue [#348](https://github.com/russross/blackfriday/issues/348) for
tracking.
Usage
-----
For the most sensible markdown processing, it is as simple as getting your input
into a byte slice and calling:
```go
output := blackfriday.Run(input)
```
Your input will be parsed and the output rendered with a set of most popular
extensions enabled. If you want the most basic feature set, corresponding with
the bare Markdown specification, use:
```go
output := blackfriday.Run(input, blackfriday.WithNoExtensions())
```
### Sanitize untrusted content
Blackfriday itself does nothing to protect against malicious content. If you are
dealing with user-supplied markdown, we recommend running Blackfriday's output
through HTML sanitizer such as [Bluemonday][5].
Here's an example of simple usage of Blackfriday together with Bluemonday:
```go
import (
"github.com/microcosm-cc/bluemonday"
"github.com/russross/blackfriday"
)
// ...
unsafe := blackfriday.Run(input)
html := bluemonday.UGCPolicy().SanitizeBytes(unsafe)
```
### Custom options
If you want to customize the set of options, use `blackfriday.WithExtensions`,
`blackfriday.WithRenderer` and `blackfriday.WithRefOverride`.
You can also check out `blackfriday-tool` for a more complete example
of how to use it. Download and install it using:
go get github.com/russross/blackfriday-tool
This is a simple command-line tool that allows you to process a
markdown file using a standalone program. You can also browse the
source directly on github if you are just looking for some example
code:
* <http://github.com/russross/blackfriday-tool>
Note that if you have not already done so, installing
`blackfriday-tool` will be sufficient to download and install
blackfriday in addition to the tool itself. The tool binary will be
installed in `$GOPATH/bin`. This is a statically-linked binary that
can be copied to wherever you need it without worrying about
dependencies and library versions.
Features
--------
All features of Sundown are supported, including:
* **Compatibility**. The Markdown v1.0.3 test suite passes with
the `--tidy` option. Without `--tidy`, the differences are
mostly in whitespace and entity escaping, where blackfriday is
more consistent and cleaner.
* **Common extensions**, including table support, fenced code
blocks, autolinks, strikethroughs, non-strict emphasis, etc.
* **Safety**. Blackfriday is paranoid when parsing, making it safe
to feed untrusted user input without fear of bad things
happening. The test suite stress tests this and there are no
known inputs that make it crash. If you find one, please let me
know and send me the input that does it.
NOTE: "safety" in this context means *runtime safety only*. In order to
protect yourself against JavaScript injection in untrusted content, see
[this example](https://github.com/russross/blackfriday#sanitize-untrusted-content).
* **Fast processing**. It is fast enough to render on-demand in
most web applications without having to cache the output.
* **Thread safety**. You can run multiple parsers in different
goroutines without ill effect. There is no dependence on global
shared state.
* **Minimal dependencies**. Blackfriday only depends on standard
library packages in Go. The source code is pretty
self-contained, so it is easy to add to any project, including
Google App Engine projects.
* **Standards compliant**. Output successfully validates using the
W3C validation tool for HTML 4.01 and XHTML 1.0 Transitional.
Extensions
----------
In addition to the standard markdown syntax, this package
implements the following extensions:
* **Intra-word emphasis supression**. The `_` character is
commonly used inside words when discussing code, so having
markdown interpret it as an emphasis command is usually the
wrong thing. Blackfriday lets you treat all emphasis markers as
normal characters when they occur inside a word.
* **Tables**. Tables can be created by drawing them in the input
using a simple syntax:
```
Name | Age
--------|------
Bob | 27
Alice | 23
```
* **Fenced code blocks**. In addition to the normal 4-space
indentation to mark code blocks, you can explicitly mark them
and supply a language (to make syntax highlighting simple). Just
mark it like this:
```go
func getTrue() bool {
return true
}
```
You can use 3 or more backticks to mark the beginning of the
block, and the same number to mark the end of the block.
* **Definition lists**. A simple definition list is made of a single-line
term followed by a colon and the definition for that term.
Cat
: Fluffy animal everyone likes
Internet
: Vector of transmission for pictures of cats
Terms must be separated from the previous definition by a blank line.
* **Footnotes**. A marker in the text that will become a superscript number;
a footnote definition that will be placed in a list of footnotes at the
end of the document. A footnote looks like this:
This is a footnote.[^1]
[^1]: the footnote text.
* **Autolinking**. Blackfriday can find URLs that have not been
explicitly marked as links and turn them into links.
* **Strikethrough**. Use two tildes (`~~`) to mark text that
should be crossed out.
* **Hard line breaks**. With this extension enabled newlines in the input
translate into line breaks in the output. This extension is off by default.
* **Smart quotes**. Smartypants-style punctuation substitution is
supported, turning normal double- and single-quote marks into
curly quotes, etc.
* **LaTeX-style dash parsing** is an additional option, where `--`
is translated into `&ndash;`, and `---` is translated into
`&mdash;`. This differs from most smartypants processors, which
turn a single hyphen into an ndash and a double hyphen into an
mdash.
* **Smart fractions**, where anything that looks like a fraction
is translated into suitable HTML (instead of just a few special
cases like most smartypant processors). For example, `4/5`
becomes `<sup>4</sup>&frasl;<sub>5</sub>`, which renders as
<sup>4</sup>&frasl;<sub>5</sub>.
Other renderers
---------------
Blackfriday is structured to allow alternative rendering engines. Here
are a few of note:
* [github_flavored_markdown](https://godoc.org/github.com/shurcooL/github_flavored_markdown):
provides a GitHub Flavored Markdown renderer with fenced code block
highlighting, clickable heading anchor links.
It's not customizable, and its goal is to produce HTML output
equivalent to the [GitHub Markdown API endpoint](https://developer.github.com/v3/markdown/#render-a-markdown-document-in-raw-mode),
except the rendering is performed locally.
* [markdownfmt](https://github.com/shurcooL/markdownfmt): like gofmt,
but for markdown.
* [LaTeX output](https://bitbucket.org/ambrevar/blackfriday-latex):
renders output as LaTeX.
Todo
----
* More unit testing
* Improve unicode support. It does not understand all unicode
rules (about what constitutes a letter, a punctuation symbol,
etc.), so it may fail to detect word boundaries correctly in
some instances. It is safe on all utf-8 input.
License
-------
[Blackfriday is distributed under the Simplified BSD License](LICENSE.txt)
[1]: https://daringfireball.net/projects/markdown/ "Markdown"
[2]: https://golang.org/ "Go Language"
[3]: https://github.com/vmg/sundown "Sundown"
[4]: https://godoc.org/gopkg.in/russross/blackfriday.v2#Parse "Parse func"
[5]: https://github.com/microcosm-cc/bluemonday "Bluemonday"
[6]: https://labix.org/gopkg.in "gopkg.in"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,18 @@
// Package blackfriday is a markdown processor.
//
// It translates plain text with simple formatting rules into an AST, which can
// then be further processed to HTML (provided by Blackfriday itself) or other
// formats (provided by the community).
//
// The simplest way to invoke Blackfriday is to call the Run function. It will
// take a text input and produce a text output in HTML (or other format).
//
// A slightly more sophisticated way to use Blackfriday is to create a Markdown
// processor and to call Parse, which returns a syntax tree for the input
// document. You can leverage Blackfriday's parsing for content extraction from
// markdown documents. You can assign a custom renderer and set various options
// to the Markdown processor.
//
// If you're interested in calling Blackfriday from command line, see
// https://github.com/russross/blackfriday-tool.
package blackfriday

View File

@ -0,0 +1,34 @@
package blackfriday
import (
"html"
"io"
)
var htmlEscaper = [256][]byte{
'&': []byte("&amp;"),
'<': []byte("&lt;"),
'>': []byte("&gt;"),
'"': []byte("&quot;"),
}
func escapeHTML(w io.Writer, s []byte) {
var start, end int
for end < len(s) {
escSeq := htmlEscaper[s[end]]
if escSeq != nil {
w.Write(s[start:end])
w.Write(escSeq)
start = end + 1
}
end++
}
if start < len(s) && end <= len(s) {
w.Write(s[start:end])
}
}
func escLink(w io.Writer, text []byte) {
unesc := html.UnescapeString(string(text))
escapeHTML(w, []byte(unesc))
}

View File

@ -0,0 +1,48 @@
package blackfriday
import (
"bytes"
"testing"
)
func TestEsc(t *testing.T) {
tests := []string{
"abc", "abc",
"a&c", "a&amp;c",
"<", "&lt;",
"[]:<", "[]:&lt;",
"Hello <!--", "Hello &lt;!--",
}
for i := 0; i < len(tests); i += 2 {
var b bytes.Buffer
escapeHTML(&b, []byte(tests[i]))
if !bytes.Equal(b.Bytes(), []byte(tests[i+1])) {
t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]",
tests[i], tests[i+1], b.String())
}
}
}
func BenchmarkEscapeHTML(b *testing.B) {
tests := [][]byte{
[]byte(""),
[]byte("AT&T has an ampersand in their name."),
[]byte("AT&amp;T is another way to write it."),
[]byte("This & that."),
[]byte("4 < 5."),
[]byte("6 > 5."),
[]byte("Here's a [link] [1] with an ampersand in the URL."),
[]byte("Here's a link with an ampersand in the link text: [AT&T] [2]."),
[]byte("Here's an inline [link](/script?foo=1&bar=2)."),
[]byte("Here's an inline [link](</script?foo=1&bar=2>)."),
[]byte("[1]: http://example.com/?foo=1&bar=2"),
[]byte("[2]: http://att.com/ \"AT&T\""),
}
var buf bytes.Buffer
for n := 0; n < b.N; n++ {
for _, t := range tests {
escapeHTML(&buf, t)
buf.Reset()
}
}
}

View File

@ -0,0 +1,186 @@
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
//
// Helper functions for unit testing
//
package blackfriday
import (
"io/ioutil"
"path/filepath"
"regexp"
"testing"
)
type TestParams struct {
extensions Extensions
referenceOverride ReferenceOverrideFunc
HTMLFlags
HTMLRendererParameters
}
func execRecoverableTestSuite(t *testing.T, tests []string, params TestParams, suite func(candidate *string)) {
// Catch and report panics. This is useful when running 'go test -v' on
// the integration server. When developing, though, crash dump is often
// preferable, so recovery can be easily turned off with doRecover = false.
var candidate string
const doRecover = true
if doRecover {
defer func() {
if err := recover(); err != nil {
t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err)
}
}()
}
suite(&candidate)
}
func runMarkdown(input string, params TestParams) string {
params.HTMLRendererParameters.Flags = params.HTMLFlags
renderer := NewHTMLRenderer(params.HTMLRendererParameters)
return string(Run([]byte(input), WithRenderer(renderer),
WithExtensions(params.extensions),
WithRefOverride(params.referenceOverride)))
}
// doTests runs full document tests using MarkdownCommon configuration.
func doTests(t *testing.T, tests []string) {
doTestsParam(t, tests, TestParams{
extensions: CommonExtensions,
HTMLRendererParameters: HTMLRendererParameters{
Flags: CommonHTMLFlags,
},
})
}
func doTestsBlock(t *testing.T, tests []string, extensions Extensions) {
doTestsParam(t, tests, TestParams{
extensions: extensions,
HTMLFlags: UseXHTML,
})
}
func doTestsParam(t *testing.T, tests []string, params TestParams) {
execRecoverableTestSuite(t, tests, params, func(candidate *string) {
for i := 0; i+1 < len(tests); i += 2 {
input := tests[i]
*candidate = input
expected := tests[i+1]
actual := runMarkdown(*candidate, params)
if actual != expected {
t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]",
*candidate, expected, actual)
}
// now test every substring to stress test bounds checking
if !testing.Short() {
for start := 0; start < len(input); start++ {
for end := start + 1; end <= len(input); end++ {
*candidate = input[start:end]
runMarkdown(*candidate, params)
}
}
}
}
})
}
func doTestsInline(t *testing.T, tests []string) {
doTestsInlineParam(t, tests, TestParams{})
}
func doLinkTestsInline(t *testing.T, tests []string) {
doTestsInline(t, tests)
prefix := "http://localhost"
params := HTMLRendererParameters{AbsolutePrefix: prefix}
transformTests := transformLinks(tests, prefix)
doTestsInlineParam(t, transformTests, TestParams{
HTMLRendererParameters: params,
})
doTestsInlineParam(t, transformTests, TestParams{
HTMLFlags: UseXHTML,
HTMLRendererParameters: params,
})
}
func doSafeTestsInline(t *testing.T, tests []string) {
doTestsInlineParam(t, tests, TestParams{HTMLFlags: Safelink})
// All the links in this test should not have the prefix appended, so
// just rerun it with different parameters and the same expectations.
prefix := "http://localhost"
params := HTMLRendererParameters{AbsolutePrefix: prefix}
transformTests := transformLinks(tests, prefix)
doTestsInlineParam(t, transformTests, TestParams{
HTMLFlags: Safelink,
HTMLRendererParameters: params,
})
}
func doTestsInlineParam(t *testing.T, tests []string, params TestParams) {
params.extensions |= Autolink | Strikethrough
params.HTMLFlags |= UseXHTML
doTestsParam(t, tests, params)
}
func transformLinks(tests []string, prefix string) []string {
newTests := make([]string, len(tests))
anchorRe := regexp.MustCompile(`<a href="/(.*?)"`)
imgRe := regexp.MustCompile(`<img src="/(.*?)"`)
for i, test := range tests {
if i%2 == 1 {
test = anchorRe.ReplaceAllString(test, `<a href="`+prefix+`/$1"`)
test = imgRe.ReplaceAllString(test, `<img src="`+prefix+`/$1"`)
}
newTests[i] = test
}
return newTests
}
func doTestsReference(t *testing.T, files []string, flag Extensions) {
params := TestParams{extensions: flag}
execRecoverableTestSuite(t, files, params, func(candidate *string) {
for _, basename := range files {
filename := filepath.Join("testdata", basename+".text")
inputBytes, err := ioutil.ReadFile(filename)
if err != nil {
t.Errorf("Couldn't open '%s', error: %v\n", filename, err)
continue
}
input := string(inputBytes)
filename = filepath.Join("testdata", basename+".html")
expectedBytes, err := ioutil.ReadFile(filename)
if err != nil {
t.Errorf("Couldn't open '%s', error: %v\n", filename, err)
continue
}
expected := string(expectedBytes)
actual := string(runMarkdown(input, params))
if actual != expected {
t.Errorf("\n [%#v]\nExpected[%#v]\nActual [%#v]",
basename+".text", expected, actual)
}
// now test every prefix of every input to check for
// bounds checking
if !testing.Short() {
start, max := 0, len(input)
for end := start + 1; end <= max; end++ {
*candidate = input[start:end]
runMarkdown(*candidate, params)
}
}
}
})
}

View File

@ -0,0 +1,940 @@
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
//
//
// HTML rendering backend
//
//
package blackfriday
import (
"bytes"
"fmt"
"io"
"regexp"
"strings"
)
// HTMLFlags control optional behavior of HTML renderer.
type HTMLFlags int
// HTML renderer configuration options.
const (
HTMLFlagsNone HTMLFlags = 0
SkipHTML HTMLFlags = 1 << iota // Skip preformatted HTML blocks
SkipImages // Skip embedded images
SkipLinks // Skip all links
Safelink // Only link to trusted protocols
NofollowLinks // Only link with rel="nofollow"
NoreferrerLinks // Only link with rel="noreferrer"
HrefTargetBlank // Add a blank target
CompletePage // Generate a complete HTML page
UseXHTML // Generate XHTML output instead of HTML
FootnoteReturnLinks // Generate a link at the end of a footnote to return to the source
Smartypants // Enable smart punctuation substitutions
SmartypantsFractions // Enable smart fractions (with Smartypants)
SmartypantsDashes // Enable smart dashes (with Smartypants)
SmartypantsLatexDashes // Enable LaTeX-style dashes (with Smartypants)
SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for double quotes rendering
SmartypantsQuotesNBSP // Enable « French guillemets » (with Smartypants)
TOC // Generate a table of contents
)
var (
htmlTagRe = regexp.MustCompile("(?i)^" + htmlTag)
)
const (
htmlTag = "(?:" + openTag + "|" + closeTag + "|" + htmlComment + "|" +
processingInstruction + "|" + declaration + "|" + cdata + ")"
closeTag = "</" + tagName + "\\s*[>]"
openTag = "<" + tagName + attribute + "*" + "\\s*/?>"
attribute = "(?:" + "\\s+" + attributeName + attributeValueSpec + "?)"
attributeValue = "(?:" + unquotedValue + "|" + singleQuotedValue + "|" + doubleQuotedValue + ")"
attributeValueSpec = "(?:" + "\\s*=" + "\\s*" + attributeValue + ")"
attributeName = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
cdata = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"
declaration = "<![A-Z]+" + "\\s+[^>]*>"
doubleQuotedValue = "\"[^\"]*\""
htmlComment = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
processingInstruction = "[<][?].*?[?][>]"
singleQuotedValue = "'[^']*'"
tagName = "[A-Za-z][A-Za-z0-9-]*"
unquotedValue = "[^\"'=<>`\\x00-\\x20]+"
)
// HTMLRendererParameters is a collection of supplementary parameters tweaking
// the behavior of various parts of HTML renderer.
type HTMLRendererParameters struct {
// Prepend this text to each relative URL.
AbsolutePrefix string
// Add this text to each footnote anchor, to ensure uniqueness.
FootnoteAnchorPrefix string
// Show this text inside the <a> tag for a footnote return link, if the
// HTML_FOOTNOTE_RETURN_LINKS flag is enabled. If blank, the string
// <sup>[return]</sup> is used.
FootnoteReturnLinkContents string
// If set, add this text to the front of each Heading ID, to ensure
// uniqueness.
HeadingIDPrefix string
// If set, add this text to the back of each Heading ID, to ensure uniqueness.
HeadingIDSuffix string
Title string // Document title (used if CompletePage is set)
CSS string // Optional CSS file URL (used if CompletePage is set)
Icon string // Optional icon file URL (used if CompletePage is set)
Flags HTMLFlags // Flags allow customizing this renderer's behavior
}
// HTMLRenderer is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the NewHTMLRenderer function.
type HTMLRenderer struct {
HTMLRendererParameters
closeTag string // how to end singleton tags: either " />" or ">"
// Track heading IDs to prevent ID collision in a single generation.
headingIDs map[string]int
lastOutputLen int
disableTags int
sr *SPRenderer
}
const (
xhtmlClose = " />"
htmlClose = ">"
)
// NewHTMLRenderer creates and configures an HTMLRenderer object, which
// satisfies the Renderer interface.
func NewHTMLRenderer(params HTMLRendererParameters) *HTMLRenderer {
// configure the rendering engine
closeTag := htmlClose
if params.Flags&UseXHTML != 0 {
closeTag = xhtmlClose
}
if params.FootnoteReturnLinkContents == "" {
params.FootnoteReturnLinkContents = `<sup>[return]</sup>`
}
return &HTMLRenderer{
HTMLRendererParameters: params,
closeTag: closeTag,
headingIDs: make(map[string]int),
sr: NewSmartypantsRenderer(params.Flags),
}
}
func isHTMLTag(tag []byte, tagname string) bool {
found, _ := findHTMLTagPos(tag, tagname)
return found
}
// Look for a character, but ignore it when it's in any kind of quotes, it
// might be JavaScript
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
inSingleQuote := false
inDoubleQuote := false
inGraveQuote := false
i := start
for i < len(html) {
switch {
case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
return i
case html[i] == '\'':
inSingleQuote = !inSingleQuote
case html[i] == '"':
inDoubleQuote = !inDoubleQuote
case html[i] == '`':
inGraveQuote = !inGraveQuote
}
i++
}
return start
}
func findHTMLTagPos(tag []byte, tagname string) (bool, int) {
i := 0
if i < len(tag) && tag[0] != '<' {
return false, -1
}
i++
i = skipSpace(tag, i)
if i < len(tag) && tag[i] == '/' {
i++
}
i = skipSpace(tag, i)
j := 0
for ; i < len(tag); i, j = i+1, j+1 {
if j >= len(tagname) {
break
}
if strings.ToLower(string(tag[i]))[0] != tagname[j] {
return false, -1
}
}
if i == len(tag) {
return false, -1
}
rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
if rightAngle >= i {
return true, rightAngle
}
return false, -1
}
func skipSpace(tag []byte, i int) int {
for i < len(tag) && isspace(tag[i]) {
i++
}
return i
}
func isRelativeLink(link []byte) (yes bool) {
// a tag begin with '#'
if link[0] == '#' {
return true
}
// link begin with '/' but not '//', the second maybe a protocol relative link
if len(link) >= 2 && link[0] == '/' && link[1] != '/' {
return true
}
// only the root '/'
if len(link) == 1 && link[0] == '/' {
return true
}
// current directory : begin with "./"
if bytes.HasPrefix(link, []byte("./")) {
return true
}
// parent directory : begin with "../"
if bytes.HasPrefix(link, []byte("../")) {
return true
}
return false
}
func (r *HTMLRenderer) ensureUniqueHeadingID(id string) string {
for count, found := r.headingIDs[id]; found; count, found = r.headingIDs[id] {
tmp := fmt.Sprintf("%s-%d", id, count+1)
if _, tmpFound := r.headingIDs[tmp]; !tmpFound {
r.headingIDs[id] = count + 1
id = tmp
} else {
id = id + "-1"
}
}
if _, found := r.headingIDs[id]; !found {
r.headingIDs[id] = 0
}
return id
}
func (r *HTMLRenderer) addAbsPrefix(link []byte) []byte {
if r.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' {
newDest := r.AbsolutePrefix
if link[0] != '/' {
newDest += "/"
}
newDest += string(link)
return []byte(newDest)
}
return link
}
func appendLinkAttrs(attrs []string, flags HTMLFlags, link []byte) []string {
if isRelativeLink(link) {
return attrs
}
val := []string{}
if flags&NofollowLinks != 0 {
val = append(val, "nofollow")
}
if flags&NoreferrerLinks != 0 {
val = append(val, "noreferrer")
}
if flags&HrefTargetBlank != 0 {
attrs = append(attrs, "target=\"_blank\"")
}
if len(val) == 0 {
return attrs
}
attr := fmt.Sprintf("rel=%q", strings.Join(val, " "))
return append(attrs, attr)
}
func isMailto(link []byte) bool {
return bytes.HasPrefix(link, []byte("mailto:"))
}
func needSkipLink(flags HTMLFlags, dest []byte) bool {
if flags&SkipLinks != 0 {
return true
}
return flags&Safelink != 0 && !isSafeLink(dest) && !isMailto(dest)
}
func isSmartypantable(node *Node) bool {
pt := node.Parent.Type
return pt != Link && pt != CodeBlock && pt != Code
}
func appendLanguageAttr(attrs []string, info []byte) []string {
if len(info) == 0 {
return attrs
}
endOfLang := bytes.IndexAny(info, "\t ")
if endOfLang < 0 {
endOfLang = len(info)
}
return append(attrs, fmt.Sprintf("class=\"language-%s\"", info[:endOfLang]))
}
func (r *HTMLRenderer) tag(w io.Writer, name []byte, attrs []string) {
w.Write(name)
if len(attrs) > 0 {
w.Write(spaceBytes)
w.Write([]byte(strings.Join(attrs, " ")))
}
w.Write(gtBytes)
r.lastOutputLen = 1
}
func footnoteRef(prefix string, node *Node) []byte {
urlFrag := prefix + string(slugify(node.Destination))
anchor := fmt.Sprintf(`<a rel="footnote" href="#fn:%s">%d</a>`, urlFrag, node.NoteID)
return []byte(fmt.Sprintf(`<sup class="footnote-ref" id="fnref:%s">%s</sup>`, urlFrag, anchor))
}
func footnoteItem(prefix string, slug []byte) []byte {
return []byte(fmt.Sprintf(`<li id="fn:%s%s">`, prefix, slug))
}
func footnoteReturnLink(prefix, returnLink string, slug []byte) []byte {
const format = ` <a class="footnote-return" href="#fnref:%s%s">%s</a>`
return []byte(fmt.Sprintf(format, prefix, slug, returnLink))
}
func itemOpenCR(node *Node) bool {
if node.Prev == nil {
return false
}
ld := node.Parent.ListData
return !ld.Tight && ld.ListFlags&ListTypeDefinition == 0
}
func skipParagraphTags(node *Node) bool {
grandparent := node.Parent.Parent
if grandparent == nil || grandparent.Type != List {
return false
}
tightOrTerm := grandparent.Tight || node.Parent.ListFlags&ListTypeTerm != 0
return grandparent.Type == List && tightOrTerm
}
func cellAlignment(align CellAlignFlags) string {
switch align {
case TableAlignmentLeft:
return "left"
case TableAlignmentRight:
return "right"
case TableAlignmentCenter:
return "center"
default:
return ""
}
}
func (r *HTMLRenderer) out(w io.Writer, text []byte) {
if r.disableTags > 0 {
w.Write(htmlTagRe.ReplaceAll(text, []byte{}))
} else {
w.Write(text)
}
r.lastOutputLen = len(text)
}
func (r *HTMLRenderer) cr(w io.Writer) {
if r.lastOutputLen > 0 {
r.out(w, nlBytes)
}
}
var (
nlBytes = []byte{'\n'}
gtBytes = []byte{'>'}
spaceBytes = []byte{' '}
)
var (
brTag = []byte("<br>")
brXHTMLTag = []byte("<br />")
emTag = []byte("<em>")
emCloseTag = []byte("</em>")
strongTag = []byte("<strong>")
strongCloseTag = []byte("</strong>")
delTag = []byte("<del>")
delCloseTag = []byte("</del>")
ttTag = []byte("<tt>")
ttCloseTag = []byte("</tt>")
aTag = []byte("<a")
aCloseTag = []byte("</a>")
preTag = []byte("<pre>")
preCloseTag = []byte("</pre>")
codeTag = []byte("<code>")
codeCloseTag = []byte("</code>")
pTag = []byte("<p>")
pCloseTag = []byte("</p>")
blockquoteTag = []byte("<blockquote>")
blockquoteCloseTag = []byte("</blockquote>")
hrTag = []byte("<hr>")
hrXHTMLTag = []byte("<hr />")
ulTag = []byte("<ul>")
ulCloseTag = []byte("</ul>")
olTag = []byte("<ol>")
olCloseTag = []byte("</ol>")
dlTag = []byte("<dl>")
dlCloseTag = []byte("</dl>")
liTag = []byte("<li>")
liCloseTag = []byte("</li>")
ddTag = []byte("<dd>")
ddCloseTag = []byte("</dd>")
dtTag = []byte("<dt>")
dtCloseTag = []byte("</dt>")
tableTag = []byte("<table>")
tableCloseTag = []byte("</table>")
tdTag = []byte("<td")
tdCloseTag = []byte("</td>")
thTag = []byte("<th")
thCloseTag = []byte("</th>")
theadTag = []byte("<thead>")
theadCloseTag = []byte("</thead>")
tbodyTag = []byte("<tbody>")
tbodyCloseTag = []byte("</tbody>")
trTag = []byte("<tr>")
trCloseTag = []byte("</tr>")
h1Tag = []byte("<h1")
h1CloseTag = []byte("</h1>")
h2Tag = []byte("<h2")
h2CloseTag = []byte("</h2>")
h3Tag = []byte("<h3")
h3CloseTag = []byte("</h3>")
h4Tag = []byte("<h4")
h4CloseTag = []byte("</h4>")
h5Tag = []byte("<h5")
h5CloseTag = []byte("</h5>")
h6Tag = []byte("<h6")
h6CloseTag = []byte("</h6>")
footnotesDivBytes = []byte("\n<div class=\"footnotes\">\n\n")
footnotesCloseDivBytes = []byte("\n</div>\n")
)
func headingTagsFromLevel(level int) ([]byte, []byte) {
switch level {
case 1:
return h1Tag, h1CloseTag
case 2:
return h2Tag, h2CloseTag
case 3:
return h3Tag, h3CloseTag
case 4:
return h4Tag, h4CloseTag
case 5:
return h5Tag, h5CloseTag
default:
return h6Tag, h6CloseTag
}
}
func (r *HTMLRenderer) outHRTag(w io.Writer) {
if r.Flags&UseXHTML == 0 {
r.out(w, hrTag)
} else {
r.out(w, hrXHTMLTag)
}
}
// RenderNode is a default renderer of a single node of a syntax tree. For
// block nodes it will be called twice: first time with entering=true, second
// time with entering=false, so that it could know when it's working on an open
// tag and when on close. It writes the result to w.
//
// The return value is a way to tell the calling walker to adjust its walk
// pattern: e.g. it can terminate the traversal by returning Terminate. Or it
// can ask the walker to skip a subtree of this node by returning SkipChildren.
// The typical behavior is to return GoToNext, which asks for the usual
// traversal to the next node.
func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {
attrs := []string{}
switch node.Type {
case Text:
if r.Flags&Smartypants != 0 {
var tmp bytes.Buffer
escapeHTML(&tmp, node.Literal)
r.sr.Process(w, tmp.Bytes())
} else {
if node.Parent.Type == Link {
escLink(w, node.Literal)
} else {
escapeHTML(w, node.Literal)
}
}
case Softbreak:
r.cr(w)
// TODO: make it configurable via out(renderer.softbreak)
case Hardbreak:
if r.Flags&UseXHTML == 0 {
r.out(w, brTag)
} else {
r.out(w, brXHTMLTag)
}
r.cr(w)
case Emph:
if entering {
r.out(w, emTag)
} else {
r.out(w, emCloseTag)
}
case Strong:
if entering {
r.out(w, strongTag)
} else {
r.out(w, strongCloseTag)
}
case Del:
if entering {
r.out(w, delTag)
} else {
r.out(w, delCloseTag)
}
case HTMLSpan:
if r.Flags&SkipHTML != 0 {
break
}
r.out(w, node.Literal)
case Link:
// mark it but don't link it if it is not a safe link: no smartypants
dest := node.LinkData.Destination
if needSkipLink(r.Flags, dest) {
if entering {
r.out(w, ttTag)
} else {
r.out(w, ttCloseTag)
}
} else {
if entering {
dest = r.addAbsPrefix(dest)
var hrefBuf bytes.Buffer
hrefBuf.WriteString("href=\"")
escLink(&hrefBuf, dest)
hrefBuf.WriteByte('"')
attrs = append(attrs, hrefBuf.String())
if node.NoteID != 0 {
r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node))
break
}
attrs = appendLinkAttrs(attrs, r.Flags, dest)
if len(node.LinkData.Title) > 0 {
var titleBuff bytes.Buffer
titleBuff.WriteString("title=\"")
escapeHTML(&titleBuff, node.LinkData.Title)
titleBuff.WriteByte('"')
attrs = append(attrs, titleBuff.String())
}
r.tag(w, aTag, attrs)
} else {
if node.NoteID != 0 {
break
}
r.out(w, aCloseTag)
}
}
case Image:
if r.Flags&SkipImages != 0 {
return SkipChildren
}
if entering {
dest := node.LinkData.Destination
dest = r.addAbsPrefix(dest)
if r.disableTags == 0 {
//if options.safe && potentiallyUnsafe(dest) {
//out(w, `<img src="" alt="`)
//} else {
r.out(w, []byte(`<img src="`))
escLink(w, dest)
r.out(w, []byte(`" alt="`))
//}
}
r.disableTags++
} else {
r.disableTags--
if r.disableTags == 0 {
if node.LinkData.Title != nil {
r.out(w, []byte(`" title="`))
escapeHTML(w, node.LinkData.Title)
}
r.out(w, []byte(`" />`))
}
}
case Code:
r.out(w, codeTag)
escapeHTML(w, node.Literal)
r.out(w, codeCloseTag)
case Document:
break
case Paragraph:
if skipParagraphTags(node) {
break
}
if entering {
// TODO: untangle this clusterfuck about when the newlines need
// to be added and when not.
if node.Prev != nil {
switch node.Prev.Type {
case HTMLBlock, List, Paragraph, Heading, CodeBlock, BlockQuote, HorizontalRule:
r.cr(w)
}
}
if node.Parent.Type == BlockQuote && node.Prev == nil {
r.cr(w)
}
r.out(w, pTag)
} else {
r.out(w, pCloseTag)
if !(node.Parent.Type == Item && node.Next == nil) {
r.cr(w)
}
}
case BlockQuote:
if entering {
r.cr(w)
r.out(w, blockquoteTag)
} else {
r.out(w, blockquoteCloseTag)
r.cr(w)
}
case HTMLBlock:
if r.Flags&SkipHTML != 0 {
break
}
r.cr(w)
r.out(w, node.Literal)
r.cr(w)
case Heading:
openTag, closeTag := headingTagsFromLevel(node.Level)
if entering {
if node.IsTitleblock {
attrs = append(attrs, `class="title"`)
}
if node.HeadingID != "" {
id := r.ensureUniqueHeadingID(node.HeadingID)
if r.HeadingIDPrefix != "" {
id = r.HeadingIDPrefix + id
}
if r.HeadingIDSuffix != "" {
id = id + r.HeadingIDSuffix
}
attrs = append(attrs, fmt.Sprintf(`id="%s"`, id))
}
r.cr(w)
r.tag(w, openTag, attrs)
} else {
r.out(w, closeTag)
if !(node.Parent.Type == Item && node.Next == nil) {
r.cr(w)
}
}
case HorizontalRule:
r.cr(w)
r.outHRTag(w)
r.cr(w)
case List:
openTag := ulTag
closeTag := ulCloseTag
if node.ListFlags&ListTypeOrdered != 0 {
openTag = olTag
closeTag = olCloseTag
}
if node.ListFlags&ListTypeDefinition != 0 {
openTag = dlTag
closeTag = dlCloseTag
}
if entering {
if node.IsFootnotesList {
r.out(w, footnotesDivBytes)
r.outHRTag(w)
r.cr(w)
}
r.cr(w)
if node.Parent.Type == Item && node.Parent.Parent.Tight {
r.cr(w)
}
r.tag(w, openTag[:len(openTag)-1], attrs)
r.cr(w)
} else {
r.out(w, closeTag)
//cr(w)
//if node.parent.Type != Item {
// cr(w)
//}
if node.Parent.Type == Item && node.Next != nil {
r.cr(w)
}
if node.Parent.Type == Document || node.Parent.Type == BlockQuote {
r.cr(w)
}
if node.IsFootnotesList {
r.out(w, footnotesCloseDivBytes)
}
}
case Item:
openTag := liTag
closeTag := liCloseTag
if node.ListFlags&ListTypeDefinition != 0 {
openTag = ddTag
closeTag = ddCloseTag
}
if node.ListFlags&ListTypeTerm != 0 {
openTag = dtTag
closeTag = dtCloseTag
}
if entering {
if itemOpenCR(node) {
r.cr(w)
}
if node.ListData.RefLink != nil {
slug := slugify(node.ListData.RefLink)
r.out(w, footnoteItem(r.FootnoteAnchorPrefix, slug))
break
}
r.out(w, openTag)
} else {
if node.ListData.RefLink != nil {
slug := slugify(node.ListData.RefLink)
if r.Flags&FootnoteReturnLinks != 0 {
r.out(w, footnoteReturnLink(r.FootnoteAnchorPrefix, r.FootnoteReturnLinkContents, slug))
}
}
r.out(w, closeTag)
r.cr(w)
}
case CodeBlock:
attrs = appendLanguageAttr(attrs, node.Info)
r.cr(w)
r.out(w, preTag)
r.tag(w, codeTag[:len(codeTag)-1], attrs)
escapeHTML(w, node.Literal)
r.out(w, codeCloseTag)
r.out(w, preCloseTag)
if node.Parent.Type != Item {
r.cr(w)
}
case Table:
if entering {
r.cr(w)
r.out(w, tableTag)
} else {
r.out(w, tableCloseTag)
r.cr(w)
}
case TableCell:
openTag := tdTag
closeTag := tdCloseTag
if node.IsHeader {
openTag = thTag
closeTag = thCloseTag
}
if entering {
align := cellAlignment(node.Align)
if align != "" {
attrs = append(attrs, fmt.Sprintf(`align="%s"`, align))
}
if node.Prev == nil {
r.cr(w)
}
r.tag(w, openTag, attrs)
} else {
r.out(w, closeTag)
r.cr(w)
}
case TableHead:
if entering {
r.cr(w)
r.out(w, theadTag)
} else {
r.out(w, theadCloseTag)
r.cr(w)
}
case TableBody:
if entering {
r.cr(w)
r.out(w, tbodyTag)
// XXX: this is to adhere to a rather silly test. Should fix test.
if node.FirstChild == nil {
r.cr(w)
}
} else {
r.out(w, tbodyCloseTag)
r.cr(w)
}
case TableRow:
if entering {
r.cr(w)
r.out(w, trTag)
} else {
r.out(w, trCloseTag)
r.cr(w)
}
default:
panic("Unknown node type " + node.Type.String())
}
return GoToNext
}
// RenderHeader writes HTML document preamble and TOC if requested.
func (r *HTMLRenderer) RenderHeader(w io.Writer, ast *Node) {
r.writeDocumentHeader(w)
if r.Flags&TOC != 0 {
r.writeTOC(w, ast)
}
}
// RenderFooter writes HTML document footer.
func (r *HTMLRenderer) RenderFooter(w io.Writer, ast *Node) {
if r.Flags&CompletePage == 0 {
return
}
io.WriteString(w, "\n</body>\n</html>\n")
}
func (r *HTMLRenderer) writeDocumentHeader(w io.Writer) {
if r.Flags&CompletePage == 0 {
return
}
ending := ""
if r.Flags&UseXHTML != 0 {
io.WriteString(w, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
io.WriteString(w, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
io.WriteString(w, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
ending = " /"
} else {
io.WriteString(w, "<!DOCTYPE html>\n")
io.WriteString(w, "<html>\n")
}
io.WriteString(w, "<head>\n")
io.WriteString(w, " <title>")
if r.Flags&Smartypants != 0 {
r.sr.Process(w, []byte(r.Title))
} else {
escapeHTML(w, []byte(r.Title))
}
io.WriteString(w, "</title>\n")
io.WriteString(w, " <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
io.WriteString(w, Version)
io.WriteString(w, "\"")
io.WriteString(w, ending)
io.WriteString(w, ">\n")
io.WriteString(w, " <meta charset=\"utf-8\"")
io.WriteString(w, ending)
io.WriteString(w, ">\n")
if r.CSS != "" {
io.WriteString(w, " <link rel=\"stylesheet\" type=\"text/css\" href=\"")
escapeHTML(w, []byte(r.CSS))
io.WriteString(w, "\"")
io.WriteString(w, ending)
io.WriteString(w, ">\n")
}
if r.Icon != "" {
io.WriteString(w, " <link rel=\"icon\" type=\"image/x-icon\" href=\"")
escapeHTML(w, []byte(r.Icon))
io.WriteString(w, "\"")
io.WriteString(w, ending)
io.WriteString(w, ">\n")
}
io.WriteString(w, "</head>\n")
io.WriteString(w, "<body>\n\n")
}
func (r *HTMLRenderer) writeTOC(w io.Writer, ast *Node) {
buf := bytes.Buffer{}
inHeading := false
tocLevel := 0
headingCount := 0
ast.Walk(func(node *Node, entering bool) WalkStatus {
if node.Type == Heading && !node.HeadingData.IsTitleblock {
inHeading = entering
if entering {
node.HeadingID = fmt.Sprintf("toc_%d", headingCount)
if node.Level == tocLevel {
buf.WriteString("</li>\n\n<li>")
} else if node.Level < tocLevel {
for node.Level < tocLevel {
tocLevel--
buf.WriteString("</li>\n</ul>")
}
buf.WriteString("</li>\n\n<li>")
} else {
for node.Level > tocLevel {
tocLevel++
buf.WriteString("\n<ul>\n<li>")
}
}
fmt.Fprintf(&buf, `<a href="#toc_%d">`, headingCount)
headingCount++
} else {
buf.WriteString("</a>")
}
return GoToNext
}
if inHeading {
return r.RenderNode(&buf, node, entering)
}
return GoToNext
})
for ; tocLevel > 0; tocLevel-- {
buf.WriteString("</li>\n</ul>")
}
if buf.Len() > 0 {
io.WriteString(w, "<nav>\n")
w.Write(buf.Bytes())
io.WriteString(w, "\n\n</nav>\n")
}
r.lastOutputLen = buf.Len()
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,940 @@
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
package blackfriday
import (
"bytes"
"fmt"
"io"
"strings"
"unicode/utf8"
)
//
// Markdown parsing and processing
//
// Version string of the package. Appears in the rendered document when
// CompletePage flag is on.
const Version = "2.0"
// Extensions is a bitwise or'ed collection of enabled Blackfriday's
// extensions.
type Extensions int
// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
const (
NoExtensions Extensions = 0
NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words
Tables // Render tables
FencedCode // Render fenced code blocks
Autolink // Detect embedded URLs that are not explicitly marked
Strikethrough // Strikethrough text using ~~test~~
LaxHTMLBlocks // Loosen up HTML block parsing rules
SpaceHeadings // Be strict about prefix heading rules
HardLineBreak // Translate newlines into line breaks
TabSizeEight // Expand tabs to eight spaces instead of four
Footnotes // Pandoc-style footnotes
NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
HeadingIDs // specify heading IDs with {#id}
Titleblock // Titleblock ala pandoc
AutoHeadingIDs // Create the heading ID from the text
BackslashLineBreak // Translate trailing backslashes into line breaks
DefinitionLists // Render definition lists
CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
BackslashLineBreak | DefinitionLists
)
// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int
// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
ListTypeOrdered ListType = 1 << iota
ListTypeDefinition
ListTypeTerm
ListItemContainsBlock
ListItemBeginningOfList // TODO: figure out if this is of any use now
ListItemEndOfList
)
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int
// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
TableAlignmentLeft CellAlignFlags = 1 << iota
TableAlignmentRight
TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
// The size of a tab stop.
const (
TabSizeDefault = 4
TabSizeDouble = 8
)
// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
"blockquote": struct{}{},
"del": struct{}{},
"div": struct{}{},
"dl": struct{}{},
"fieldset": struct{}{},
"form": struct{}{},
"h1": struct{}{},
"h2": struct{}{},
"h3": struct{}{},
"h4": struct{}{},
"h5": struct{}{},
"h6": struct{}{},
"iframe": struct{}{},
"ins": struct{}{},
"math": struct{}{},
"noscript": struct{}{},
"ol": struct{}{},
"pre": struct{}{},
"p": struct{}{},
"script": struct{}{},
"style": struct{}{},
"table": struct{}{},
"ul": struct{}{},
// HTML5
"address": struct{}{},
"article": struct{}{},
"aside": struct{}{},
"canvas": struct{}{},
"figcaption": struct{}{},
"figure": struct{}{},
"footer": struct{}{},
"header": struct{}{},
"hgroup": struct{}{},
"main": struct{}{},
"nav": struct{}{},
"output": struct{}{},
"progress": struct{}{},
"section": struct{}{},
"video": struct{}{},
}
// Renderer is the rendering interface. This is mostly of interest if you are
// implementing a new rendering format.
//
// Only an HTML implementation is provided in this repository, see the README
// for external implementations.
type Renderer interface {
// RenderNode is the main rendering method. It will be called once for
// every leaf node and twice for every non-leaf node (first with
// entering=true, then with entering=false). The method should write its
// rendition of the node to the supplied writer w.
RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
// RenderHeader is a method that allows the renderer to produce some
// content preceding the main body of the output document. The header is
// understood in the broad sense here. For example, the default HTML
// renderer will write not only the HTML document preamble, but also the
// table of contents if it was requested.
//
// The method will be passed an entire document tree, in case a particular
// implementation needs to inspect it to produce output.
//
// The output should be written to the supplied writer w. If your
// implementation has no header to write, supply an empty implementation.
RenderHeader(w io.Writer, ast *Node)
// RenderFooter is a symmetric counterpart of RenderHeader.
RenderFooter(w io.Writer, ast *Node)
}
// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
type Markdown struct {
renderer Renderer
referenceOverride ReferenceOverrideFunc
refs map[string]*reference
inlineCallback [256]inlineParser
extensions Extensions
nesting int
maxNesting int
insideLink bool
// Footnotes need to be ordered as well as available to quickly check for
// presence. If a ref is also a footnote, it's stored both in refs and here
// in notes. Slice is nil if footnotes not enabled.
notes []*reference
doc *Node
tip *Node // = doc
oldTip *Node
lastMatchedContainer *Node // = doc
allClosed bool
}
func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
if p.referenceOverride != nil {
r, overridden := p.referenceOverride(refid)
if overridden {
if r == nil {
return nil, false
}
return &reference{
link: []byte(r.Link),
title: []byte(r.Title),
noteID: 0,
hasBlock: false,
text: []byte(r.Text)}, true
}
}
// refs are case insensitive
ref, found = p.refs[strings.ToLower(refid)]
return ref, found
}
func (p *Markdown) finalize(block *Node) {
above := block.Parent
block.open = false
p.tip = above
}
func (p *Markdown) addChild(node NodeType, offset uint32) *Node {
return p.addExistingChild(NewNode(node), offset)
}
func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node {
for !p.tip.canContain(node.Type) {
p.finalize(p.tip)
}
p.tip.AppendChild(node)
p.tip = node
return node
}
func (p *Markdown) closeUnmatchedBlocks() {
if !p.allClosed {
for p.oldTip != p.lastMatchedContainer {
parent := p.oldTip.Parent
p.finalize(p.oldTip)
p.oldTip = parent
}
p.allClosed = true
}
}
//
//
// Public interface
//
//
// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
// Link is usually the URL the reference points to.
Link string
// Title is the alternate text describing the link in more detail.
Title string
// Text is the optional text to override the ref with if the syntax used was
// [refid][]
Text string
}
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
// New constructs a Markdown processor. You can use the same With* functions as
// for Run() to customize parser's behavior and the renderer.
func New(opts ...Option) *Markdown {
var p Markdown
for _, opt := range opts {
opt(&p)
}
p.refs = make(map[string]*reference)
p.maxNesting = 16
p.insideLink = false
docNode := NewNode(Document)
p.doc = docNode
p.tip = docNode
p.oldTip = docNode
p.lastMatchedContainer = docNode
p.allClosed = true
// register inline parsers
p.inlineCallback[' '] = maybeLineBreak
p.inlineCallback['*'] = emphasis
p.inlineCallback['_'] = emphasis
if p.extensions&Strikethrough != 0 {
p.inlineCallback['~'] = emphasis
}
p.inlineCallback['`'] = codeSpan
p.inlineCallback['\n'] = lineBreak
p.inlineCallback['['] = link
p.inlineCallback['<'] = leftAngle
p.inlineCallback['\\'] = escape
p.inlineCallback['&'] = entity
p.inlineCallback['!'] = maybeImage
p.inlineCallback['^'] = maybeInlineFootnote
if p.extensions&Autolink != 0 {
p.inlineCallback['h'] = maybeAutoLink
p.inlineCallback['m'] = maybeAutoLink
p.inlineCallback['f'] = maybeAutoLink
p.inlineCallback['H'] = maybeAutoLink
p.inlineCallback['M'] = maybeAutoLink
p.inlineCallback['F'] = maybeAutoLink
}
if p.extensions&Footnotes != 0 {
p.notes = make([]*reference, 0)
}
return &p
}
// Option customizes the Markdown processor's default behavior.
type Option func(*Markdown)
// WithRenderer allows you to override the default renderer.
func WithRenderer(r Renderer) Option {
return func(p *Markdown) {
p.renderer = r
}
}
// WithExtensions allows you to pick some of the many extensions provided by
// Blackfriday. You can bitwise OR them.
func WithExtensions(e Extensions) Option {
return func(p *Markdown) {
p.extensions = e
}
}
// WithNoExtensions turns off all extensions and custom behavior.
func WithNoExtensions() Option {
return func(p *Markdown) {
p.extensions = NoExtensions
p.renderer = NewHTMLRenderer(HTMLRendererParameters{
Flags: HTMLFlagsNone,
})
}
}
// WithRefOverride sets an optional function callback that is called every
// time a reference is resolved.
//
// In Markdown, the link reference syntax can be made to resolve a link to
// a reference instead of an inline URL, in one of the following ways:
//
// * [link text][refid]
// * [refid][]
//
// Usually, the refid is defined at the bottom of the Markdown document. If
// this override function is provided, the refid is passed to the override
// function first, before consulting the defined refids at the bottom. If
// the override function indicates an override did not occur, the refids at
// the bottom will be used to fill in the link details.
func WithRefOverride(o ReferenceOverrideFunc) Option {
return func(p *Markdown) {
p.referenceOverride = o
}
}
// Run is the main entry point to Blackfriday. It parses and renders a
// block of markdown-encoded text.
//
// The simplest invocation of Run takes one argument, input:
// output := Run(input)
// This will parse the input with CommonExtensions enabled and render it with
// the default HTMLRenderer (with CommonHTMLFlags).
//
// Variadic arguments opts can customize the default behavior. Since Markdown
// type does not contain exported fields, you can not use it directly. Instead,
// use the With* functions. For example, this will call the most basic
// functionality, with no extensions:
// output := Run(input, WithNoExtensions())
//
// You can use any number of With* arguments, even contradicting ones. They
// will be applied in order of appearance and the latter will override the
// former:
// output := Run(input, WithNoExtensions(), WithExtensions(exts),
// WithRenderer(yourRenderer))
func Run(input []byte, opts ...Option) []byte {
r := NewHTMLRenderer(HTMLRendererParameters{
Flags: CommonHTMLFlags,
})
optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
optList = append(optList, opts...)
parser := New(optList...)
ast := parser.Parse(input)
var buf bytes.Buffer
parser.renderer.RenderHeader(&buf, ast)
ast.Walk(func(node *Node, entering bool) WalkStatus {
return parser.renderer.RenderNode(&buf, node, entering)
})
parser.renderer.RenderFooter(&buf, ast)
return buf.Bytes()
}
// Parse is an entry point to the parsing part of Blackfriday. It takes an
// input markdown document and produces a syntax tree for its contents. This
// tree can then be rendered with a default or custom renderer, or
// analyzed/transformed by the caller to whatever non-standard needs they have.
// The return value is the root node of the syntax tree.
func (p *Markdown) Parse(input []byte) *Node {
p.block(input)
// Walk the tree and finish up some of unfinished blocks
for p.tip != nil {
p.finalize(p.tip)
}
// Walk the tree again and process inline markdown in each block
p.doc.Walk(func(node *Node, entering bool) WalkStatus {
if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
p.inline(node, node.content)
node.content = nil
}
return GoToNext
})
p.parseRefsToAST()
return p.doc
}
func (p *Markdown) parseRefsToAST() {
if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
return
}
p.tip = p.doc
block := p.addBlock(List, nil)
block.IsFootnotesList = true
block.ListFlags = ListTypeOrdered
flags := ListItemBeginningOfList
// Note: this loop is intentionally explicit, not range-form. This is
// because the body of the loop will append nested footnotes to p.notes and
// we need to process those late additions. Range form would only walk over
// the fixed initial set.
for i := 0; i < len(p.notes); i++ {
ref := p.notes[i]
p.addExistingChild(ref.footnote, 0)
block := ref.footnote
block.ListFlags = flags | ListTypeOrdered
block.RefLink = ref.link
if ref.hasBlock {
flags |= ListItemContainsBlock
p.block(ref.title)
} else {
p.inline(block, ref.title)
}
flags &^= ListItemBeginningOfList | ListItemContainsBlock
}
above := block.Parent
finalizeList(block)
p.tip = above
block.Walk(func(node *Node, entering bool) WalkStatus {
if node.Type == Paragraph || node.Type == Heading {
p.inline(node, node.content)
node.content = nil
}
return GoToNext
})
}
//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
// [1]: http://www.google.com/ "Google"
// [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
// This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
// This sentence needs a bit of further explanation.[^note]
//
// [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
// Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.
// reference holds all information necessary for a reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
// [link][ref]
//
// [ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
// <p><a href=\"/url/\" title=\"title\">link</a></p>
//
// And a reference structure will be populated as follows:
//
// p.refs["ref"] = &reference{
// link: "/url/",
// title: "tooltip title",
// }
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
// Text needing a footnote.[^a]
//
// [^a]: This is the note
//
// A reference structure will be populated as follows:
//
// p.refs["a"] = &reference{
// link: "a",
// title: "This is the note",
// noteID: <some positive int>,
// }
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
type reference struct {
link []byte
title []byte
noteID int // 0 if not a footnote ref
hasBlock bool
footnote *Node // a link to the Item node within a list of footnotes
text []byte // only gets populated by refOverride feature with Reference.Text
}
func (r *reference) String() string {
return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
r.link, r.title, r.text, r.noteID, r.hasBlock)
}
// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the render struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
func isReference(p *Markdown, data []byte, tabSize int) int {
// up to 3 optional leading spaces
if len(data) < 4 {
return 0
}
i := 0
for i < 3 && data[i] == ' ' {
i++
}
noteID := 0
// id part: anything but a newline between brackets
if data[i] != '[' {
return 0
}
i++
if p.extensions&Footnotes != 0 {
if i < len(data) && data[i] == '^' {
// we can set it to anything here because the proper noteIds will
// be assigned later during the second pass. It just has to be != 0
noteID = 1
i++
}
}
idOffset := i
for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
i++
}
if i >= len(data) || data[i] != ']' {
return 0
}
idEnd := i
// footnotes can have empty ID, like this: [^], but a reference can not be
// empty like this: []. Break early if it's not a footnote and there's no ID
if noteID == 0 && idOffset == idEnd {
return 0
}
// spacer: colon (space | tab)* newline? (space | tab)*
i++
if i >= len(data) || data[i] != ':' {
return 0
}
i++
for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
i++
}
if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
i++
if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
i++
}
}
for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
i++
}
if i >= len(data) {
return 0
}
var (
linkOffset, linkEnd int
titleOffset, titleEnd int
lineEnd int
raw []byte
hasBlock bool
)
if p.extensions&Footnotes != 0 && noteID != 0 {
linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
lineEnd = linkEnd
} else {
linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
}
if lineEnd == 0 {
return 0
}
// a valid ref has been found
ref := &reference{
noteID: noteID,
hasBlock: hasBlock,
}
if noteID > 0 {
// reusing the link field for the id since footnotes don't have links
ref.link = data[idOffset:idEnd]
// if footnote, it's not really a title, it's the contained text
ref.title = raw
} else {
ref.link = data[linkOffset:linkEnd]
ref.title = data[titleOffset:titleEnd]
}
// id matches are case-insensitive
id := string(bytes.ToLower(data[idOffset:idEnd]))
p.refs[id] = ref
return lineEnd
}
func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
// link: whitespace-free sequence, optionally between angle brackets
if data[i] == '<' {
i++
}
linkOffset = i
for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
i++
}
linkEnd = i
if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
linkOffset++
linkEnd--
}
// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
i++
}
if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
return
}
// compute end-of-line
if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
lineEnd = i
}
if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
lineEnd++
}
// optional (space|tab)* spacer after a newline
if lineEnd > 0 {
i = lineEnd + 1
for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
i++
}
}
// optional title: any non-newline sequence enclosed in '"() alone on its line
if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
i++
titleOffset = i
// look for EOL
for i < len(data) && data[i] != '\n' && data[i] != '\r' {
i++
}
if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
titleEnd = i + 1
} else {
titleEnd = i
}
// step back
i--
for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
i--
}
if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
lineEnd = titleEnd
titleEnd = i
}
}
return
}
// The first bit of this logic is the same as Parser.listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
if i == 0 || len(data) == 0 {
return
}
// skip leading whitespace on first line
for i < len(data) && data[i] == ' ' {
i++
}
blockStart = i
// find the end of the line
blockEnd = i
for i < len(data) && data[i-1] != '\n' {
i++
}
// get working buffer
var raw bytes.Buffer
// put the first line into the working buffer
raw.Write(data[blockEnd:i])
blockEnd = i
// process the following lines
containsBlankLine := false
gatherLines:
for blockEnd < len(data) {
i++
// find the end of this line
for i < len(data) && data[i-1] != '\n' {
i++
}
// if it is an empty line, guess that it is part of this item
// and move on to the next line
if p.isEmpty(data[blockEnd:i]) > 0 {
containsBlankLine = true
blockEnd = i
continue
}
n := 0
if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
// this is the end of the block.
// we don't want to include this last line in the index.
break gatherLines
}
// if there were blank lines before this one, insert a new one now
if containsBlankLine {
raw.WriteByte('\n')
containsBlankLine = false
}
// get rid of that first tab, write to buffer
raw.Write(data[blockEnd+n : i])
hasBlock = true
blockEnd = i
}
if data[blockEnd-1] != '\n' {
raw.WriteByte('\n')
}
contents = raw.Bytes()
return
}
//
//
// Miscellaneous helper functions
//
//
// Test if a character is a punctuation symbol.
// Taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
if c == r {
return true
}
}
return false
}
// Test if a character is a whitespace character.
func isspace(c byte) bool {
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
}
// Test if a character is letter.
func isletter(c byte) bool {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}
// Test if a character is a letter or a digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
return (c >= '0' && c <= '9') || isletter(c)
}
// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
// always ends output with a newline
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
// first, check for common cases: no tabs, or only tabs at beginning of line
i, prefix := 0, 0
slowcase := false
for i = 0; i < len(line); i++ {
if line[i] == '\t' {
if prefix == i {
prefix++
} else {
slowcase = true
break
}
}
}
// no need to decode runes if all tabs are at the beginning of the line
if !slowcase {
for i = 0; i < prefix*tabSize; i++ {
out.WriteByte(' ')
}
out.Write(line[prefix:])
return
}
// the slow case: we need to count runes to figure out how
// many spaces to insert for each tab
column := 0
i = 0
for i < len(line) {
start := i
for i < len(line) && line[i] != '\t' {
_, size := utf8.DecodeRune(line[i:])
i += size
column++
}
if i > start {
out.Write(line[start:i])
}
if i >= len(line) {
break
}
for {
out.WriteByte(' ')
column++
if column%tabSize == 0 {
break
}
}
i++
}
}
// Find if a line counts as indented or not.
// Returns number of characters the indent is (0 = not indented).
func isIndented(data []byte, indentSize int) int {
if len(data) == 0 {
return 0
}
if data[0] == '\t' {
return 1
}
if len(data) < indentSize {
return 0
}
for i := 0; i < indentSize; i++ {
if data[i] != ' ' {
return 0
}
}
return indentSize
}
// Create a url-safe slug for fragments
func slugify(in []byte) []byte {
if len(in) == 0 {
return in
}
out := make([]byte, 0, len(in))
sym := false
for _, ch := range in {
if isalnum(ch) {
sym = false
out = append(out, ch)
} else if sym {
continue
} else {
out = append(out, '-')
sym = true
}
}
var a, b int
var ch byte
for a, ch = range out {
if ch != '-' {
break
}
}
for b = len(out) - 1; b > 0; b-- {
if out[b] != '-' {
break
}
}
return out[a : b+1]
}

View File

@ -0,0 +1,38 @@
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
//
// Unit tests for full document parsing and rendering
//
package blackfriday
import "testing"
func TestDocument(t *testing.T) {
var tests = []string{
// Empty document.
"",
"",
" ",
"",
// This shouldn't panic.
// https://github.com/russross/blackfriday/issues/172
"[]:<",
"<p>[]:&lt;</p>\n",
// This shouldn't panic.
// https://github.com/russross/blackfriday/issues/173
" [",
"<p>[</p>\n",
}
doTests(t, tests)
}

View File

@ -0,0 +1,354 @@
package blackfriday
import (
"bytes"
"fmt"
)
// NodeType specifies a type of a single node of a syntax tree. Usually one
// node (and its type) corresponds to a single markdown feature, e.g. emphasis
// or code block.
type NodeType int
// Constants for identifying different types of nodes. See NodeType.
const (
Document NodeType = iota
BlockQuote
List
Item
Paragraph
Heading
HorizontalRule
Emph
Strong
Del
Link
Image
Text
HTMLBlock
CodeBlock
Softbreak
Hardbreak
Code
HTMLSpan
Table
TableCell
TableHead
TableBody
TableRow
)
var nodeTypeNames = []string{
Document: "Document",
BlockQuote: "BlockQuote",
List: "List",
Item: "Item",
Paragraph: "Paragraph",
Heading: "Heading",
HorizontalRule: "HorizontalRule",
Emph: "Emph",
Strong: "Strong",
Del: "Del",
Link: "Link",
Image: "Image",
Text: "Text",
HTMLBlock: "HTMLBlock",
CodeBlock: "CodeBlock",
Softbreak: "Softbreak",
Hardbreak: "Hardbreak",
Code: "Code",
HTMLSpan: "HTMLSpan",
Table: "Table",
TableCell: "TableCell",
TableHead: "TableHead",
TableBody: "TableBody",
TableRow: "TableRow",
}
func (t NodeType) String() string {
return nodeTypeNames[t]
}
// ListData contains fields relevant to a List and Item node type.
type ListData struct {
ListFlags ListType
Tight bool // Skip <p>s around list item data if true
BulletChar byte // '*', '+' or '-' in bullet lists
Delimiter byte // '.' or ')' after the number in ordered lists
RefLink []byte // If not nil, turns this list item into a footnote item and triggers different rendering
IsFootnotesList bool // This is a list of footnotes
}
// LinkData contains fields relevant to a Link node type.
type LinkData struct {
Destination []byte // Destination is what goes into a href
Title []byte // Title is the tooltip thing that goes in a title attribute
NoteID int // NoteID contains a serial number of a footnote, zero if it's not a footnote
Footnote *Node // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
}
// CodeBlockData contains fields relevant to a CodeBlock node type.
type CodeBlockData struct {
IsFenced bool // Specifies whether it's a fenced code block or an indented one
Info []byte // This holds the info string
FenceChar byte
FenceLength int
FenceOffset int
}
// TableCellData contains fields relevant to a TableCell node type.
type TableCellData struct {
IsHeader bool // This tells if it's under the header row
Align CellAlignFlags // This holds the value for align attribute
}
// HeadingData contains fields relevant to a Heading node type.
type HeadingData struct {
Level int // This holds the heading level number
HeadingID string // This might hold heading ID, if present
IsTitleblock bool // Specifies whether it's a title block
}
// Node is a single element in the abstract syntax tree of the parsed document.
// It holds connections to the structurally neighboring nodes and, for certain
// types of nodes, additional information that might be needed when rendering.
type Node struct {
Type NodeType // Determines the type of the node
Parent *Node // Points to the parent
FirstChild *Node // Points to the first child, if any
LastChild *Node // Points to the last child, if any
Prev *Node // Previous sibling; nil if it's the first child
Next *Node // Next sibling; nil if it's the last child
Literal []byte // Text contents of the leaf nodes
HeadingData // Populated if Type is Heading
ListData // Populated if Type is List
CodeBlockData // Populated if Type is CodeBlock
LinkData // Populated if Type is Link
TableCellData // Populated if Type is TableCell
content []byte // Markdown content of the block nodes
open bool // Specifies an open block node that has not been finished to process yet
}
// NewNode allocates a node of a specified type.
func NewNode(typ NodeType) *Node {
return &Node{
Type: typ,
open: true,
}
}
func (n *Node) String() string {
ellipsis := ""
snippet := n.Literal
if len(snippet) > 16 {
snippet = snippet[:16]
ellipsis = "..."
}
return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
}
// Unlink removes node 'n' from the tree.
// It panics if the node is nil.
func (n *Node) Unlink() {
if n.Prev != nil {
n.Prev.Next = n.Next
} else if n.Parent != nil {
n.Parent.FirstChild = n.Next
}
if n.Next != nil {
n.Next.Prev = n.Prev
} else if n.Parent != nil {
n.Parent.LastChild = n.Prev
}
n.Parent = nil
n.Next = nil
n.Prev = nil
}
// AppendChild adds a node 'child' as a child of 'n'.
// It panics if either node is nil.
func (n *Node) AppendChild(child *Node) {
child.Unlink()
child.Parent = n
if n.LastChild != nil {
n.LastChild.Next = child
child.Prev = n.LastChild
n.LastChild = child
} else {
n.FirstChild = child
n.LastChild = child
}
}
// InsertBefore inserts 'sibling' immediately before 'n'.
// It panics if either node is nil.
func (n *Node) InsertBefore(sibling *Node) {
sibling.Unlink()
sibling.Prev = n.Prev
if sibling.Prev != nil {
sibling.Prev.Next = sibling
}
sibling.Next = n
n.Prev = sibling
sibling.Parent = n.Parent
if sibling.Prev == nil {
sibling.Parent.FirstChild = sibling
}
}
func (n *Node) isContainer() bool {
switch n.Type {
case Document:
fallthrough
case BlockQuote:
fallthrough
case List:
fallthrough
case Item:
fallthrough
case Paragraph:
fallthrough
case Heading:
fallthrough
case Emph:
fallthrough
case Strong:
fallthrough
case Del:
fallthrough
case Link:
fallthrough
case Image:
fallthrough
case Table:
fallthrough
case TableHead:
fallthrough
case TableBody:
fallthrough
case TableRow:
fallthrough
case TableCell:
return true
default:
return false
}
}
func (n *Node) canContain(t NodeType) bool {
if n.Type == List {
return t == Item
}
if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
return t != Item
}
if n.Type == Table {
return t == TableHead || t == TableBody
}
if n.Type == TableHead || n.Type == TableBody {
return t == TableRow
}
if n.Type == TableRow {
return t == TableCell
}
return false
}
// WalkStatus allows NodeVisitor to have some control over the tree traversal.
// It is returned from NodeVisitor and different values allow Node.Walk to
// decide which node to go to next.
type WalkStatus int
const (
// GoToNext is the default traversal of every node.
GoToNext WalkStatus = iota
// SkipChildren tells walker to skip all children of current node.
SkipChildren
// Terminate tells walker to terminate the traversal.
Terminate
)
// NodeVisitor is a callback to be called when traversing the syntax tree.
// Called twice for every node: once with entering=true when the branch is
// first visited, then with entering=false after all the children are done.
type NodeVisitor func(node *Node, entering bool) WalkStatus
// Walk is a convenience method that instantiates a walker and starts a
// traversal of subtree rooted at n.
func (n *Node) Walk(visitor NodeVisitor) {
w := newNodeWalker(n)
for w.current != nil {
status := visitor(w.current, w.entering)
switch status {
case GoToNext:
w.next()
case SkipChildren:
w.entering = false
w.next()
case Terminate:
return
}
}
}
type nodeWalker struct {
current *Node
root *Node
entering bool
}
func newNodeWalker(root *Node) *nodeWalker {
return &nodeWalker{
current: root,
root: root,
entering: true,
}
}
func (nw *nodeWalker) next() {
if (!nw.current.isContainer() || !nw.entering) && nw.current == nw.root {
nw.current = nil
return
}
if nw.entering && nw.current.isContainer() {
if nw.current.FirstChild != nil {
nw.current = nw.current.FirstChild
nw.entering = true
} else {
nw.entering = false
}
} else if nw.current.Next == nil {
nw.current = nw.current.Parent
nw.entering = false
} else {
nw.current = nw.current.Next
nw.entering = true
}
}
func dump(ast *Node) {
fmt.Println(dumpString(ast))
}
func dumpR(ast *Node, depth int) string {
if ast == nil {
return ""
}
indent := bytes.Repeat([]byte("\t"), depth)
content := ast.Literal
if content == nil {
content = ast.content
}
result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
for n := ast.FirstChild; n != nil; n = n.Next {
result += dumpR(n, depth+1)
}
return result
}
func dumpString(ast *Node) string {
return dumpR(ast, 0)
}

View File

@ -0,0 +1,124 @@
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
//
// Markdown 1.0.3 reference tests
//
package blackfriday
import (
"io/ioutil"
"path/filepath"
"testing"
)
func TestReference(t *testing.T) {
files := []string{
"Amps and angle encoding",
"Auto links",
"Backslash escapes",
"Blockquotes with code blocks",
"Code Blocks",
"Code Spans",
"Hard-wrapped paragraphs with list-like lines",
"Horizontal rules",
"Inline HTML (Advanced)",
"Inline HTML (Simple)",
"Inline HTML comments",
"Links, inline style",
"Links, reference style",
"Links, shortcut references",
"Literal quotes in titles",
"Markdown Documentation - Basics",
"Markdown Documentation - Syntax",
"Nested blockquotes",
"Ordered and unordered lists",
"Strong and em together",
"Tabs",
"Tidyness",
}
doTestsReference(t, files, 0)
}
func TestReference_EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK(t *testing.T) {
files := []string{
"Amps and angle encoding",
"Auto links",
"Backslash escapes",
"Blockquotes with code blocks",
"Code Blocks",
"Code Spans",
"Hard-wrapped paragraphs with list-like lines no empty line before block",
"Horizontal rules",
"Inline HTML (Advanced)",
"Inline HTML (Simple)",
"Inline HTML comments",
"Links, inline style",
"Links, reference style",
"Links, shortcut references",
"Literal quotes in titles",
"Markdown Documentation - Basics",
"Markdown Documentation - Syntax",
"Nested blockquotes",
"Ordered and unordered lists",
"Strong and em together",
"Tabs",
"Tidyness",
}
doTestsReference(t, files, NoEmptyLineBeforeBlock)
}
// benchResultAnchor is an anchor variable to store the result of a benchmarked
// code so that compiler could never optimize away the call to runMarkdown()
var benchResultAnchor string
func BenchmarkReference(b *testing.B) {
params := TestParams{extensions: CommonExtensions}
files := []string{
"Amps and angle encoding",
"Auto links",
"Backslash escapes",
"Blockquotes with code blocks",
"Code Blocks",
"Code Spans",
"Hard-wrapped paragraphs with list-like lines",
"Horizontal rules",
"Inline HTML (Advanced)",
"Inline HTML (Simple)",
"Inline HTML comments",
"Links, inline style",
"Links, reference style",
"Links, shortcut references",
"Literal quotes in titles",
"Markdown Documentation - Basics",
"Markdown Documentation - Syntax",
"Nested blockquotes",
"Ordered and unordered lists",
"Strong and em together",
"Tabs",
"Tidyness",
}
var tests []string
for _, basename := range files {
filename := filepath.Join("testdata", basename+".text")
inputBytes, err := ioutil.ReadFile(filename)
if err != nil {
b.Errorf("Couldn't open '%s', error: %v\n", filename, err)
continue
}
tests = append(tests, string(inputBytes))
}
b.ResetTimer()
for n := 0; n < b.N; n++ {
for _, test := range tests {
benchResultAnchor = runMarkdown(test, params)
}
}
}

View File

@ -0,0 +1,457 @@
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
//
//
// SmartyPants rendering
//
//
package blackfriday
import (
"bytes"
"io"
)
// SPRenderer is a struct containing state of a Smartypants renderer.
type SPRenderer struct {
inSingleQuote bool
inDoubleQuote bool
callbacks [256]smartCallback
}
func wordBoundary(c byte) bool {
return c == 0 || isspace(c) || ispunct(c)
}
func tolower(c byte) byte {
if c >= 'A' && c <= 'Z' {
return c - 'A' + 'a'
}
return c
}
func isdigit(c byte) bool {
return c >= '0' && c <= '9'
}
func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool {
// edge of the buffer is likely to be a tag that we don't get to see,
// so we treat it like text sometimes
// enumerate all sixteen possibilities for (previousChar, nextChar)
// each can be one of {0, space, punct, other}
switch {
case previousChar == 0 && nextChar == 0:
// context is not any help here, so toggle
*isOpen = !*isOpen
case isspace(previousChar) && nextChar == 0:
// [ "] might be [ "<code>foo...]
*isOpen = true
case ispunct(previousChar) && nextChar == 0:
// [!"] hmm... could be [Run!"] or [("<code>...]
*isOpen = false
case /* isnormal(previousChar) && */ nextChar == 0:
// [a"] is probably a close
*isOpen = false
case previousChar == 0 && isspace(nextChar):
// [" ] might be [...foo</code>" ]
*isOpen = false
case isspace(previousChar) && isspace(nextChar):
// [ " ] context is not any help here, so toggle
*isOpen = !*isOpen
case ispunct(previousChar) && isspace(nextChar):
// [!" ] is probably a close
*isOpen = false
case /* isnormal(previousChar) && */ isspace(nextChar):
// [a" ] this is one of the easy cases
*isOpen = false
case previousChar == 0 && ispunct(nextChar):
// ["!] hmm... could be ["$1.95] or [</code>"!...]
*isOpen = false
case isspace(previousChar) && ispunct(nextChar):
// [ "!] looks more like [ "$1.95]
*isOpen = true
case ispunct(previousChar) && ispunct(nextChar):
// [!"!] context is not any help here, so toggle
*isOpen = !*isOpen
case /* isnormal(previousChar) && */ ispunct(nextChar):
// [a"!] is probably a close
*isOpen = false
case previousChar == 0 /* && isnormal(nextChar) */ :
// ["a] is probably an open
*isOpen = true
case isspace(previousChar) /* && isnormal(nextChar) */ :
// [ "a] this is one of the easy cases
*isOpen = true
case ispunct(previousChar) /* && isnormal(nextChar) */ :
// [!"a] is probably an open
*isOpen = true
default:
// [a'b] maybe a contraction?
*isOpen = false
}
// Note that with the limited lookahead, this non-breaking
// space will also be appended to single double quotes.
if addNBSP && !*isOpen {
out.WriteString("&nbsp;")
}
out.WriteByte('&')
if *isOpen {
out.WriteByte('l')
} else {
out.WriteByte('r')
}
out.WriteByte(quote)
out.WriteString("quo;")
if addNBSP && *isOpen {
out.WriteString("&nbsp;")
}
return true
}
func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
if len(text) >= 2 {
t1 := tolower(text[1])
if t1 == '\'' {
nextChar := byte(0)
if len(text) >= 3 {
nextChar = text[2]
}
if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
return 1
}
}
if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
out.WriteString("&rsquo;")
return 0
}
if len(text) >= 3 {
t2 := tolower(text[2])
if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
(len(text) < 4 || wordBoundary(text[3])) {
out.WriteString("&rsquo;")
return 0
}
}
}
nextChar := byte(0)
if len(text) > 1 {
nextChar = text[1]
}
if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote, false) {
return 0
}
out.WriteByte(text[0])
return 0
}
func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
if len(text) >= 3 {
t1 := tolower(text[1])
t2 := tolower(text[2])
if t1 == 'c' && t2 == ')' {
out.WriteString("&copy;")
return 2
}
if t1 == 'r' && t2 == ')' {
out.WriteString("&reg;")
return 2
}
if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
out.WriteString("&trade;")
return 3
}
}
out.WriteByte(text[0])
return 0
}
func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
if len(text) >= 2 {
if text[1] == '-' {
out.WriteString("&mdash;")
return 1
}
if wordBoundary(previousChar) && wordBoundary(text[1]) {
out.WriteString("&ndash;")
return 0
}
}
out.WriteByte(text[0])
return 0
}
func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
out.WriteString("&mdash;")
return 2
}
if len(text) >= 2 && text[1] == '-' {
out.WriteString("&ndash;")
return 1
}
out.WriteByte(text[0])
return 0
}
func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte, addNBSP bool) int {
if bytes.HasPrefix(text, []byte("&quot;")) {
nextChar := byte(0)
if len(text) >= 7 {
nextChar = text[6]
}
if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, addNBSP) {
return 5
}
}
if bytes.HasPrefix(text, []byte("&#0;")) {
return 3
}
out.WriteByte('&')
return 0
}
func (r *SPRenderer) smartAmp(angledQuotes, addNBSP bool) func(*bytes.Buffer, byte, []byte) int {
var quote byte = 'd'
if angledQuotes {
quote = 'a'
}
return func(out *bytes.Buffer, previousChar byte, text []byte) int {
return r.smartAmpVariant(out, previousChar, text, quote, addNBSP)
}
}
func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
out.WriteString("&hellip;")
return 2
}
if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
out.WriteString("&hellip;")
return 4
}
out.WriteByte(text[0])
return 0
}
func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
if len(text) >= 2 && text[1] == '`' {
nextChar := byte(0)
if len(text) >= 3 {
nextChar = text[2]
}
if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
return 1
}
}
out.WriteByte(text[0])
return 0
}
func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
// is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
// note: check for regular slash (/) or fraction slash (, 0x2044, or 0xe2 81 84 in utf-8)
// and avoid changing dates like 1/23/2005 into fractions.
numEnd := 0
for len(text) > numEnd && isdigit(text[numEnd]) {
numEnd++
}
if numEnd == 0 {
out.WriteByte(text[0])
return 0
}
denStart := numEnd + 1
if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
denStart = numEnd + 3
} else if len(text) < numEnd+2 || text[numEnd] != '/' {
out.WriteByte(text[0])
return 0
}
denEnd := denStart
for len(text) > denEnd && isdigit(text[denEnd]) {
denEnd++
}
if denEnd == denStart {
out.WriteByte(text[0])
return 0
}
if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
out.WriteString("<sup>")
out.Write(text[:numEnd])
out.WriteString("</sup>&frasl;<sub>")
out.Write(text[denStart:denEnd])
out.WriteString("</sub>")
return denEnd - 1
}
}
out.WriteByte(text[0])
return 0
}
func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
if text[0] == '1' && text[1] == '/' && text[2] == '2' {
if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
out.WriteString("&frac12;")
return 2
}
}
if text[0] == '1' && text[1] == '/' && text[2] == '4' {
if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
out.WriteString("&frac14;")
return 2
}
}
if text[0] == '3' && text[1] == '/' && text[2] == '4' {
if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
out.WriteString("&frac34;")
return 2
}
}
}
out.WriteByte(text[0])
return 0
}
func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
nextChar := byte(0)
if len(text) > 1 {
nextChar = text[1]
}
if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, false) {
out.WriteString("&quot;")
}
return 0
}
func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
}
func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
}
func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
i := 0
for i < len(text) && text[i] != '>' {
i++
}
out.Write(text[:i+1])
return i
}
type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int
// NewSmartypantsRenderer constructs a Smartypants renderer object.
func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
var (
r SPRenderer
smartAmpAngled = r.smartAmp(true, false)
smartAmpAngledNBSP = r.smartAmp(true, true)
smartAmpRegular = r.smartAmp(false, false)
smartAmpRegularNBSP = r.smartAmp(false, true)
addNBSP = flags&SmartypantsQuotesNBSP != 0
)
if flags&SmartypantsAngledQuotes == 0 {
r.callbacks['"'] = r.smartDoubleQuote
if !addNBSP {
r.callbacks['&'] = smartAmpRegular
} else {
r.callbacks['&'] = smartAmpRegularNBSP
}
} else {
r.callbacks['"'] = r.smartAngledDoubleQuote
if !addNBSP {
r.callbacks['&'] = smartAmpAngled
} else {
r.callbacks['&'] = smartAmpAngledNBSP
}
}
r.callbacks['\''] = r.smartSingleQuote
r.callbacks['('] = r.smartParens
if flags&SmartypantsDashes != 0 {
if flags&SmartypantsLatexDashes == 0 {
r.callbacks['-'] = r.smartDash
} else {
r.callbacks['-'] = r.smartDashLatex
}
}
r.callbacks['.'] = r.smartPeriod
if flags&SmartypantsFractions == 0 {
r.callbacks['1'] = r.smartNumber
r.callbacks['3'] = r.smartNumber
} else {
for ch := '1'; ch <= '9'; ch++ {
r.callbacks[ch] = r.smartNumberGeneric
}
}
r.callbacks['<'] = r.smartLeftAngle
r.callbacks['`'] = r.smartBacktick
return &r
}
// Process is the entry point of the Smartypants renderer.
func (r *SPRenderer) Process(w io.Writer, text []byte) {
mark := 0
for i := 0; i < len(text); i++ {
if action := r.callbacks[text[i]]; action != nil {
if i > mark {
w.Write(text[mark:i])
}
previousChar := byte(0)
if i > 0 {
previousChar = text[i-1]
}
var tmp bytes.Buffer
i += action(&tmp, previousChar, text[i:])
w.Write(tmp.Bytes())
mark = i + 1
}
}
if mark < len(text) {
w.Write(text[mark:])
}
}

View File

@ -0,0 +1,17 @@
<p>AT&amp;T has an ampersand in their name.</p>
<p>AT&amp;T is another way to write it.</p>
<p>This &amp; that.</p>
<p>4 &lt; 5.</p>
<p>6 &gt; 5.</p>
<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>
<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>

View File

@ -0,0 +1,21 @@
AT&T has an ampersand in their name.
AT&amp;T is another way to write it.
This & that.
4 < 5.
6 > 5.
Here's a [link] [1] with an ampersand in the URL.
Here's a link with an amersand in the link text: [AT&T] [2].
Here's an inline [link](/script?foo=1&bar=2).
Here's an inline [link](</script?foo=1&bar=2>).
[1]: http://example.com/?foo=1&bar=2
[2]: http://att.com/ "AT&T"

View File

@ -0,0 +1,18 @@
<p>Link: <a href="http://example.com/">http://example.com/</a>.</p>
<p>With an ampersand: <a href="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</a></p>
<ul>
<li>In a list?</li>
<li><a href="http://example.com/">http://example.com/</a></li>
<li>It should.</li>
</ul>
<blockquote>
<p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p>
</blockquote>
<p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code></p>
<pre><code>or here: &lt;http://example.com/&gt;
</code></pre>

View File

@ -0,0 +1,13 @@
Link: <http://example.com/>.
With an ampersand: <http://example.com/?foo=1&bar=2>
* In a list?
* <http://example.com/>
* It should.
> Blockquoted: <http://example.com/>
Auto-links should not occur here: `<http://example.com/>`
or here: <http://example.com/>

View File

@ -0,0 +1,123 @@
<p>These should all get escaped:</p>
<p>Backslash: \</p>
<p>Backtick: `</p>
<p>Asterisk: *</p>
<p>Underscore: _</p>
<p>Left brace: {</p>
<p>Right brace: }</p>
<p>Left bracket: [</p>
<p>Right bracket: ]</p>
<p>Left paren: (</p>
<p>Right paren: )</p>
<p>Greater-than: &gt;</p>
<p>Hash: #</p>
<p>Period: .</p>
<p>Bang: !</p>
<p>Plus: +</p>
<p>Minus: -</p>
<p>Tilde: ~</p>
<p>These should not, because they occur within a code block:</p>
<pre><code>Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \&gt;
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
Tilde: \~
</code></pre>
<p>Nor should these, which occur in code spans:</p>
<p>Backslash: <code>\\</code></p>
<p>Backtick: <code>\`</code></p>
<p>Asterisk: <code>\*</code></p>
<p>Underscore: <code>\_</code></p>
<p>Left brace: <code>\{</code></p>
<p>Right brace: <code>\}</code></p>
<p>Left bracket: <code>\[</code></p>
<p>Right bracket: <code>\]</code></p>
<p>Left paren: <code>\(</code></p>
<p>Right paren: <code>\)</code></p>
<p>Greater-than: <code>\&gt;</code></p>
<p>Hash: <code>\#</code></p>
<p>Period: <code>\.</code></p>
<p>Bang: <code>\!</code></p>
<p>Plus: <code>\+</code></p>
<p>Minus: <code>\-</code></p>
<p>Tilde: <code>\~</code></p>
<p>These should get escaped, even though they're matching pairs for
other Markdown constructs:</p>
<p>*asterisks*</p>
<p>_underscores_</p>
<p>`backticks`</p>
<p>This is a code span with a literal backslash-backtick sequence: <code>\`</code></p>
<p>This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.</p>
<p>This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.</p>

View File

@ -0,0 +1,126 @@
These should all get escaped:
Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \>
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
Tilde: \~
These should not, because they occur within a code block:
Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \>
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
Tilde: \~
Nor should these, which occur in code spans:
Backslash: `\\`
Backtick: `` \` ``
Asterisk: `\*`
Underscore: `\_`
Left brace: `\{`
Right brace: `\}`
Left bracket: `\[`
Right bracket: `\]`
Left paren: `\(`
Right paren: `\)`
Greater-than: `\>`
Hash: `\#`
Period: `\.`
Bang: `\!`
Plus: `\+`
Minus: `\-`
Tilde: `\~`
These should get escaped, even though they're matching pairs for
other Markdown constructs:
\*asterisks\*
\_underscores\_
\`backticks\`
This is a code span with a literal backslash-backtick sequence: `` \` ``
This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.
This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.

View File

@ -0,0 +1,15 @@
<blockquote>
<p>Example:</p>
<pre><code>sub status {
print &quot;working&quot;;
}
</code></pre>
<p>Or:</p>
<pre><code>sub status {
return &quot;working&quot;;
}
</code></pre>
</blockquote>

View File

@ -0,0 +1,11 @@
> Example:
>
> sub status {
> print "working";
> }
>
> Or:
>
> sub status {
> return "working";
> }

View File

@ -0,0 +1,18 @@
<pre><code>code block on the first line
</code></pre>
<p>Regular text.</p>
<pre><code>code block indented by spaces
</code></pre>
<p>Regular text.</p>
<pre><code>the lines in this block
all contain trailing spaces
</code></pre>
<p>Regular Text.</p>
<pre><code>code block on the last line
</code></pre>

View File

@ -0,0 +1,14 @@
code block on the first line
Regular text.
code block indented by spaces
Regular text.
the lines in this block
all contain trailing spaces
Regular Text.
code block on the last line

View File

@ -0,0 +1,5 @@
<p><code>&lt;test a=&quot;</code> content of attribute <code>&quot;&gt;</code></p>
<p>Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span></p>
<p>Here's how you put <code>`backticks`</code> in a code span.</p>

View File

@ -0,0 +1,6 @@
`<test a="` content of attribute `">`
Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
Here's how you put `` `backticks` `` in a code span.

View File

@ -0,0 +1,14 @@
<p>In Markdown 1.0.0 and earlier. Version</p>
<ol>
<li>This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.</li>
</ol>
<p>Here's one with a bullet.</p>
<ul>
<li>criminey.</li>
</ul>

View File

@ -0,0 +1,8 @@
In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.
Here's one with a bullet.
* criminey.

View File

@ -0,0 +1,8 @@
<p>In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.</p>
<p>Here's one with a bullet.
* criminey.</p>

View File

@ -0,0 +1,8 @@
In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.
Here's one with a bullet.
* criminey.

View File

@ -0,0 +1,71 @@
<p>Dashes:</p>
<hr>
<hr>
<hr>
<hr>
<pre><code>---
</code></pre>
<hr>
<hr>
<hr>
<hr>
<pre><code>- - -
</code></pre>
<p>Asterisks:</p>
<hr>
<hr>
<hr>
<hr>
<pre><code>***
</code></pre>
<hr>
<hr>
<hr>
<hr>
<pre><code>* * *
</code></pre>
<p>Underscores:</p>
<hr>
<hr>
<hr>
<hr>
<pre><code>___
</code></pre>
<hr>
<hr>
<hr>
<hr>
<pre><code>_ _ _
</code></pre>

View File

@ -0,0 +1,67 @@
Dashes:
---
---
---
---
---
- - -
- - -
- - -
- - -
- - -
Asterisks:
***
***
***
***
***
* * *
* * *
* * *
* * *
* * *
Underscores:
___
___
___
___
___
_ _ _
_ _ _
_ _ _
_ _ _
_ _ _

View File

@ -0,0 +1,15 @@
<p>Simple block on one line:</p>
<div>foo</div>
<p>And nested without indentation:</p>
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>

View File

@ -0,0 +1,15 @@
Simple block on one line:
<div>foo</div>
And nested without indentation:
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>

View File

@ -0,0 +1,72 @@
<p>Here's a simple block:</p>
<div>
foo
</div>
<p>This should be a code block, though:</p>
<pre><code>&lt;div&gt;
foo
&lt;/div&gt;
</code></pre>
<p>As should this:</p>
<pre><code>&lt;div&gt;foo&lt;/div&gt;
</code></pre>
<p>Now, nested:</p>
<div>
<div>
<div>
foo
</div>
</div>
</div>
<p>This should just be an HTML comment:</p>
<!-- Comment -->
<p>Multiline:</p>
<!--
Blah
Blah
-->
<p>Code block:</p>
<pre><code>&lt;!-- Comment --&gt;
</code></pre>
<p>Just plain comment, with trailing spaces on the line:</p>
<!-- foo -->
<p>Code:</p>
<pre><code>&lt;hr /&gt;
</code></pre>
<p>Hr's:</p>
<hr>
<hr/>
<hr />
<hr>
<hr/>
<hr />
<hr class="foo" id="bar" />
<hr class="foo" id="bar"/>
<hr class="foo" id="bar" >

View File

@ -0,0 +1,69 @@
Here's a simple block:
<div>
foo
</div>
This should be a code block, though:
<div>
foo
</div>
As should this:
<div>foo</div>
Now, nested:
<div>
<div>
<div>
foo
</div>
</div>
</div>
This should just be an HTML comment:
<!-- Comment -->
Multiline:
<!--
Blah
Blah
-->
Code block:
<!-- Comment -->
Just plain comment, with trailing spaces on the line:
<!-- foo -->
Code:
<hr />
Hr's:
<hr>
<hr/>
<hr />
<hr>
<hr/>
<hr />
<hr class="foo" id="bar" />
<hr class="foo" id="bar"/>
<hr class="foo" id="bar" >

View File

@ -0,0 +1,13 @@
<p>Paragraph one.</p>
<!-- This is a simple comment -->
<!--
This is another comment.
-->
<p>Paragraph two.</p>
<!-- one comment block -- -- with two comments -->
<p>The end.</p>

View File

@ -0,0 +1,13 @@
Paragraph one.
<!-- This is a simple comment -->
<!--
This is another comment.
-->
Paragraph two.
<!-- one comment block -- -- with two comments -->
The end.

View File

@ -0,0 +1,11 @@
<p>Just a <a href="/url/">URL</a>.</p>
<p><a href="/url/" title="title">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by two spaces">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by a tab">URL and title</a>.</p>
<p><a href="/url/" title="title has spaces afterward">URL and title</a>.</p>
<p>[Empty]().</p>

View File

@ -0,0 +1,12 @@
Just a [URL](/url/).
[URL and title](/url/ "title").
[URL and title](/url/ "title preceded by two spaces").
[URL and title](/url/ "title preceded by a tab").
[URL and title](/url/ "title has spaces afterward" ).
[Empty]().

View File

@ -0,0 +1,52 @@
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>With <a href="/url/">embedded [brackets]</a>.</p>
<p>Indented <a href="/url">once</a>.</p>
<p>Indented <a href="/url">twice</a>.</p>
<p>Indented <a href="/url">thrice</a>.</p>
<p>Indented [four][] times.</p>
<pre><code>[four]: /url
</code></pre>
<hr>
<p><a href="foo">this</a> should work</p>
<p>So should <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>But not [that] [].</p>
<p>Nor [that][].</p>
<p>Nor [that].</p>
<p>[Something in brackets like <a href="foo">this</a> should work]</p>
<p>[Same with <a href="foo">this</a>.]</p>
<p>In this case, <a href="/somethingelse/">this</a> points to something else.</p>
<p>Backslashing should suppress [this] and [this].</p>
<hr>
<p>Here's one where the <a href="/url/">link
breaks</a> across lines.</p>
<p>Here's another where the <a href="/url/">link
breaks</a> across lines, but with a line-ending space.</p>

View File

@ -0,0 +1,71 @@
Foo [bar] [1].
Foo [bar][1].
Foo [bar]
[1].
[1]: /url/ "Title"
With [embedded [brackets]] [b].
Indented [once][].
Indented [twice][].
Indented [thrice][].
Indented [four][] times.
[once]: /url
[twice]: /url
[thrice]: /url
[four]: /url
[b]: /url/
* * *
[this] [this] should work
So should [this][this].
And [this] [].
And [this][].
And [this].
But not [that] [].
Nor [that][].
Nor [that].
[Something in brackets like [this][] should work]
[Same with [this].]
In this case, [this](/somethingelse/) points to something else.
Backslashing should suppress \[this] and [this\].
[this]: foo
* * *
Here's one where the [link
breaks] across lines.
Here's another where the [link
breaks] across lines, but with a line-ending space.
[link breaks]: /url/

View File

@ -0,0 +1,9 @@
<p>This is the <a href="/simple">simple case</a>.</p>
<p>This one has a <a href="/foo">line
break</a>.</p>
<p>This one has a <a href="/foo">line
break</a> with a line-ending space.</p>
<p><a href="/that">this</a> and the <a href="/other">other</a></p>

View File

@ -0,0 +1,20 @@
This is the [simple case].
[simple case]: /simple
This one has a [line
break].
This one has a [line
break] with a line-ending space.
[line break]: /foo
[this] [that] and the [other]
[this]: /this
[that]: /that
[other]: /other

View File

@ -0,0 +1,3 @@
<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>
<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>

Some files were not shown because too many files have changed in this diff Show More