Peptides that do not have a protein assignment are dropped from the result.

annotatePeptides(
  pepinfo,
  fasta,
  peptide = "peptideSeq",
  prefix = "(([RK])|(^)|(^M))",
  suffix = ""
)

Arguments

pepinfo

- list of peptides: sequence, optional modified sequence, charge state.

fasta

- object as created by readPeptideFasta

peptide

- name of the column containing the peptide sequences; default "peptideSeq"

prefix

- regular expression for the peptide prefix; default "(([RK])|(^)|(^M))"

suffix

- regular expression for the peptide suffix; default ""

Value

data.frame with columns "peptideSeq", "proteinID", "Offset", "proteinSequence", "matched", "lengthPeptide", "proteinlength"

Examples


library(dplyr)

file = system.file("extdata/IDResults.txt.gz" , package = "prozor")
specMeta <- readr::read_tsv(file)
#> Rows: 5000 Columns: 12
#> -- Column specification --------------------------------------------------------
#> Delimiter: "\t"
#> chr  (2): peptideSeq, peptideModSeq
#> dbl (10): RefSpectraId, numPeaks, precursorCharge, precursorMZ, retentionTim...
#> 
#> i Use `spec()` to retrieve the full column specification for this data.
#> i Specify the column types or set `show_col_types = FALSE` to quiet this message.
upeptide <- unique(specMeta$peptideSeq)
resCan <-
   prozor::readPeptideFasta(
       system.file("p1000_db1_example/Annotation_canSeq.fasta.gz" , package = "prozor"))

annotAll = prozor::annotatePeptides(upeptide[seq_len(20)], resCan)
#> Warning: `funs()` was deprecated in dplyr 0.8.0.
#> Please use a list of either functions or lambdas: 
#> 
#>   # Simple named list: 
#>   list(mean = mean, median = median)
#> 
#>   # Auto named with `tibble::lst()`: 
#>   tibble::lst(mean, median)
#> 
#>   # Using lambdas
#>   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
#> Joining, by = "proteinID"
dim(annotAll)
#> [1] 20  6

res <-  mutate(annotAll, proteinlength = nchar(proteinSequence))
res <-  select(res, proteinID, peptideSeq, proteinlength, Offset, lengthPeptide)
head(res)
#>               proteinID               peptideSeq proteinlength Offset
#> 1 sp|P02765|FETUA_HUMAN AQLVPLPPSTYVEFTVSGTDCVAK           367    188
#> 2  sp|P06396|GELS_HUMAN   AQPVQVAEGSEPDGFWEALGGK           782    627
#> 3  sp|P01011|AACT_HUMAN      AVLDVFEEGTEASAATAVK           423    361
#> 4  sp|P02787|TRFE_HUMAN            CSTSSLLEACTFR           698    684
#> 5  sp|P01011|AACT_HUMAN                EIGELYLPK           423    307
#> 6 sp|P01767|HV353_HUMAN      EVQLVETGGGLIQPGGSLR           116     20
#>   lengthPeptide
#> 1            24
#> 2            22
#> 3            19
#> 4            13
#> 5             9
#> 6            19